summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 1
-rw-r--r-- fs/afs/callback.c 4
-rw-r--r-- fs/afs/cmservice.c 168
-rw-r--r-- fs/afs/flock.c 4
-rw-r--r-- fs/afs/fsclient.c 148
-rw-r--r-- fs/afs/internal.h 38
-rw-r--r-- fs/afs/main.c 1
-rw-r--r-- fs/afs/rxrpc.c 522
-rw-r--r-- fs/afs/server.c 11
-rw-r--r-- fs/afs/vlclient.c 7
-rw-r--r-- fs/afs/vlocation.c 4
-rw-r--r-- fs/autofs4/expire.c 55
-rw-r--r-- fs/binfmt_elf.c 23
-rw-r--r-- fs/btrfs/extent-tree.c 9
-rw-r--r-- fs/btrfs/ioctl.c 12
-rw-r--r-- fs/configfs/file.c 1
-rw-r--r-- fs/debugfs/file.c 15
-rw-r--r-- fs/debugfs/internal.h 4
-rw-r--r-- fs/devpts/inode.c 71
-rw-r--r-- fs/efivarfs/inode.c 5
-rw-r--r-- fs/efivarfs/super.c 13
-rw-r--r-- fs/ext2/ialloc.c 5
-rw-r--r-- fs/ext2/inode.c 10
-rw-r--r-- fs/fuse/Kconfig 1
-rw-r--r-- fs/fuse/Makefile 2
-rw-r--r-- fs/fuse/acl.c 99
-rw-r--r-- fs/fuse/dev.c 1
-rw-r--r-- fs/fuse/dir.c 288
-rw-r--r-- fs/fuse/file.c 68
-rw-r--r-- fs/fuse/fuse_i.h 40
-rw-r--r-- fs/fuse/inode.c 28
-rw-r--r-- fs/fuse/xattr.c 211
-rw-r--r-- fs/gfs2/aops.c 19
-rw-r--r-- fs/gfs2/bmap.c 6
-rw-r--r-- fs/gfs2/dir.c 20
-rw-r--r-- fs/gfs2/file.c 6
-rw-r--r-- fs/gfs2/glock.c 10
-rw-r--r-- fs/gfs2/inode.c 4
-rw-r--r-- fs/gfs2/inode.h 2
-rw-r--r-- fs/gfs2/main.c 4
-rw-r--r-- fs/gfs2/meta_io.c 35
-rw-r--r-- fs/gfs2/ops_fstype.c 8
-rw-r--r-- fs/gfs2/quota.c 4
-rw-r--r-- fs/gfs2/super.c 2
-rw-r--r-- fs/jfs/jfs_txnmgr.c 3
-rw-r--r-- fs/jfs/resize.c 10
-rw-r--r-- fs/locks.c 89
-rw-r--r-- fs/notify/fanotify/fanotify.c 13
-rw-r--r-- fs/notify/fanotify/fanotify_user.c 36
-rw-r--r-- fs/notify/group.c 19
-rw-r--r-- fs/notify/notification.c 23
-rw-r--r-- fs/ocfs2/alloc.c 56
-rw-r--r-- fs/ocfs2/aops.c 10
-rw-r--r-- fs/ocfs2/cluster/tcp_internal.h 5
-rw-r--r-- fs/ocfs2/dlm/dlmconvert.c 12
-rw-r--r-- fs/ocfs2/file.c 34
-rw-r--r-- fs/ocfs2/suballoc.c 14
-rw-r--r-- fs/overlayfs/copy_up.c 22
-rw-r--r-- fs/overlayfs/dir.c 10
-rw-r--r-- fs/proc/base.c 2
-rw-r--r-- fs/proc/generic.c 2
-rw-r--r-- fs/proc/kcore.c 31
-rw-r--r-- fs/proc/proc_net.c 13
-rw-r--r-- fs/proc/proc_sysctl.c 5
-rw-r--r-- fs/quota/quota.c 18
-rw-r--r-- fs/ramfs/file-mmu.c 9
-rw-r--r-- fs/reiserfs/super.c 12
-rw-r--r-- fs/sysfs/group.c 4
-rw-r--r-- fs/udf/file.c 18
69 files changed, 1397 insertions, 1062 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 2bc7ad775842..3ef62bad8f2b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -79,6 +79,7 @@ config EXPORTFS_BLOCK_OPS
config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
default y
+ select PERCPU_RWSEM
help
This option enables standard file locking support, required
for filesystems like NFS and for the flock() system
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 7ef637d7f3a5..1e9d2f84e5b5 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -461,8 +461,8 @@ static void afs_callback_updater(struct work_struct *work)
*/
int __init afs_callback_update_init(void)
{
- afs_callback_update_worker =
- create_singlethread_workqueue("kafs_callbackd");
+ afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd",
+ WQ_MEM_RECLAIM);
return afs_callback_update_worker ? 0 : -ENOMEM;
}
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 85737e96ab8b..2037e7a77a37 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -17,19 +17,12 @@
#include "internal.h"
#include "afs_cm.h"
-#if 0
-struct workqueue_struct *afs_cm_workqueue;
-#endif /* 0 */
-
-static int afs_deliver_cb_init_call_back_state(struct afs_call *,
- struct sk_buff *, bool);
-static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
- struct sk_buff *, bool);
-static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *,
- struct sk_buff *, bool);
+static int afs_deliver_cb_init_call_back_state(struct afs_call *);
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
+static int afs_deliver_cb_probe(struct afs_call *);
+static int afs_deliver_cb_callback(struct afs_call *);
+static int afs_deliver_cb_probe_uuid(struct afs_call *);
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *);
static void afs_cm_destructor(struct afs_call *);
/*
@@ -134,7 +127,7 @@ static void afs_cm_destructor(struct afs_call *call)
* received. The step number here must match the final number in
* afs_deliver_cb_callback().
*/
- if (call->unmarshall == 6) {
+ if (call->unmarshall == 5) {
ASSERT(call->server && call->count && call->request);
afs_break_callbacks(call->server, call->count, call->request);
}
@@ -168,27 +161,27 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
/*
* deliver request data to a CB.CallBack call
*/
-static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
- bool last)
+static int afs_deliver_cb_callback(struct afs_call *call)
{
+ struct sockaddr_rxrpc srx;
struct afs_callback *cb;
struct afs_server *server;
- struct in_addr addr;
__be32 *bp;
u32 tmp;
int ret, loop;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
case 0:
+ rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
call->offset = 0;
call->unmarshall++;
/* extract the FID array and its count in two steps */
case 1:
_debug("extract FID count");
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -205,8 +198,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
case 2:
_debug("extract FID array");
- ret = afs_extract_data(call, skb, last, call->buffer,
- call->count * 3 * 4);
+ ret = afs_extract_data(call, call->buffer,
+ call->count * 3 * 4, true);
if (ret < 0)
return ret;
@@ -232,7 +225,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
/* extract the callback array and its count in two steps */
case 3:
_debug("extract CB count");
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -242,13 +235,11 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
return -EBADMSG;
call->offset = 0;
call->unmarshall++;
- if (tmp == 0)
- goto empty_cb_array;
case 4:
_debug("extract CB array");
- ret = afs_extract_data(call, skb, last, call->request,
- call->count * 3 * 4);
+ ret = afs_extract_data(call, call->buffer,
+ call->count * 3 * 4, false);
if (ret < 0)
return ret;
@@ -261,15 +252,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
cb->type = ntohl(*bp++);
}
- empty_cb_array:
call->offset = 0;
call->unmarshall++;
- case 5:
- ret = afs_data_complete(call, skb, last);
- if (ret < 0)
- return ret;
-
/* Record that the message was unmarshalled successfully so
* that the call destructor can know do the callback breaking
* work, even if the final ACK isn't received.
@@ -278,17 +263,15 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
* updated also.
*/
call->unmarshall++;
- case 6:
+ case 5:
break;
}
-
call->state = AFS_CALL_REPLYING;
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- memcpy(&addr, &ip_hdr(skb)->saddr, 4);
- server = afs_find_server(&addr);
+ server = afs_find_server(&srx);
if (!server)
return -ENOTCONN;
call->server = server;
@@ -315,17 +298,17 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
/*
* deliver request data to a CB.InitCallBackState call
*/
-static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
- struct sk_buff *skb,
- bool last)
+static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
{
+ struct sockaddr_rxrpc srx;
struct afs_server *server;
- struct in_addr addr;
int ret;
- _enter(",{%u},%d", skb->len, last);
+ _enter("");
+
+ rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
- ret = afs_data_complete(call, skb, last);
+ ret = afs_extract_data(call, NULL, 0, false);
if (ret < 0)
return ret;
@@ -334,8 +317,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- memcpy(&addr, &ip_hdr(skb)->saddr, 4);
- server = afs_find_server(&addr);
+ server = afs_find_server(&srx);
if (!server)
return -ENOTCONN;
call->server = server;
@@ -348,27 +330,68 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
/*
* deliver request data to a CB.InitCallBackState3 call
*/
-static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
- struct sk_buff *skb,
- bool last)
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
{
+ struct sockaddr_rxrpc srx;
struct afs_server *server;
- struct in_addr addr;
+ struct afs_uuid *r;
+ unsigned loop;
+ __be32 *b;
+ int ret;
+
+ _enter("");
+
+ rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
+
+ _enter("{%u}", call->unmarshall);
- _enter(",{%u},%d", skb->len, last);
+ switch (call->unmarshall) {
+ case 0:
+ call->offset = 0;
+ call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL);
+ if (!call->buffer)
+ return -ENOMEM;
+ call->unmarshall++;
- /* There are some arguments that we ignore */
- afs_data_consumed(call, skb);
- if (!last)
- return -EAGAIN;
+ case 1:
+ _debug("extract UUID");
+ ret = afs_extract_data(call, call->buffer,
+ 11 * sizeof(__be32), false);
+ switch (ret) {
+ case 0: break;
+ case -EAGAIN: return 0;
+ default: return ret;
+ }
+
+ _debug("unmarshall UUID");
+ call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
+ if (!call->request)
+ return -ENOMEM;
+
+ b = call->buffer;
+ r = call->request;
+ r->time_low = ntohl(b[0]);
+ r->time_mid = ntohl(b[1]);
+ r->time_hi_and_version = ntohl(b[2]);
+ r->clock_seq_hi_and_reserved = ntohl(b[3]);
+ r->clock_seq_low = ntohl(b[4]);
+
+ for (loop = 0; loop < 6; loop++)
+ r->node[loop] = ntohl(b[loop + 5]);
+
+ call->offset = 0;
+ call->unmarshall++;
+
+ case 2:
+ break;
+ }
/* no unmarshalling required */
call->state = AFS_CALL_REPLYING;
/* we'll need the file server record as that tells us which set of
* vnodes to operate upon */
- memcpy(&addr, &ip_hdr(skb)->saddr, 4);
- server = afs_find_server(&addr);
+ server = afs_find_server(&srx);
if (!server)
return -ENOTCONN;
call->server = server;
@@ -393,14 +416,13 @@ static void SRXAFSCB_Probe(struct work_struct *work)
/*
* deliver request data to a CB.Probe call
*/
-static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
- bool last)
+static int afs_deliver_cb_probe(struct afs_call *call)
{
int ret;
- _enter(",{%u},%d", skb->len, last);
+ _enter("");
- ret = afs_data_complete(call, skb, last);
+ ret = afs_extract_data(call, NULL, 0, false);
if (ret < 0)
return ret;
@@ -426,7 +448,6 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
_enter("");
-
if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
reply.match = htonl(0);
else
@@ -439,19 +460,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
/*
* deliver request data to a CB.ProbeUuid call
*/
-static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
- bool last)
+static int afs_deliver_cb_probe_uuid(struct afs_call *call)
{
struct afs_uuid *r;
unsigned loop;
__be32 *b;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
-
- ret = afs_data_complete(call, skb, last);
- if (ret < 0)
- return ret;
+ _enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
case 0:
@@ -463,8 +479,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
case 1:
_debug("extract UUID");
- ret = afs_extract_data(call, skb, last, call->buffer,
- 11 * sizeof(__be32));
+ ret = afs_extract_data(call, call->buffer,
+ 11 * sizeof(__be32), false);
switch (ret) {
case 0: break;
case -EAGAIN: return 0;
@@ -491,16 +507,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
call->unmarshall++;
case 2:
- _debug("trailer");
- if (skb->len != 0)
- return -EBADMSG;
break;
}
- ret = afs_data_complete(call, skb, last);
- if (ret < 0)
- return ret;
-
call->state = AFS_CALL_REPLYING;
INIT_WORK(&call->work, SRXAFSCB_ProbeUuid);
@@ -574,14 +583,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work)
/*
* deliver request data to a CB.TellMeAboutYourself call
*/
-static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
{
int ret;
- _enter(",{%u},%d", skb->len, last);
+ _enter("");
- ret = afs_data_complete(call, skb, last);
+ ret = afs_extract_data(call, NULL, 0, false);
if (ret < 0)
return ret;
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index d91a9c9cfbd0..3191dff2c156 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -36,8 +36,8 @@ static int afs_init_lock_manager(void)
if (!afs_lock_manager) {
mutex_lock(&afs_lock_manager_mutex);
if (!afs_lock_manager) {
- afs_lock_manager =
- create_singlethread_workqueue("kafs_lockd");
+ afs_lock_manager = alloc_workqueue("kafs_lockd",
+ WQ_MEM_RECLAIM, 0);
if (!afs_lock_manager)
ret = -ENOMEM;
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 9312b92e54be..96f4d764d1a6 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -235,16 +235,15 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
/*
* deliver reply data to an FS.FetchStatus
*/
-static int afs_deliver_fs_fetch_status(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_fetch_status(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter(",,%u", last);
+ _enter("");
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -307,8 +306,7 @@ int afs_fs_fetch_file_status(struct afs_server *server,
/*
* deliver reply data to an FS.FetchData
*/
-static int afs_deliver_fs_fetch_data(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_fetch_data(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
@@ -316,7 +314,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
void *buffer;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
case 0:
@@ -332,7 +330,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
* client) */
case 1:
_debug("extract data length (MSW)");
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -347,7 +345,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
/* extract the returned data length */
case 2:
_debug("extract data length");
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -363,10 +361,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
_debug("extract data");
if (call->count > 0) {
page = call->reply3;
- buffer = kmap_atomic(page);
- ret = afs_extract_data(call, skb, last, buffer,
- call->count);
- kunmap_atomic(buffer);
+ buffer = kmap(page);
+ ret = afs_extract_data(call, buffer,
+ call->count, true);
+ kunmap(page); /* kunmap() takes the page, unlike kunmap_atomic() which takes the address */
if (ret < 0)
return ret;
}
@@ -376,8 +374,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
/* extract the metadata */
case 4:
- ret = afs_extract_data(call, skb, last, call->buffer,
- (21 + 3 + 6) * 4);
+ ret = afs_extract_data(call, call->buffer,
+ (21 + 3 + 6) * 4, false);
if (ret < 0)
return ret;
@@ -391,18 +389,15 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
call->unmarshall++;
case 5:
- ret = afs_data_complete(call, skb, last);
- if (ret < 0)
- return ret;
break;
}
if (call->count < PAGE_SIZE) {
_debug("clear");
page = call->reply3;
- buffer = kmap_atomic(page);
+ buffer = kmap(page);
memset(buffer + call->count, 0, PAGE_SIZE - call->count);
- kunmap_atomic(buffer);
+ kunmap(page); /* kunmap() takes the page, unlike kunmap_atomic() which takes the address */
}
_leave(" = 0 [done]");
@@ -515,13 +510,12 @@ int afs_fs_fetch_data(struct afs_server *server,
/*
* deliver reply data to an FS.GiveUpCallBacks
*/
-static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_give_up_callbacks(struct afs_call *call)
{
- _enter(",{%u},%d", skb->len, last);
+ _enter("");
/* shouldn't be any reply data */
- return afs_data_complete(call, skb, last);
+ return afs_extract_data(call, NULL, 0, false);
}
/*
@@ -599,16 +593,15 @@ int afs_fs_give_up_callbacks(struct afs_server *server,
/*
* deliver reply data to an FS.CreateFile or an FS.MakeDir
*/
-static int afs_deliver_fs_create_vnode(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_create_vnode(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -696,16 +689,15 @@ int afs_fs_create(struct afs_server *server,
/*
* deliver reply data to an FS.RemoveFile or FS.RemoveDir
*/
-static int afs_deliver_fs_remove(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_remove(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -777,16 +769,15 @@ int afs_fs_remove(struct afs_server *server,
/*
* deliver reply data to an FS.Link
*/
-static int afs_deliver_fs_link(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_link(struct afs_call *call)
{
struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -863,16 +854,15 @@ int afs_fs_link(struct afs_server *server,
/*
* deliver reply data to an FS.Symlink
*/
-static int afs_deliver_fs_symlink(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_symlink(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -968,16 +958,15 @@ int afs_fs_symlink(struct afs_server *server,
/*
* deliver reply data to an FS.Rename
*/
-static int afs_deliver_fs_rename(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_rename(struct afs_call *call)
{
struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -1072,16 +1061,15 @@ int afs_fs_rename(struct afs_server *server,
/*
* deliver reply data to an FS.StoreData
*/
-static int afs_deliver_fs_store_data(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_store_data(struct afs_call *call)
{
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter(",,%u", last);
+ _enter("");
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -1251,17 +1239,16 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
/*
* deliver reply data to an FS.StoreStatus
*/
-static int afs_deliver_fs_store_status(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_store_status(struct afs_call *call)
{
afs_dataversion_t *store_version;
struct afs_vnode *vnode = call->reply;
const __be32 *bp;
int ret;
- _enter(",,%u", last);
+ _enter("");
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
@@ -1443,14 +1430,13 @@ int afs_fs_setattr(struct afs_server *server, struct key *key,
/*
* deliver reply data to an FS.GetVolumeStatus
*/
-static int afs_deliver_fs_get_volume_status(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_get_volume_status(struct afs_call *call)
{
const __be32 *bp;
char *p;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
switch (call->unmarshall) {
case 0:
@@ -1460,8 +1446,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the returned status record */
case 1:
_debug("extract status");
- ret = afs_extract_data(call, skb, last, call->buffer,
- 12 * 4);
+ ret = afs_extract_data(call, call->buffer,
+ 12 * 4, true);
if (ret < 0)
return ret;
@@ -1472,7 +1458,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the volume name length */
case 2:
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -1487,8 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
case 3:
_debug("extract volname");
if (call->count > 0) {
- ret = afs_extract_data(call, skb, last, call->reply3,
- call->count);
+ ret = afs_extract_data(call, call->reply3,
+ call->count, true);
if (ret < 0)
return ret;
}
@@ -1508,8 +1494,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
call->count = 4 - (call->count & 3);
case 4:
- ret = afs_extract_data(call, skb, last, call->buffer,
- call->count);
+ ret = afs_extract_data(call, call->buffer,
+ call->count, true);
if (ret < 0)
return ret;
@@ -1519,7 +1505,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the offline message length */
case 5:
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -1534,8 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
case 6:
_debug("extract offline");
if (call->count > 0) {
- ret = afs_extract_data(call, skb, last, call->reply3,
- call->count);
+ ret = afs_extract_data(call, call->reply3,
+ call->count, true);
if (ret < 0)
return ret;
}
@@ -1555,8 +1541,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
call->count = 4 - (call->count & 3);
case 7:
- ret = afs_extract_data(call, skb, last, call->buffer,
- call->count);
+ ret = afs_extract_data(call, call->buffer,
+ call->count, true);
if (ret < 0)
return ret;
@@ -1566,7 +1552,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
/* extract the message of the day length */
case 8:
- ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+ ret = afs_extract_data(call, &call->tmp, 4, true);
if (ret < 0)
return ret;
@@ -1581,8 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
case 9:
_debug("extract motd");
if (call->count > 0) {
- ret = afs_extract_data(call, skb, last, call->reply3,
- call->count);
+ ret = afs_extract_data(call, call->reply3,
+ call->count, true);
if (ret < 0)
return ret;
}
@@ -1595,26 +1581,17 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call,
call->unmarshall++;
/* extract the message of the day padding */
- if ((call->count & 3) == 0) {
- call->unmarshall++;
- goto no_motd_padding;
- }
- call->count = 4 - (call->count & 3);
+ call->count = (4 - (call->count & 3)) & 3;
case 10:
- ret = afs_extract_data(call, skb, last, call->buffer,
- call->count);
+ ret = afs_extract_data(call, call->buffer,
+ call->count, false);
if (ret < 0)
return ret;
call->offset = 0;
call->unmarshall++;
- no_motd_padding:
-
case 11:
- ret = afs_data_complete(call, skb, last);
- if (ret < 0)
- return ret;
break;
}
@@ -1685,15 +1662,14 @@ int afs_fs_get_volume_status(struct afs_server *server,
/*
* deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock
*/
-static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
{
const __be32 *bp;
int ret;
- _enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+ _enter("{%u}", call->unmarshall);
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index df976b2a7f40..5497c8496055 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -13,13 +13,13 @@
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
-#include <linux/skbuff.h>
#include <linux/rxrpc.h>
#include <linux/key.h>
#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/fscache.h>
#include <linux/backing-dev.h>
+#include <net/af_rxrpc.h>
#include "afs.h"
#include "afs_vl.h"
@@ -56,7 +56,7 @@ struct afs_mount_params {
*/
struct afs_wait_mode {
/* RxRPC received message notification */
- void (*rx_wakeup)(struct afs_call *call);
+ rxrpc_notify_rx_t notify_rx;
/* synchronous call waiter and call dispatched notification */
int (*wait)(struct afs_call *call);
@@ -75,10 +75,8 @@ struct afs_call {
const struct afs_call_type *type; /* type of call */
const struct afs_wait_mode *wait_mode; /* completion wait mode */
wait_queue_head_t waitq; /* processes awaiting completion */
- void (*async_workfn)(struct afs_call *call); /* asynchronous work function */
struct work_struct async_work; /* asynchronous work processor */
struct work_struct work; /* actual work processor */
- struct sk_buff_head rx_queue; /* received packets */
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct key *key; /* security for this call */
struct afs_server *server; /* server affected by incoming CM call */
@@ -92,6 +90,7 @@ struct afs_call {
void *reply4; /* reply buffer (fourth part) */
pgoff_t first; /* first page in mapping to deal with */
pgoff_t last; /* last page in mapping to deal with */
+ size_t offset; /* offset into received data store */
enum { /* call state */
AFS_CALL_REQUESTING, /* request is being sent for outgoing call */
AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */
@@ -99,21 +98,18 @@ struct afs_call {
AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */
AFS_CALL_REPLYING, /* replying to incoming call */
AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */
- AFS_CALL_COMPLETE, /* successfully completed */
- AFS_CALL_BUSY, /* server was busy */
- AFS_CALL_ABORTED, /* call was aborted */
- AFS_CALL_ERROR, /* call failed due to error */
+ AFS_CALL_COMPLETE, /* Completed or failed */
} state;
int error; /* error code */
+ u32 abort_code; /* Remote abort ID or 0 */
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
- unsigned reply_size; /* current size of reply */
unsigned first_offset; /* offset into mapping[first] */
unsigned last_to; /* amount of mapping[last] */
- unsigned offset; /* offset into received data store */
unsigned char unmarshall; /* unmarshalling phase */
bool incoming; /* T if incoming call */
bool send_pages; /* T if data from mapping should be sent */
+ bool need_attention; /* T if RxRPC poked us */
u16 service_id; /* RxRPC service ID to call */
__be16 port; /* target UDP port */
__be32 operation_ID; /* operation ID for an incoming call */
@@ -128,8 +124,7 @@ struct afs_call_type {
/* deliver request or reply data to an call
* - returning an error will cause the call to be aborted
*/
- int (*deliver)(struct afs_call *call, struct sk_buff *skb,
- bool last);
+ int (*deliver)(struct afs_call *call);
/* map an abort code to an error number */
int (*abort_to_error)(u32 abort_code);
@@ -607,29 +602,22 @@ extern void afs_proc_cell_remove(struct afs_cell *);
/*
* rxrpc.c
*/
+extern struct socket *afs_socket;
+
extern int afs_open_socket(void);
extern void afs_close_socket(void);
-extern void afs_data_consumed(struct afs_call *, struct sk_buff *);
extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
const struct afs_wait_mode *);
extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
size_t, size_t);
extern void afs_flat_call_destructor(struct afs_call *);
-extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool);
extern void afs_send_empty_reply(struct afs_call *);
extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
- size_t);
+extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
-static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb,
- bool last)
+static inline int afs_transfer_reply(struct afs_call *call)
{
- if (skb->len > 0)
- return -EBADMSG;
- afs_data_consumed(call, skb);
- if (!last)
- return -EAGAIN;
- return 0;
+ return afs_extract_data(call, call->buffer, call->reply_max, false);
}
/*
@@ -654,7 +642,7 @@ do { \
extern struct afs_server *afs_lookup_server(struct afs_cell *,
const struct in_addr *);
-extern struct afs_server *afs_find_server(const struct in_addr *);
+extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *);
extern void afs_put_server(struct afs_server *);
extern void __exit afs_purge_servers(void);
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 35de0c04729f..0b187ef3b5b7 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/completion.h>
#include <linux/sched.h>
+#include <linux/random.h>
#include "internal.h"
MODULE_DESCRIPTION("AFS Client File System");
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 14d04c848465..59bdaa7527b6 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -16,34 +16,36 @@
#include "internal.h"
#include "afs_cm.h"
-static struct socket *afs_socket; /* my RxRPC socket */
+struct socket *afs_socket; /* my RxRPC socket */
static struct workqueue_struct *afs_async_calls;
+static struct afs_call *afs_spare_incoming_call;
static atomic_t afs_outstanding_calls;
-static atomic_t afs_outstanding_skbs;
-static void afs_wake_up_call_waiter(struct afs_call *);
+static void afs_free_call(struct afs_call *);
+static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
static int afs_wait_for_call_to_complete(struct afs_call *);
-static void afs_wake_up_async_call(struct afs_call *);
+static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
static int afs_dont_wait_for_call_to_complete(struct afs_call *);
-static void afs_process_async_call(struct afs_call *);
-static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
-static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
+static void afs_process_async_call(struct work_struct *);
+static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
+static int afs_deliver_cm_op_id(struct afs_call *);
/* synchronous call management */
const struct afs_wait_mode afs_sync_call = {
- .rx_wakeup = afs_wake_up_call_waiter,
+ .notify_rx = afs_wake_up_call_waiter,
.wait = afs_wait_for_call_to_complete,
};
/* asynchronous call management */
const struct afs_wait_mode afs_async_call = {
- .rx_wakeup = afs_wake_up_async_call,
+ .notify_rx = afs_wake_up_async_call,
.wait = afs_dont_wait_for_call_to_complete,
};
/* asynchronous incoming call management */
static const struct afs_wait_mode afs_async_incoming_call = {
- .rx_wakeup = afs_wake_up_async_call,
+ .notify_rx = afs_wake_up_async_call,
};
/* asynchronous incoming call initial processing */
@@ -53,17 +55,9 @@ static const struct afs_call_type afs_RXCMxxxx = {
.abort_to_error = afs_abort_to_error,
};
-static void afs_collect_incoming_call(struct work_struct *);
+static void afs_charge_preallocation(struct work_struct *);
-static struct sk_buff_head afs_incoming_calls;
-static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
-
-static void afs_async_workfn(struct work_struct *work)
-{
- struct afs_call *call = container_of(work, struct afs_call, async_work);
-
- call->async_workfn(call);
-}
+static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation);
static int afs_wait_atomic_t(atomic_t *p)
{
@@ -83,10 +77,8 @@ int afs_open_socket(void)
_enter("");
- skb_queue_head_init(&afs_incoming_calls);
-
ret = -ENOMEM;
- afs_async_calls = create_singlethread_workqueue("kafsd");
+ afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
if (!afs_async_calls)
goto error_0;
@@ -110,13 +102,15 @@ int afs_open_socket(void)
if (ret < 0)
goto error_2;
+ rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
+ afs_rx_discard_new_call);
+
ret = kernel_listen(socket, INT_MAX);
if (ret < 0)
goto error_2;
- rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor);
-
afs_socket = socket;
+ afs_charge_preallocation(NULL);
_leave(" = 0");
return 0;
@@ -136,52 +130,28 @@ void afs_close_socket(void)
{
_enter("");
+ if (afs_spare_incoming_call) {
+ atomic_inc(&afs_outstanding_calls);
+ afs_free_call(afs_spare_incoming_call);
+ afs_spare_incoming_call = NULL;
+ }
+
+ _debug("outstanding %u", atomic_read(&afs_outstanding_calls));
wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t,
TASK_UNINTERRUPTIBLE);
_debug("no outstanding calls");
+ flush_workqueue(afs_async_calls);
+ kernel_sock_shutdown(afs_socket, SHUT_RDWR);
+ flush_workqueue(afs_async_calls);
sock_release(afs_socket);
_debug("dework");
destroy_workqueue(afs_async_calls);
-
- ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0);
_leave("");
}
/*
- * Note that the data in a socket buffer is now consumed.
- */
-void afs_data_consumed(struct afs_call *call, struct sk_buff *skb)
-{
- if (!skb) {
- _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
- dump_stack();
- } else {
- _debug("DLVR %p{%u} [%d]",
- skb, skb->mark, atomic_read(&afs_outstanding_skbs));
- rxrpc_kernel_data_consumed(call->rxcall, skb);
- }
-}
-
-/*
- * free a socket buffer
- */
-static void afs_free_skb(struct sk_buff *skb)
-{
- if (!skb) {
- _debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs));
- dump_stack();
- } else {
- _debug("FREE %p{%u} [%d]",
- skb, skb->mark, atomic_read(&afs_outstanding_skbs));
- if (atomic_dec_return(&afs_outstanding_skbs) == -1)
- BUG();
- rxrpc_kernel_free_skb(skb);
- }
-}
-
-/*
* free a call
*/
static void afs_free_call(struct afs_call *call)
@@ -191,7 +161,6 @@ static void afs_free_call(struct afs_call *call)
ASSERTCMP(call->rxcall, ==, NULL);
ASSERT(!work_pending(&call->async_work));
- ASSERT(skb_queue_empty(&call->rx_queue));
ASSERT(call->type->name != NULL);
kfree(call->request);
@@ -207,7 +176,7 @@ static void afs_free_call(struct afs_call *call)
static void afs_end_call_nofree(struct afs_call *call)
{
if (call->rxcall) {
- rxrpc_kernel_end_call(call->rxcall);
+ rxrpc_kernel_end_call(afs_socket, call->rxcall);
call->rxcall = NULL;
}
if (call->type->destructor)
@@ -227,7 +196,7 @@ static void afs_end_call(struct afs_call *call)
* allocate a call with flat request and reply buffers
*/
struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
- size_t request_size, size_t reply_size)
+ size_t request_size, size_t reply_max)
{
struct afs_call *call;
@@ -241,7 +210,7 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
call->type = type;
call->request_size = request_size;
- call->reply_max = reply_size;
+ call->reply_max = reply_max;
if (request_size) {
call->request = kmalloc(request_size, GFP_NOFS);
@@ -249,14 +218,13 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
goto nomem_free;
}
- if (reply_size) {
- call->buffer = kmalloc(reply_size, GFP_NOFS);
+ if (reply_max) {
+ call->buffer = kmalloc(reply_max, GFP_NOFS);
if (!call->buffer)
goto nomem_free;
}
init_waitqueue_head(&call->waitq);
- skb_queue_head_init(&call->rx_queue);
return call;
nomem_free:
@@ -325,8 +293,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg,
* returns from sending the request */
if (first + loop >= last)
call->state = AFS_CALL_AWAIT_REPLY;
- ret = rxrpc_kernel_send_data(call->rxcall, msg,
- to - offset);
+ ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
+ msg, to - offset);
kunmap(pages[loop]);
if (ret < 0)
break;
@@ -354,7 +322,6 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
struct msghdr msg;
struct kvec iov[1];
int ret;
- struct sk_buff *skb;
_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -366,8 +333,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
atomic_read(&afs_outstanding_calls));
call->wait_mode = wait_mode;
- call->async_workfn = afs_process_async_call;
- INIT_WORK(&call->async_work, afs_async_workfn);
+ INIT_WORK(&call->async_work, afs_process_async_call);
memset(&srx, 0, sizeof(srx));
srx.srx_family = AF_RXRPC;
@@ -380,7 +346,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
/* create a call */
rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
- (unsigned long) call, gfp);
+ (unsigned long) call, gfp,
+ wait_mode->notify_rx);
call->key = NULL;
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
@@ -406,7 +373,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
* request */
if (!call->send_pages)
call->state = AFS_CALL_AWAIT_REPLY;
- ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
+ ret = rxrpc_kernel_send_data(afs_socket, rxcall,
+ &msg, call->request_size);
if (ret < 0)
goto error_do_abort;
@@ -421,9 +389,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
return wait_mode->wait(call);
error_do_abort:
- rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
- while ((skb = skb_dequeue(&call->rx_queue)))
- afs_free_skb(skb);
+ rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
error_kill_call:
afs_end_call(call);
_leave(" = %d", ret);
@@ -431,140 +397,77 @@ error_kill_call:
}
/*
- * Handles intercepted messages that were arriving in the socket's Rx queue.
- *
- * Called from the AF_RXRPC call processor in waitqueue process context. For
- * each call, it is guaranteed this will be called in order of packet to be
- * delivered.
- */
-static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
- struct sk_buff *skb)
-{
- struct afs_call *call = (struct afs_call *) user_call_ID;
-
- _enter("%p,,%u", call, skb->mark);
-
- _debug("ICPT %p{%u} [%d]",
- skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-
- ASSERTCMP(sk, ==, afs_socket->sk);
- atomic_inc(&afs_outstanding_skbs);
-
- if (!call) {
- /* its an incoming call for our callback service */
- skb_queue_tail(&afs_incoming_calls, skb);
- queue_work(afs_wq, &afs_collect_incoming_call_work);
- } else {
- /* route the messages directly to the appropriate call */
- skb_queue_tail(&call->rx_queue, skb);
- call->wait_mode->rx_wakeup(call);
- }
-
- _leave("");
-}
-
-/*
* deliver messages to a call
*/
static void afs_deliver_to_call(struct afs_call *call)
{
- struct sk_buff *skb;
- bool last;
u32 abort_code;
int ret;
- _enter("");
-
- while ((call->state == AFS_CALL_AWAIT_REPLY ||
- call->state == AFS_CALL_AWAIT_OP_ID ||
- call->state == AFS_CALL_AWAIT_REQUEST ||
- call->state == AFS_CALL_AWAIT_ACK) &&
- (skb = skb_dequeue(&call->rx_queue))) {
- switch (skb->mark) {
- case RXRPC_SKB_MARK_DATA:
- _debug("Rcv DATA");
- last = rxrpc_kernel_is_data_last(skb);
- ret = call->type->deliver(call, skb, last);
- switch (ret) {
- case -EAGAIN:
- if (last) {
- _debug("short data");
- goto unmarshal_error;
- }
- break;
- case 0:
- ASSERT(last);
- if (call->state == AFS_CALL_AWAIT_REPLY)
- call->state = AFS_CALL_COMPLETE;
- break;
- case -ENOTCONN:
- abort_code = RX_CALL_DEAD;
- goto do_abort;
- case -ENOTSUPP:
- abort_code = RX_INVALID_OPERATION;
- goto do_abort;
- default:
- unmarshal_error:
- abort_code = RXGEN_CC_UNMARSHAL;
- if (call->state != AFS_CALL_AWAIT_REPLY)
- abort_code = RXGEN_SS_UNMARSHAL;
- do_abort:
- rxrpc_kernel_abort_call(call->rxcall,
- abort_code);
- call->error = ret;
- call->state = AFS_CALL_ERROR;
- break;
+ _enter("%s", call->type->name);
+
+ while (call->state == AFS_CALL_AWAIT_REPLY ||
+ call->state == AFS_CALL_AWAIT_OP_ID ||
+ call->state == AFS_CALL_AWAIT_REQUEST ||
+ call->state == AFS_CALL_AWAIT_ACK
+ ) {
+ if (call->state == AFS_CALL_AWAIT_ACK) {
+ size_t offset = 0;
+ ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+ NULL, 0, &offset, false,
+ &call->abort_code);
+ if (ret == -EINPROGRESS || ret == -EAGAIN)
+ return;
+ if (ret == 1) {
+ call->state = AFS_CALL_COMPLETE;
+ goto done;
}
- break;
- case RXRPC_SKB_MARK_FINAL_ACK:
- _debug("Rcv ACK");
- call->state = AFS_CALL_COMPLETE;
- break;
- case RXRPC_SKB_MARK_BUSY:
- _debug("Rcv BUSY");
- call->error = -EBUSY;
- call->state = AFS_CALL_BUSY;
- break;
- case RXRPC_SKB_MARK_REMOTE_ABORT:
- abort_code = rxrpc_kernel_get_abort_code(skb);
- call->error = call->type->abort_to_error(abort_code);
- call->state = AFS_CALL_ABORTED;
- _debug("Rcv ABORT %u -> %d", abort_code, call->error);
- break;
- case RXRPC_SKB_MARK_LOCAL_ABORT:
- abort_code = rxrpc_kernel_get_abort_code(skb);
- call->error = call->type->abort_to_error(abort_code);
- call->state = AFS_CALL_ABORTED;
- _debug("Loc ABORT %u -> %d", abort_code, call->error);
- break;
- case RXRPC_SKB_MARK_NET_ERROR:
- call->error = -rxrpc_kernel_get_error_number(skb);
- call->state = AFS_CALL_ERROR;
- _debug("Rcv NET ERROR %d", call->error);
- break;
- case RXRPC_SKB_MARK_LOCAL_ERROR:
- call->error = -rxrpc_kernel_get_error_number(skb);
- call->state = AFS_CALL_ERROR;
- _debug("Rcv LOCAL ERROR %d", call->error);
- break;
- default:
- BUG();
- break;
+ return;
}
- afs_free_skb(skb);
- }
-
- /* make sure the queue is empty if the call is done with (we might have
- * aborted the call early because of an unmarshalling error) */
- if (call->state >= AFS_CALL_COMPLETE) {
- while ((skb = skb_dequeue(&call->rx_queue)))
- afs_free_skb(skb);
- if (call->incoming)
- afs_end_call(call);
+ ret = call->type->deliver(call);
+ switch (ret) {
+ case 0:
+ if (call->state == AFS_CALL_AWAIT_REPLY)
+ call->state = AFS_CALL_COMPLETE;
+ goto done;
+ case -EINPROGRESS:
+ case -EAGAIN:
+ goto out;
+ case -ENOTCONN:
+ abort_code = RX_CALL_DEAD;
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ abort_code, -ret, "KNC");
+ goto do_abort;
+ case -ENOTSUPP:
+ abort_code = RX_INVALID_OPERATION;
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ abort_code, -ret, "KIV");
+ goto do_abort;
+ case -ENODATA:
+ case -EBADMSG:
+ case -EMSGSIZE:
+ default:
+ abort_code = RXGEN_CC_UNMARSHAL;
+ if (call->state != AFS_CALL_AWAIT_REPLY)
+ abort_code = RXGEN_SS_UNMARSHAL;
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ abort_code, EBADMSG, "KUM");
+ goto do_abort;
+ }
}
+done:
+ if (call->state == AFS_CALL_COMPLETE && call->incoming)
+ afs_end_call(call);
+out:
_leave("");
+ return;
+
+do_abort:
+ call->error = ret;
+ call->state = AFS_CALL_COMPLETE;
+ goto done;
}
/*
@@ -572,7 +475,7 @@ static void afs_deliver_to_call(struct afs_call *call)
*/
static int afs_wait_for_call_to_complete(struct afs_call *call)
{
- struct sk_buff *skb;
+ const char *abort_why;
int ret;
DECLARE_WAITQUEUE(myself, current);
@@ -584,15 +487,18 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
set_current_state(TASK_INTERRUPTIBLE);
/* deliver any messages that are in the queue */
- if (!skb_queue_empty(&call->rx_queue)) {
+ if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+ call->need_attention = false;
__set_current_state(TASK_RUNNING);
afs_deliver_to_call(call);
continue;
}
+ abort_why = "KWC";
ret = call->error;
- if (call->state >= AFS_CALL_COMPLETE)
+ if (call->state == AFS_CALL_COMPLETE)
break;
+ abort_why = "KWI";
ret = -EINTR;
if (signal_pending(current))
break;
@@ -605,9 +511,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
/* kill the call */
if (call->state < AFS_CALL_COMPLETE) {
_debug("call incomplete");
- rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD);
- while ((skb = skb_dequeue(&call->rx_queue)))
- afs_free_skb(skb);
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ RX_CALL_DEAD, -ret, abort_why);
}
_debug("call complete");
@@ -619,17 +524,24 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
/*
* wake up a waiting call
*/
-static void afs_wake_up_call_waiter(struct afs_call *call)
+static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
{
+ struct afs_call *call = (struct afs_call *)call_user_ID;
+
+ call->need_attention = true;
wake_up(&call->waitq);
}
/*
* wake up an asynchronous call
*/
-static void afs_wake_up_async_call(struct afs_call *call)
+static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long call_user_ID)
{
- _enter("");
+ struct afs_call *call = (struct afs_call *)call_user_ID;
+
+ call->need_attention = true;
queue_work(afs_async_calls, &call->async_work);
}
@@ -647,8 +559,10 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call)
/*
* delete an asynchronous call
*/
-static void afs_delete_async_call(struct afs_call *call)
+static void afs_delete_async_call(struct work_struct *work)
{
+ struct afs_call *call = container_of(work, struct afs_call, async_work);
+
_enter("");
afs_free_call(call);
@@ -658,17 +572,19 @@ static void afs_delete_async_call(struct afs_call *call)
/*
* perform processing on an asynchronous call
- * - on a multiple-thread workqueue this work item may try to run on several
- * CPUs at the same time
*/
-static void afs_process_async_call(struct afs_call *call)
+static void afs_process_async_call(struct work_struct *work)
{
+ struct afs_call *call = container_of(work, struct afs_call, async_work);
+
_enter("");
- if (!skb_queue_empty(&call->rx_queue))
+ if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+ call->need_attention = false;
afs_deliver_to_call(call);
+ }
- if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) {
+ if (call->state == AFS_CALL_COMPLETE && call->wait_mode) {
if (call->wait_mode->async_complete)
call->wait_mode->async_complete(call->reply,
call->error);
@@ -679,122 +595,93 @@ static void afs_process_async_call(struct afs_call *call)
/* we can't just delete the call because the work item may be
* queued */
- call->async_workfn = afs_delete_async_call;
+ call->async_work.func = afs_delete_async_call;
queue_work(afs_async_calls, &call->async_work);
}
_leave("");
}
-/*
- * Empty a socket buffer into a flat reply buffer.
- */
-int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last)
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
{
- size_t len = skb->len;
-
- if (len > call->reply_max - call->reply_size) {
- _leave(" = -EBADMSG [%zu > %u]",
- len, call->reply_max - call->reply_size);
- return -EBADMSG;
- }
+ struct afs_call *call = (struct afs_call *)user_call_ID;
- if (len > 0) {
- if (skb_copy_bits(skb, 0, call->buffer + call->reply_size,
- len) < 0)
- BUG();
- call->reply_size += len;
- }
-
- afs_data_consumed(call, skb);
- if (!last)
- return -EAGAIN;
-
- if (call->reply_size != call->reply_max) {
- _leave(" = -EBADMSG [%u != %u]",
- call->reply_size, call->reply_max);
- return -EBADMSG;
- }
- return 0;
+ call->rxcall = rxcall;
}
/*
- * accept the backlog of incoming calls
+ * Charge the incoming call preallocation.
*/
-static void afs_collect_incoming_call(struct work_struct *work)
+static void afs_charge_preallocation(struct work_struct *work)
{
- struct rxrpc_call *rxcall;
- struct afs_call *call = NULL;
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&afs_incoming_calls))) {
- _debug("new call");
-
- /* don't need the notification */
- afs_free_skb(skb);
+ struct afs_call *call = afs_spare_incoming_call;
+ for (;;) {
if (!call) {
call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
- if (!call) {
- rxrpc_kernel_reject_call(afs_socket);
- return;
- }
+ if (!call)
+ break;
- call->async_workfn = afs_process_async_call;
- INIT_WORK(&call->async_work, afs_async_workfn);
+ INIT_WORK(&call->async_work, afs_process_async_call);
call->wait_mode = &afs_async_incoming_call;
call->type = &afs_RXCMxxxx;
init_waitqueue_head(&call->waitq);
- skb_queue_head_init(&call->rx_queue);
call->state = AFS_CALL_AWAIT_OP_ID;
-
- _debug("CALL %p{%s} [%d]",
- call, call->type->name,
- atomic_read(&afs_outstanding_calls));
- atomic_inc(&afs_outstanding_calls);
}
- rxcall = rxrpc_kernel_accept_call(afs_socket,
- (unsigned long) call);
- if (!IS_ERR(rxcall)) {
- call->rxcall = rxcall;
- call = NULL;
- }
+ if (rxrpc_kernel_charge_accept(afs_socket,
+ afs_wake_up_async_call,
+ afs_rx_attach,
+ (unsigned long)call,
+ GFP_KERNEL) < 0)
+ break;
+ call = NULL;
}
+ afs_spare_incoming_call = call;
+}
+
+/*
+ * Discard a preallocated call when a socket is shut down.
+ */
+static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ struct afs_call *call = (struct afs_call *)user_call_ID;
- if (call)
- afs_free_call(call);
+ atomic_inc(&afs_outstanding_calls);
+ call->rxcall = NULL;
+ afs_free_call(call);
+}
+
+/*
+ * Notification of an incoming call.
+ */
+static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+ unsigned long user_call_ID)
+{
+ atomic_inc(&afs_outstanding_calls);
+ queue_work(afs_wq, &afs_charge_preallocation_work);
}
/*
* Grab the operation ID from an incoming cache manager call. The socket
* buffer is discarded on error or if we don't yet have sufficient data.
*/
-static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
- bool last)
+static int afs_deliver_cm_op_id(struct afs_call *call)
{
- size_t len = skb->len;
- void *oibuf = (void *) &call->operation_ID;
+ int ret;
- _enter("{%u},{%zu},%d", call->offset, len, last);
+ _enter("{%zu}", call->offset);
ASSERTCMP(call->offset, <, 4);
/* the operation ID forms the first four bytes of the request data */
- len = min_t(size_t, len, 4 - call->offset);
- if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0)
- BUG();
- if (!pskb_pull(skb, len))
- BUG();
- call->offset += len;
-
- if (call->offset < 4) {
- afs_data_consumed(call, skb);
- _leave(" = -EAGAIN");
- return -EAGAIN;
- }
+ ret = afs_extract_data(call, &call->operation_ID, 4, true);
+ if (ret < 0)
+ return ret;
call->state = AFS_CALL_AWAIT_REQUEST;
+ call->offset = 0;
/* ask the cache manager to route the call (it'll change the call type
* if successful) */
@@ -803,7 +690,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
/* pass responsibility for the remainer of this message off to the
* cache manager op */
- return call->type->deliver(call, skb, last);
+ return call->type->deliver(call);
}
/*
@@ -823,14 +710,15 @@ void afs_send_empty_reply(struct afs_call *call)
msg.msg_flags = 0;
call->state = AFS_CALL_AWAIT_ACK;
- switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) {
+ switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0)) {
case 0:
_leave(" [replied]");
return;
case -ENOMEM:
_debug("oom");
- rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ RX_USER_ABORT, ENOMEM, "KOO");
default:
afs_end_call(call);
_leave(" [error]");
@@ -859,7 +747,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
msg.msg_flags = 0;
call->state = AFS_CALL_AWAIT_ACK;
- n = rxrpc_kernel_send_data(call->rxcall, &msg, len);
+ n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len);
if (n >= 0) {
/* Success */
_leave(" [replied]");
@@ -868,7 +756,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
if (n == -ENOMEM) {
_debug("oom");
- rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+ rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+ RX_USER_ABORT, ENOMEM, "KOO");
}
afs_end_call(call);
_leave(" [error]");
@@ -877,25 +766,40 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
/*
* Extract a piece of data from the received data socket buffers.
*/
-int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
- bool last, void *buf, size_t count)
+int afs_extract_data(struct afs_call *call, void *buf, size_t count,
+ bool want_more)
{
- size_t len = skb->len;
+ int ret;
- _enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count);
+ _enter("{%s,%zu},,%zu,%d",
+ call->type->name, call->offset, count, want_more);
- ASSERTCMP(call->offset, <, count);
+ ASSERTCMP(call->offset, <=, count);
- len = min_t(size_t, len, count - call->offset);
- if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 ||
- !pskb_pull(skb, len))
- BUG();
- call->offset += len;
+ ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+ buf, count, &call->offset,
+ want_more, &call->abort_code);
+ if (ret == 0 || ret == -EAGAIN)
+ return ret;
- if (call->offset < count) {
- afs_data_consumed(call, skb);
- _leave(" = -EAGAIN");
- return -EAGAIN;
+ if (ret == 1) {
+ switch (call->state) {
+ case AFS_CALL_AWAIT_REPLY:
+ call->state = AFS_CALL_COMPLETE;
+ break;
+ case AFS_CALL_AWAIT_REQUEST:
+ call->state = AFS_CALL_REPLYING;
+ break;
+ default:
+ break;
+ }
+ return 0;
}
- return 0;
+
+ if (ret == -ECONNABORTED)
+ call->error = call->type->abort_to_error(call->abort_code);
+ else
+ call->error = ret;
+ call->state = AFS_CALL_COMPLETE;
+ return ret;
}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f342acf3547d..d4066ab7dd55 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -178,13 +178,18 @@ server_in_two_cells:
/*
* look up a server by its IP address
*/
-struct afs_server *afs_find_server(const struct in_addr *_addr)
+struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx)
{
struct afs_server *server = NULL;
struct rb_node *p;
- struct in_addr addr = *_addr;
+ struct in_addr addr = srx->transport.sin.sin_addr;
- _enter("%pI4", &addr.s_addr);
+ _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr);
+
+ if (srx->transport.family != AF_INET) {
+ WARN(true, "AFS does not yes support non-IPv4 addresses\n");
+ return NULL;
+ }
read_lock(&afs_servers_lock);
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index f94d1abdc3eb..94bcd97d22b8 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -58,17 +58,16 @@ static int afs_vl_abort_to_error(u32 abort_code)
/*
* deliver reply data to a VL.GetEntryByXXX call
*/
-static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
- struct sk_buff *skb, bool last)
+static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
{
struct afs_cache_vlocation *entry;
__be32 *bp;
u32 tmp;
int loop, ret;
- _enter(",,%u", last);
+ _enter("");
- ret = afs_transfer_reply(call, skb, last);
+ ret = afs_transfer_reply(call);
if (ret < 0)
return ret;
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 52976785a32c..45a86396fd2d 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -594,8 +594,8 @@ static void afs_vlocation_reaper(struct work_struct *work)
*/
int __init afs_vlocation_update_init(void)
{
- afs_vlocation_update_worker =
- create_singlethread_workqueue("kafs_vlupdated");
+ afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated",
+ WQ_MEM_RECLAIM, 0);
return afs_vlocation_update_worker ? 0 : -ENOMEM;
}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index b493909e7492..d8e6d421c27f 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -417,6 +417,7 @@ static struct dentry *should_expire(struct dentry *dentry,
}
return NULL;
}
+
/*
* Find an eligible tree to time-out
* A tree is eligible if :-
@@ -432,6 +433,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
struct dentry *root = sb->s_root;
struct dentry *dentry;
struct dentry *expired;
+ struct dentry *found;
struct autofs_info *ino;
if (!root)
@@ -442,31 +444,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
+ int flags = how;
+
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
- if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
- expired = NULL;
- else
- expired = should_expire(dentry, mnt, timeout, how);
- if (!expired) {
+ if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
spin_unlock(&sbi->fs_lock);
continue;
}
+ spin_unlock(&sbi->fs_lock);
+
+ expired = should_expire(dentry, mnt, timeout, flags);
+ if (!expired)
+ continue;
+
+ spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(expired);
ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
- spin_lock(&sbi->fs_lock);
- if (should_expire(expired, mnt, timeout, how)) {
- if (expired != dentry)
- dput(dentry);
- goto found;
- }
+ /* Make sure a reference is not taken on found if
+ * things have changed.
+ */
+ flags &= ~AUTOFS_EXP_LEAVES;
+ found = should_expire(expired, mnt, timeout, how);
+ if (!found || found != expired)
+ /* Something has changed, continue */
+ goto next;
+
+ if (expired != dentry)
+ dput(dentry);
+
+ spin_lock(&sbi->fs_lock);
+ goto found;
+next:
+ spin_lock(&sbi->fs_lock);
ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
+ spin_unlock(&sbi->fs_lock);
if (expired != dentry)
dput(expired);
- spin_unlock(&sbi->fs_lock);
}
return NULL;
@@ -483,6 +500,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status;
+ int state;
/* Block on any pending expire */
if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
@@ -490,8 +508,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
if (rcu_walk)
return -ECHILD;
+retry:
spin_lock(&sbi->fs_lock);
- if (ino->flags & AUTOFS_INF_EXPIRING) {
+ state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
+ if (state == AUTOFS_INF_WANT_EXPIRE) {
+ spin_unlock(&sbi->fs_lock);
+ /*
+ * Possibly being selected for expire, wait until
+ * it's selected or not.
+ */
+ schedule_timeout_uninterruptible(HZ/10);
+ goto retry;
+ }
+ if (state & AUTOFS_INF_EXPIRING) {
spin_unlock(&sbi->fs_lock);
pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index e5495f37c6ed..2472af2798c7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1624,20 +1624,12 @@ static void do_thread_regset_writeback(struct task_struct *task,
regset->writeback(task, regset, 1);
}
-#ifndef PR_REG_SIZE
-#define PR_REG_SIZE(S) sizeof(S)
-#endif
-
#ifndef PRSTATUS_SIZE
-#define PRSTATUS_SIZE(S) sizeof(S)
-#endif
-
-#ifndef PR_REG_PTR
-#define PR_REG_PTR(S) (&((S)->pr_reg))
+#define PRSTATUS_SIZE(S, R) sizeof(S)
#endif
#ifndef SET_PR_FPVALID
-#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
+#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
#endif
static int fill_thread_core_info(struct elf_thread_core_info *t,
@@ -1645,6 +1637,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
long signr, size_t *total)
{
unsigned int i;
+ unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
/*
* NT_PRSTATUS is the one special case, because the regset data
@@ -1653,12 +1646,11 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
* We assume that regset 0 is NT_PRSTATUS.
*/
fill_prstatus(&t->prstatus, t->task, signr);
- (void) view->regsets[0].get(t->task, &view->regsets[0],
- 0, PR_REG_SIZE(t->prstatus.pr_reg),
- PR_REG_PTR(&t->prstatus), NULL);
+ (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
+ &t->prstatus.pr_reg, NULL);
fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
- PRSTATUS_SIZE(t->prstatus), &t->prstatus);
+ PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
*total += notesize(&t->notes[0]);
do_thread_regset_writeback(t->task, &view->regsets[0]);
@@ -1688,7 +1680,8 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
regset->core_note_type,
size, data);
else {
- SET_PR_FPVALID(&t->prstatus, 1);
+ SET_PR_FPVALID(&t->prstatus,
+ 1, regset_size);
fill_note(&t->notes[i], "CORE",
NT_PRFPREG, size, data);
}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 38c2df84cabd..665da8f66ff1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4271,13 +4271,10 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
if (ret < 0)
return ret;
- /*
- * Use new btrfs_qgroup_reserve_data to reserve precious data space
- *
- * TODO: Find a good method to avoid reserve data space for NOCOW
- * range, but don't impact performance on quota disable case.
- */
+ /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
ret = btrfs_qgroup_reserve_data(inode, start, len);
+ if (ret)
+ btrfs_free_reserved_data_space_noquota(inode, start, len);
return ret;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b2a2da5893af..7fd939bfbd99 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1634,6 +1634,9 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
int namelen;
int ret = 0;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
ret = mnt_want_write_file(file);
if (ret)
goto out;
@@ -1691,6 +1694,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
struct btrfs_ioctl_vol_args *vol_args;
int ret;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -1714,6 +1720,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
bool readonly = false;
struct btrfs_qgroup_inherit *inherit = NULL;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -2357,6 +2366,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
int ret;
int err = 0;
+ if (!S_ISDIR(dir->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index c30cf49b69d2..2c6312db8516 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -333,6 +333,7 @@ configfs_write_bin_file(struct file *file, const char __user *buf,
if (bin_attr->cb_max_size &&
*ppos + count > bin_attr->cb_max_size) {
len = -EFBIG;
+ goto out;
}
tbuf = vmalloc(*ppos + count);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 592059f88e04..354e2ab62031 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -97,9 +97,6 @@ EXPORT_SYMBOL_GPL(debugfs_use_file_finish);
#define F_DENTRY(filp) ((filp)->f_path.dentry)
-#define REAL_FOPS_DEREF(dentry) \
- ((const struct file_operations *)(dentry)->d_fsdata)
-
static int open_proxy_open(struct inode *inode, struct file *filp)
{
const struct dentry *dentry = F_DENTRY(filp);
@@ -112,7 +109,7 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
goto out;
}
- real_fops = REAL_FOPS_DEREF(dentry);
+ real_fops = debugfs_real_fops(filp);
real_fops = fops_get(real_fops);
if (!real_fops) {
/* Huh? Module did not clean up after itself at exit? */
@@ -143,7 +140,7 @@ static ret_type full_proxy_ ## name(proto) \
{ \
const struct dentry *dentry = F_DENTRY(filp); \
const struct file_operations *real_fops = \
- REAL_FOPS_DEREF(dentry); \
+ debugfs_real_fops(filp); \
int srcu_idx; \
ret_type r; \
\
@@ -176,7 +173,7 @@ static unsigned int full_proxy_poll(struct file *filp,
struct poll_table_struct *wait)
{
const struct dentry *dentry = F_DENTRY(filp);
- const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry);
+ const struct file_operations *real_fops = debugfs_real_fops(filp);
int srcu_idx;
unsigned int r = 0;
@@ -193,7 +190,7 @@ static unsigned int full_proxy_poll(struct file *filp,
static int full_proxy_release(struct inode *inode, struct file *filp)
{
const struct dentry *dentry = F_DENTRY(filp);
- const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry);
+ const struct file_operations *real_fops = debugfs_real_fops(filp);
const struct file_operations *proxy_fops = filp->f_op;
int r = 0;
@@ -209,7 +206,7 @@ static int full_proxy_release(struct inode *inode, struct file *filp)
replace_fops(filp, d_inode(dentry)->i_fop);
kfree((void *)proxy_fops);
fops_put(real_fops);
- return 0;
+ return r;
}
static void __full_proxy_fops_init(struct file_operations *proxy_fops,
@@ -241,7 +238,7 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
goto out;
}
- real_fops = REAL_FOPS_DEREF(dentry);
+ real_fops = debugfs_real_fops(filp);
real_fops = fops_get(real_fops);
if (!real_fops) {
/* Huh? Module did not cleanup after itself at exit? */
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index bba52634b995..b3e8443a1f47 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -19,8 +19,4 @@ extern const struct file_operations debugfs_noop_file_operations;
extern const struct file_operations debugfs_open_proxy_file_operations;
extern const struct file_operations debugfs_full_proxy_file_operations;
-struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode,
- struct dentry *parent, void *data,
- const struct file_operations *fops);
-
#endif /* _DEBUGFS_INTERNAL_H_ */
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 79a5941c2474..442d1a7e671b 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -272,13 +272,8 @@ static int mknod_ptmx(struct super_block *sb)
struct dentry *root = sb->s_root;
struct pts_fs_info *fsi = DEVPTS_SB(sb);
struct pts_mount_opts *opts = &fsi->mount_opts;
- kuid_t root_uid;
- kgid_t root_gid;
-
- root_uid = make_kuid(current_user_ns(), 0);
- root_gid = make_kgid(current_user_ns(), 0);
- if (!uid_valid(root_uid) || !gid_valid(root_gid))
- return -EINVAL;
+ kuid_t ptmx_uid = current_fsuid();
+ kgid_t ptmx_gid = current_fsgid();
inode_lock(d_inode(root));
@@ -309,8 +304,8 @@ static int mknod_ptmx(struct super_block *sb)
mode = S_IFCHR|opts->ptmxmode;
init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
- inode->i_uid = root_uid;
- inode->i_gid = root_gid;
+ inode->i_uid = ptmx_uid;
+ inode->i_gid = ptmx_gid;
d_add(dentry, inode);
@@ -336,7 +331,6 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
struct pts_fs_info *fsi = DEVPTS_SB(sb);
struct pts_mount_opts *opts = &fsi->mount_opts;
- sync_filesystem(sb);
err = parse_mount_options(data, PARSE_REMOUNT, opts);
/*
@@ -395,6 +389,7 @@ static int
devpts_fill_super(struct super_block *s, void *data, int silent)
{
struct inode *inode;
+ int error;
s->s_iflags &= ~SB_I_NODEV;
s->s_blocksize = 1024;
@@ -403,10 +398,16 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
s->s_op = &devpts_sops;
s->s_time_gran = 1;
+ error = -ENOMEM;
s->s_fs_info = new_pts_fs_info(s);
if (!s->s_fs_info)
goto fail;
+ error = parse_mount_options(data, PARSE_MOUNT, &DEVPTS_SB(s)->mount_opts);
+ if (error)
+ goto fail;
+
+ error = -ENOMEM;
inode = new_inode(s);
if (!inode)
goto fail;
@@ -418,13 +419,21 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
set_nlink(inode, 2);
s->s_root = d_make_root(inode);
- if (s->s_root)
- return 0;
+ if (!s->s_root) {
+ pr_err("get root dentry failed\n");
+ goto fail;
+ }
- pr_err("get root dentry failed\n");
+ error = mknod_ptmx(s);
+ if (error)
+ goto fail_dput;
+ return 0;
+fail_dput:
+ dput(s->s_root);
+ s->s_root = NULL;
fail:
- return -ENOMEM;
+ return error;
}
/*
@@ -436,43 +445,15 @@ fail:
static struct dentry *devpts_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- int error;
- struct pts_mount_opts opts;
- struct super_block *s;
-
- error = parse_mount_options(data, PARSE_MOUNT, &opts);
- if (error)
- return ERR_PTR(error);
-
- s = sget(fs_type, NULL, set_anon_super, flags, NULL);
- if (IS_ERR(s))
- return ERR_CAST(s);
-
- if (!s->s_root) {
- error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
- if (error)
- goto out_undo_sget;
- s->s_flags |= MS_ACTIVE;
- }
-
- memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts));
-
- error = mknod_ptmx(s);
- if (error)
- goto out_undo_sget;
-
- return dget(s->s_root);
-
-out_undo_sget:
- deactivate_locked_super(s);
- return ERR_PTR(error);
+ return mount_nodev(fs_type, flags, data, devpts_fill_super);
}
static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
- ida_destroy(&fsi->allocated_ptys);
+ if (fsi)
+ ida_destroy(&fsi->allocated_ptys);
kfree(fsi);
kill_litter_super(sb);
}
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 1d73fc6dba13..cbb50cadcffc 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -105,7 +105,10 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
inode->i_private = var;
- efivar_entry_add(var, &efivarfs_list);
+ err = efivar_entry_add(var, &efivarfs_list);
+ if (err)
+ goto out;
+
d_instantiate(dentry, inode);
dget(dentry);
out:
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 688ccc16b702..d7a7c53803c1 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -157,12 +157,14 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
goto fail_inode;
}
+ efivar_entry_size(entry, &size);
+ err = efivar_entry_add(entry, &efivarfs_list);
+ if (err)
+ goto fail_inode;
+
/* copied by the above to local storage in the dentry. */
kfree(name);
- efivar_entry_size(entry, &size);
- efivar_entry_add(entry, &efivarfs_list);
-
inode_lock(inode);
inode->i_private = entry;
i_size_write(inode, size + sizeof(entry->var.Attributes));
@@ -182,7 +184,10 @@ fail:
static int efivarfs_destroy(struct efivar_entry *entry, void *data)
{
- efivar_entry_remove(entry);
+ int err = efivar_entry_remove(entry);
+
+ if (err)
+ return err;
kfree(entry);
return 0;
}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index efe5fb21c533..04e73a99902b 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -465,6 +465,11 @@ struct inode *ext2_new_inode(struct inode *dir, umode_t mode,
for (i = 0; i < sbi->s_groups_count; i++) {
gdp = ext2_get_group_desc(sb, group, &bh2);
+ if (!gdp) {
+ if (++group == sbi->s_groups_count)
+ group = 0;
+ continue;
+ }
brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, group);
if (!bitmap_bh) {
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index d5c7d09919f3..303ae2bb269a 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -733,6 +733,16 @@ static int ext2_get_blocks(struct inode *inode,
}
if (IS_DAX(inode)) {
+ int i;
+
+ /*
+ * We must unmap blocks before zeroing so that writeback cannot
+ * overwrite zeros with stale data from block device page cache.
+ */
+ for (i = 0; i < count; i++) {
+ unmap_underlying_metadata(inode->i_sb->s_bdev,
+ le32_to_cpu(chain[depth-1].key) + i);
+ }
/*
* block must be initialised before we put it in the tree
* so that it's not found by another thread before it's
diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 1b2f6c2c3aaf..76f09ce7e5b2 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -1,5 +1,6 @@
config FUSE_FS
tristate "FUSE (Filesystem in Userspace) support"
+ select FS_POSIX_ACL
help
With FUSE it is possible to implement a fully functional filesystem
in a userspace program.
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index e95eeb445e58..60da84a86dab 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -5,4 +5,4 @@
obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
-fuse-objs := dev.o dir.o file.o inode.o control.o
+fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
new file mode 100644
index 000000000000..ec85765502f1
--- /dev/null
+++ b/fs/fuse/acl.c
@@ -0,0 +1,99 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2016 Canonical Ltd. <seth.forshee@canonical.com>
+ *
+ * This program can be distributed under the terms of the GNU GPL.
+ * See the file COPYING.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+
+struct posix_acl *fuse_get_acl(struct inode *inode, int type)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int size;
+ const char *name;
+ void *value = NULL;
+ struct posix_acl *acl;
+
+ if (!fc->posix_acl || fc->no_getxattr)
+ return NULL;
+
+ if (type == ACL_TYPE_ACCESS)
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
+ else if (type == ACL_TYPE_DEFAULT)
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
+ else
+ return ERR_PTR(-EOPNOTSUPP);
+
+ value = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ size = fuse_getxattr(inode, name, value, PAGE_SIZE);
+ if (size > 0)
+ acl = posix_acl_from_xattr(&init_user_ns, value, size);
+ else if ((size == 0) || (size == -ENODATA) ||
+ (size == -EOPNOTSUPP && fc->no_getxattr))
+ acl = NULL;
+ else if (size == -ERANGE)
+ acl = ERR_PTR(-E2BIG);
+ else
+ acl = ERR_PTR(size);
+
+ kfree(value);
+ return acl;
+}
+
+int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ const char *name;
+ int ret;
+
+ if (!fc->posix_acl || fc->no_setxattr)
+ return -EOPNOTSUPP;
+
+ if (type == ACL_TYPE_ACCESS)
+ name = XATTR_NAME_POSIX_ACL_ACCESS;
+ else if (type == ACL_TYPE_DEFAULT)
+ name = XATTR_NAME_POSIX_ACL_DEFAULT;
+ else
+ return -EINVAL;
+
+ if (acl) {
+ /*
+ * Fuse userspace is responsible for updating access
+ * permissions in the inode, if needed. fuse_setxattr
+ * invalidates the inode attributes, which will force
+ * them to be refreshed the next time they are used,
+ * and it also updates i_ctime.
+ */
+ size_t size = posix_acl_xattr_size(acl->a_count);
+ void *value;
+
+ if (size > PAGE_SIZE)
+ return -E2BIG;
+
+ value = kmalloc(size, GFP_KERNEL);
+ if (!value)
+ return -ENOMEM;
+
+ ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+ if (ret < 0) {
+ kfree(value);
+ return ret;
+ }
+
+ ret = fuse_setxattr(inode, name, value, size, 0);
+ kfree(value);
+ } else {
+ ret = fuse_removexattr(inode, name);
+ }
+ forget_all_cached_acls(inode);
+ fuse_invalidate_attr(inode);
+
+ return ret;
+}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index a94d2ed81ab4..c41bde26c338 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -767,7 +767,6 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->len = err;
cs->offset = off;
cs->pg = page;
- cs->offset = off;
iov_iter_advance(cs->iter, err);
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c47b7780ce37..f7c84ab835ca 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -13,6 +13,8 @@
#include <linux/sched.h>
#include <linux/namei.h>
#include <linux/slab.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl.h>
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
@@ -37,47 +39,39 @@ static void fuse_advise_use_readdirplus(struct inode *dir)
set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
}
-#if BITS_PER_LONG >= 64
+union fuse_dentry {
+ u64 time;
+ struct rcu_head rcu;
+};
+
static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
{
- entry->d_time = time;
+ ((union fuse_dentry *) entry->d_fsdata)->time = time;
}
static inline u64 fuse_dentry_time(struct dentry *entry)
{
- return entry->d_time;
-}
-#else
-/*
- * On 32 bit archs store the high 32 bits of time in d_fsdata
- */
-static void fuse_dentry_settime(struct dentry *entry, u64 time)
-{
- entry->d_time = time;
- entry->d_fsdata = (void *) (unsigned long) (time >> 32);
-}
-
-static u64 fuse_dentry_time(struct dentry *entry)
-{
- return (u64) entry->d_time +
- ((u64) (unsigned long) entry->d_fsdata << 32);
+ return ((union fuse_dentry *) entry->d_fsdata)->time;
}
-#endif
/*
* FUSE caches dentries and attributes with separate timeout. The
* time in jiffies until the dentry/attributes are valid is stored in
- * dentry->d_time and fuse_inode->i_time respectively.
+ * dentry->d_fsdata and fuse_inode->i_time respectively.
*/
/*
* Calculate the time in jiffies until a dentry/attributes are valid
*/
-static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
+static u64 time_to_jiffies(u64 sec, u32 nsec)
{
if (sec || nsec) {
- struct timespec ts = {sec, nsec};
- return get_jiffies_64() + timespec_to_jiffies(&ts);
+ struct timespec64 ts = {
+ sec,
+ min_t(u32, nsec, NSEC_PER_SEC - 1) /* clamp to a valid tv_nsec */
+ };
+
+ return get_jiffies_64() + timespec64_to_jiffies(&ts);
} else
return 0;
}
@@ -243,6 +237,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
goto invalid;
+ forget_all_cached_acls(inode);
fuse_change_attributes(inode, &outarg.attr,
entry_attr_timeout(&outarg),
attr_version);
@@ -272,8 +267,23 @@ static int invalid_nodeid(u64 nodeid)
return !nodeid || nodeid == FUSE_ROOT_ID;
}
+static int fuse_dentry_init(struct dentry *dentry)
+{
+ dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
+
+ return dentry->d_fsdata ? 0 : -ENOMEM;
+}
+static void fuse_dentry_release(struct dentry *dentry)
+{
+ union fuse_dentry *fd = dentry->d_fsdata;
+
+ kfree_rcu(fd, rcu);
+}
+
const struct dentry_operations fuse_dentry_operations = {
.d_revalidate = fuse_dentry_revalidate,
+ .d_init = fuse_dentry_init,
+ .d_release = fuse_dentry_release,
};
int fuse_valid_type(int m)
@@ -634,7 +644,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
return create_new_entry(fc, &args, dir, entry, S_IFLNK);
}
-static inline void fuse_update_ctime(struct inode *inode)
+void fuse_update_ctime(struct inode *inode)
{
if (!IS_NOCMTIME(inode)) {
inode->i_ctime = current_fs_time(inode->i_sb);
@@ -917,6 +927,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat,
if (time_before64(fi->i_time, get_jiffies_64())) {
r = true;
+ forget_all_cached_acls(inode);
err = fuse_do_getattr(inode, stat, file);
} else {
r = false;
@@ -1017,7 +1028,7 @@ int fuse_allow_current_process(struct fuse_conn *fc)
{
const struct cred *cred;
- if (fc->flags & FUSE_ALLOW_OTHER)
+ if (fc->allow_other)
return 1;
cred = current_cred();
@@ -1064,6 +1075,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
+ forget_all_cached_acls(inode);
return fuse_do_getattr(inode, NULL, NULL);
}
@@ -1092,7 +1104,7 @@ static int fuse_permission(struct inode *inode, int mask)
/*
* If attributes are needed, refresh them before proceeding
*/
- if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
+ if (fc->default_permissions ||
((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -1105,7 +1117,7 @@ static int fuse_permission(struct inode *inode, int mask)
}
}
- if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
+ if (fc->default_permissions) {
err = generic_permission(inode, mask);
/* If permission is denied, try to refresh file
@@ -1233,6 +1245,7 @@ retry:
fi->nlookup++;
spin_unlock(&fc->lock);
+ forget_all_cached_acls(inode);
fuse_change_attributes(inode, &o->attr,
entry_attr_timeout(o),
attr_version);
@@ -1605,7 +1618,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
int err;
bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
- if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
+ if (!fc->default_permissions)
attr->ia_valid |= ATTR_FORCE;
err = inode_change_ok(inode, attr);
@@ -1702,172 +1715,75 @@ error:
static int fuse_setattr(struct dentry *entry, struct iattr *attr)
{
struct inode *inode = d_inode(entry);
-
- if (!fuse_allow_current_process(get_fuse_conn(inode)))
- return -EACCES;
-
- if (attr->ia_valid & ATTR_FILE)
- return fuse_do_setattr(inode, attr, attr->ia_file);
- else
- return fuse_do_setattr(inode, attr, NULL);
-}
-
-static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
- struct kstat *stat)
-{
- struct inode *inode = d_inode(entry);
struct fuse_conn *fc = get_fuse_conn(inode);
+ struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
+ int ret;
- if (!fuse_allow_current_process(fc))
+ if (!fuse_allow_current_process(get_fuse_conn(inode)))
return -EACCES;
- return fuse_update_attributes(inode, stat, NULL, NULL);
-}
-
-static int fuse_setxattr(struct dentry *unused, struct inode *inode,
- const char *name, const void *value,
- size_t size, int flags)
-{
- struct fuse_conn *fc = get_fuse_conn(inode);
- FUSE_ARGS(args);
- struct fuse_setxattr_in inarg;
- int err;
-
- if (fc->no_setxattr)
- return -EOPNOTSUPP;
+ if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
+ attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
+ ATTR_MODE);
- memset(&inarg, 0, sizeof(inarg));
- inarg.size = size;
- inarg.flags = flags;
- args.in.h.opcode = FUSE_SETXATTR;
- args.in.h.nodeid = get_node_id(inode);
- args.in.numargs = 3;
- args.in.args[0].size = sizeof(inarg);
- args.in.args[0].value = &inarg;
- args.in.args[1].size = strlen(name) + 1;
- args.in.args[1].value = name;
- args.in.args[2].size = size;
- args.in.args[2].value = value;
- err = fuse_simple_request(fc, &args);
- if (err == -ENOSYS) {
- fc->no_setxattr = 1;
- err = -EOPNOTSUPP;
- }
- if (!err) {
- fuse_invalidate_attr(inode);
- fuse_update_ctime(inode);
+ /*
+ * The only sane way to reliably kill suid/sgid is to do it in
+ * the userspace filesystem
+ *
+ * This should be done on write(), truncate() and chown().
+ */
+ if (!fc->handle_killpriv) {
+ int kill;
+
+ /*
+ * ia_mode calculation may have used stale i_mode.
+ * Refresh and recalculate.
+ */
+ ret = fuse_do_getattr(inode, NULL, file);
+ if (ret)
+ return ret;
+
+ attr->ia_mode = inode->i_mode;
+ kill = should_remove_suid(entry);
+ if (kill & ATTR_KILL_SUID) {
+ attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode &= ~S_ISUID;
+ }
+ if (kill & ATTR_KILL_SGID) {
+ attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode &= ~S_ISGID;
+ }
+ }
}
- return err;
-}
-
-static ssize_t fuse_getxattr(struct dentry *entry, struct inode *inode,
- const char *name, void *value, size_t size)
-{
- struct fuse_conn *fc = get_fuse_conn(inode);
- FUSE_ARGS(args);
- struct fuse_getxattr_in inarg;
- struct fuse_getxattr_out outarg;
- ssize_t ret;
+ if (!attr->ia_valid)
+ return 0;
- if (fc->no_getxattr)
- return -EOPNOTSUPP;
+ ret = fuse_do_setattr(inode, attr, file);
+ if (!ret) {
+ /*
+ * If filesystem supports acls it may have updated acl xattrs in
+ * the filesystem, so forget cached acls for the inode.
+ */
+ if (fc->posix_acl)
+ forget_all_cached_acls(inode);
- memset(&inarg, 0, sizeof(inarg));
- inarg.size = size;
- args.in.h.opcode = FUSE_GETXATTR;
- args.in.h.nodeid = get_node_id(inode);
- args.in.numargs = 2;
- args.in.args[0].size = sizeof(inarg);
- args.in.args[0].value = &inarg;
- args.in.args[1].size = strlen(name) + 1;
- args.in.args[1].value = name;
- /* This is really two different operations rolled into one */
- args.out.numargs = 1;
- if (size) {
- args.out.argvar = 1;
- args.out.args[0].size = size;
- args.out.args[0].value = value;
- } else {
- args.out.args[0].size = sizeof(outarg);
- args.out.args[0].value = &outarg;
- }
- ret = fuse_simple_request(fc, &args);
- if (!ret && !size)
- ret = outarg.size;
- if (ret == -ENOSYS) {
- fc->no_getxattr = 1;
- ret = -EOPNOTSUPP;
+ /* Directory mode changed, may need to revalidate access */
+ if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
+ fuse_invalidate_entry_cache(entry);
}
return ret;
}
-static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
+static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
+ struct kstat *stat)
{
struct inode *inode = d_inode(entry);
struct fuse_conn *fc = get_fuse_conn(inode);
- FUSE_ARGS(args);
- struct fuse_getxattr_in inarg;
- struct fuse_getxattr_out outarg;
- ssize_t ret;
if (!fuse_allow_current_process(fc))
return -EACCES;
- if (fc->no_listxattr)
- return -EOPNOTSUPP;
-
- memset(&inarg, 0, sizeof(inarg));
- inarg.size = size;
- args.in.h.opcode = FUSE_LISTXATTR;
- args.in.h.nodeid = get_node_id(inode);
- args.in.numargs = 1;
- args.in.args[0].size = sizeof(inarg);
- args.in.args[0].value = &inarg;
- /* This is really two different operations rolled into one */
- args.out.numargs = 1;
- if (size) {
- args.out.argvar = 1;
- args.out.args[0].size = size;
- args.out.args[0].value = list;
- } else {
- args.out.args[0].size = sizeof(outarg);
- args.out.args[0].value = &outarg;
- }
- ret = fuse_simple_request(fc, &args);
- if (!ret && !size)
- ret = outarg.size;
- if (ret == -ENOSYS) {
- fc->no_listxattr = 1;
- ret = -EOPNOTSUPP;
- }
- return ret;
-}
-
-static int fuse_removexattr(struct dentry *entry, const char *name)
-{
- struct inode *inode = d_inode(entry);
- struct fuse_conn *fc = get_fuse_conn(inode);
- FUSE_ARGS(args);
- int err;
-
- if (fc->no_removexattr)
- return -EOPNOTSUPP;
-
- args.in.h.opcode = FUSE_REMOVEXATTR;
- args.in.h.nodeid = get_node_id(inode);
- args.in.numargs = 1;
- args.in.args[0].size = strlen(name) + 1;
- args.in.args[0].value = name;
- err = fuse_simple_request(fc, &args);
- if (err == -ENOSYS) {
- fc->no_removexattr = 1;
- err = -EOPNOTSUPP;
- }
- if (!err) {
- fuse_invalidate_attr(inode);
- fuse_update_ctime(inode);
- }
- return err;
+ return fuse_update_attributes(inode, stat, NULL, NULL);
}
static const struct inode_operations fuse_dir_inode_operations = {
@@ -1884,10 +1800,12 @@ static const struct inode_operations fuse_dir_inode_operations = {
.mknod = fuse_mknod,
.permission = fuse_permission,
.getattr = fuse_getattr,
- .setxattr = fuse_setxattr,
- .getxattr = fuse_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = fuse_listxattr,
- .removexattr = fuse_removexattr,
+ .removexattr = generic_removexattr,
+ .get_acl = fuse_get_acl,
+ .set_acl = fuse_set_acl,
};
static const struct file_operations fuse_dir_operations = {
@@ -1905,10 +1823,12 @@ static const struct inode_operations fuse_common_inode_operations = {
.setattr = fuse_setattr,
.permission = fuse_permission,
.getattr = fuse_getattr,
- .setxattr = fuse_setxattr,
- .getxattr = fuse_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = fuse_listxattr,
- .removexattr = fuse_removexattr,
+ .removexattr = generic_removexattr,
+ .get_acl = fuse_get_acl,
+ .set_acl = fuse_set_acl,
};
static const struct inode_operations fuse_symlink_inode_operations = {
@@ -1916,10 +1836,10 @@ static const struct inode_operations fuse_symlink_inode_operations = {
.get_link = fuse_get_link,
.readlink = generic_readlink,
.getattr = fuse_getattr,
- .setxattr = fuse_setxattr,
- .getxattr = fuse_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = fuse_listxattr,
- .removexattr = fuse_removexattr,
+ .removexattr = generic_removexattr,
};
void fuse_init_common(struct inode *inode)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 3988b43c2f5a..b7beb67bf005 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2326,49 +2326,6 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
return retval;
}
-static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
- unsigned int nr_segs, size_t bytes, bool to_user)
-{
- struct iov_iter ii;
- int page_idx = 0;
-
- if (!bytes)
- return 0;
-
- iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes);
-
- while (iov_iter_count(&ii)) {
- struct page *page = pages[page_idx++];
- size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
- void *kaddr;
-
- kaddr = kmap(page);
-
- while (todo) {
- char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
- size_t iov_len = ii.iov->iov_len - ii.iov_offset;
- size_t copy = min(todo, iov_len);
- size_t left;
-
- if (!to_user)
- left = copy_from_user(kaddr, uaddr, copy);
- else
- left = copy_to_user(uaddr, kaddr, copy);
-
- if (unlikely(left))
- return -EFAULT;
-
- iov_iter_advance(&ii, copy);
- todo -= copy;
- kaddr += copy;
- }
-
- kunmap(page);
- }
-
- return 0;
-}
-
/*
* CUSE servers compiled on 32bit broke on 64bit kernels because the
* ABI was defined to be 'struct iovec' which is different on 32bit
@@ -2520,8 +2477,9 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
struct iovec *iov_page = NULL;
struct iovec *in_iov = NULL, *out_iov = NULL;
unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
- size_t in_size, out_size, transferred;
- int err;
+ size_t in_size, out_size, transferred, c;
+ int err, i;
+ struct iov_iter ii;
#if BITS_PER_LONG == 32
inarg.flags |= FUSE_IOCTL_32BIT;
@@ -2603,10 +2561,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
req->in.args[1].size = in_size;
req->in.argpages = 1;
- err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size,
- false);
- if (err)
- goto out;
+ err = -EFAULT;
+ iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
+ c = copy_page_from_iter(pages[i], 0, PAGE_SIZE, &ii);
+ if (c != PAGE_SIZE && iov_iter_count(&ii))
+ goto out;
+ }
}
req->out.numargs = 2;
@@ -2672,7 +2633,14 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
if (transferred > inarg.out_size)
goto out;
- err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true);
+ err = -EFAULT;
+ iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
+ c = copy_page_to_iter(pages[i], 0, PAGE_SIZE, &ii);
+ if (c != PAGE_SIZE && iov_iter_count(&ii))
+ goto out;
+ }
+ err = 0;
out:
if (req)
fuse_put_request(fc, req);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d98d8cc84def..24ada5dc4dae 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -23,6 +23,7 @@
#include <linux/poll.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
+#include <linux/xattr.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -36,15 +37,6 @@
/** Number of dentries for each connection in the control filesystem */
#define FUSE_CTL_NUM_DENTRIES 5
-/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
- module will check permissions based on the file mode. Otherwise no
- permission checking is done in the kernel */
-#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
-
-/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
- doing the mount will be allowed to access the filesystem */
-#define FUSE_ALLOW_OTHER (1 << 1)
-
/** Number of page pointers embedded in fuse_req */
#define FUSE_REQ_INLINE_PAGES 1
@@ -469,9 +461,6 @@ struct fuse_conn {
/** The group id for this mount */
kgid_t group_id;
- /** The fuse mount flags for this mount */
- unsigned flags;
-
/** Maximum read size */
unsigned max_read;
@@ -547,6 +536,9 @@ struct fuse_conn {
/** allow parallel lookups and readdir (default is serialized) */
unsigned parallel_dirops:1;
+ /** fs handles killing suid/sgid/cap on write/chown/trunc */
+ unsigned handle_killpriv:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -624,6 +616,15 @@ struct fuse_conn {
/** Is lseek not implemented by fs? */
unsigned no_lseek:1;
+ /** Does the filesystem support posix acls? */
+ unsigned posix_acl:1;
+
+ /** Check permissions based on the file mode or not? */
+ unsigned default_permissions:1;
+
+ /** Allow users other than the mounter to access the filesystem? */
+ unsigned allow_other:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
@@ -902,6 +903,8 @@ int fuse_allow_current_process(struct fuse_conn *fc);
u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
+void fuse_update_ctime(struct inode *inode);
+
int fuse_update_attributes(struct inode *inode, struct kstat *stat,
struct file *file, bool *refreshed);
@@ -966,4 +969,17 @@ void fuse_set_initialized(struct fuse_conn *fc);
void fuse_unlock_inode(struct inode *inode);
void fuse_lock_inode(struct inode *inode);
+int fuse_setxattr(struct inode *inode, const char *name, const void *value,
+ size_t size, int flags);
+ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
+ size_t size);
+ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
+int fuse_removexattr(struct inode *inode, const char *name);
+extern const struct xattr_handler *fuse_xattr_handlers[];
+extern const struct xattr_handler *fuse_acl_xattr_handlers[];
+
+struct posix_acl;
+struct posix_acl *fuse_get_acl(struct inode *inode, int type);
+int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4e05b51120f4..17141099f2e7 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -20,6 +20,7 @@
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/exportfs.h>
+#include <linux/posix_acl.h>
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -66,7 +67,8 @@ struct fuse_mount_data {
unsigned rootmode_present:1;
unsigned user_id_present:1;
unsigned group_id_present:1;
- unsigned flags;
+ unsigned default_permissions:1;
+ unsigned allow_other:1;
unsigned max_read;
unsigned blksize;
};
@@ -192,7 +194,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
* check in may_delete().
*/
fi->orig_i_mode = inode->i_mode;
- if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
+ if (!fc->default_permissions)
inode->i_mode &= ~S_ISVTX;
fi->orig_ino = attr->ino;
@@ -340,6 +342,7 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
return -ENOENT;
fuse_invalidate_attr(inode);
+ forget_all_cached_acls(inode);
if (offset >= 0) {
pg_start = offset >> PAGE_SHIFT;
if (len <= 0)
@@ -532,11 +535,11 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
break;
case OPT_DEFAULT_PERMISSIONS:
- d->flags |= FUSE_DEFAULT_PERMISSIONS;
+ d->default_permissions = 1;
break;
case OPT_ALLOW_OTHER:
- d->flags |= FUSE_ALLOW_OTHER;
+ d->allow_other = 1;
break;
case OPT_MAX_READ:
@@ -570,9 +573,9 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id));
seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id));
- if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
+ if (fc->default_permissions)
seq_puts(m, ",default_permissions");
- if (fc->flags & FUSE_ALLOW_OTHER)
+ if (fc->allow_other)
seq_puts(m, ",allow_other");
if (fc->max_read != ~0)
seq_printf(m, ",max_read=%u", fc->max_read);
@@ -910,8 +913,15 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->writeback_cache = 1;
if (arg->flags & FUSE_PARALLEL_DIROPS)
fc->parallel_dirops = 1;
+ if (arg->flags & FUSE_HANDLE_KILLPRIV)
+ fc->handle_killpriv = 1;
if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran;
+ if ((arg->flags & FUSE_POSIX_ACL)) {
+ fc->default_permissions = 1;
+ fc->posix_acl = 1;
+ fc->sb->s_xattr = fuse_acl_xattr_handlers;
+ }
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
@@ -941,7 +951,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
- FUSE_PARALLEL_DIROPS;
+ FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -1071,6 +1081,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
}
sb->s_magic = FUSE_SUPER_MAGIC;
sb->s_op = &fuse_super_operations;
+ sb->s_xattr = fuse_xattr_handlers;
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_time_gran = 1;
sb->s_export_op = &fuse_export_operations;
@@ -1109,7 +1120,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
fc->dont_mask = 1;
sb->s_flags |= MS_POSIXACL;
- fc->flags = d.flags;
+ fc->default_permissions = d.default_permissions;
+ fc->allow_other = d.allow_other;
fc->user_id = d.user_id;
fc->group_id = d.group_id;
fc->max_read = max_t(unsigned, 4096, d.max_read);
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
new file mode 100644
index 000000000000..3caac46b08b0
--- /dev/null
+++ b/fs/fuse/xattr.c
@@ -0,0 +1,211 @@
+/*
+ * FUSE: Filesystem in Userspace
+ * Copyright (C) 2001-2016 Miklos Szeredi <miklos@szeredi.hu>
+ *
+ * This program can be distributed under the terms of the GNU GPL.
+ * See the file COPYING.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+int fuse_setxattr(struct inode *inode, const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+ struct fuse_setxattr_in inarg;
+ int err;
+
+ if (fc->no_setxattr)
+ return -EOPNOTSUPP;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ inarg.flags = flags;
+ args.in.h.opcode = FUSE_SETXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 3;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = strlen(name) + 1;
+ args.in.args[1].value = name;
+ args.in.args[2].size = size;
+ args.in.args[2].value = value;
+ err = fuse_simple_request(fc, &args);
+ if (err == -ENOSYS) {
+ fc->no_setxattr = 1;
+ err = -EOPNOTSUPP;
+ }
+ if (!err) {
+ fuse_invalidate_attr(inode);
+ fuse_update_ctime(inode);
+ }
+ return err;
+}
+
+ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
+ size_t size)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+ struct fuse_getxattr_in inarg;
+ struct fuse_getxattr_out outarg;
+ ssize_t ret;
+
+ if (fc->no_getxattr)
+ return -EOPNOTSUPP;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ args.in.h.opcode = FUSE_GETXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = strlen(name) + 1;
+ args.in.args[1].value = name;
+ /* This is really two different operations rolled into one */
+ args.out.numargs = 1;
+ if (size) {
+ args.out.argvar = 1;
+ args.out.args[0].size = size;
+ args.out.args[0].value = value;
+ } else {
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ }
+ ret = fuse_simple_request(fc, &args);
+ if (!ret && !size)
+ ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX);
+ if (ret == -ENOSYS) {
+ fc->no_getxattr = 1;
+ ret = -EOPNOTSUPP;
+ }
+ return ret;
+}
+
+static int fuse_verify_xattr_list(char *list, size_t size)
+{
+ size_t origsize = size;
+
+ while (size) {
+ size_t thislen = strnlen(list, size);
+
+ if (!thislen || thislen == size)
+ return -EIO;
+
+ size -= thislen + 1;
+ list += thislen + 1;
+ }
+
+ return origsize;
+}
+
+ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
+{
+ struct inode *inode = d_inode(entry);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+ struct fuse_getxattr_in inarg;
+ struct fuse_getxattr_out outarg;
+ ssize_t ret;
+
+ if (!fuse_allow_current_process(fc))
+ return -EACCES;
+
+ if (fc->no_listxattr)
+ return -EOPNOTSUPP;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ args.in.h.opcode = FUSE_LISTXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ /* This is really two different operations rolled into one */
+ args.out.numargs = 1;
+ if (size) {
+ args.out.argvar = 1;
+ args.out.args[0].size = size;
+ args.out.args[0].value = list;
+ } else {
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ }
+ ret = fuse_simple_request(fc, &args);
+ if (!ret && !size)
+ ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX);
+ if (ret > 0 && size)
+ ret = fuse_verify_xattr_list(list, ret);
+ if (ret == -ENOSYS) {
+ fc->no_listxattr = 1;
+ ret = -EOPNOTSUPP;
+ }
+ return ret;
+}
+
+int fuse_removexattr(struct inode *inode, const char *name)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ FUSE_ARGS(args);
+ int err;
+
+ if (fc->no_removexattr)
+ return -EOPNOTSUPP;
+
+ args.in.h.opcode = FUSE_REMOVEXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = strlen(name) + 1;
+ args.in.args[0].value = name;
+ err = fuse_simple_request(fc, &args);
+ if (err == -ENOSYS) {
+ fc->no_removexattr = 1;
+ err = -EOPNOTSUPP;
+ }
+ if (!err) {
+ fuse_invalidate_attr(inode);
+ fuse_update_ctime(inode);
+ }
+ return err;
+}
+
+static int fuse_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, void *value, size_t size)
+{
+ return fuse_getxattr(inode, name, value, size);
+}
+
+static int fuse_xattr_set(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *name, const void *value, size_t size,
+ int flags)
+{
+ if (!value)
+ return fuse_removexattr(inode, name);
+
+ return fuse_setxattr(inode, name, value, size, flags);
+}
+
+static const struct xattr_handler fuse_xattr_handler = {
+ .prefix = "",
+ .get = fuse_xattr_get,
+ .set = fuse_xattr_set,
+};
+
+const struct xattr_handler *fuse_xattr_handlers[] = {
+ &fuse_xattr_handler,
+ NULL
+};
+
+const struct xattr_handler *fuse_acl_xattr_handlers[] = {
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+ &fuse_xattr_handler,
+ NULL
+};
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 82df36886938..5a6f52ea2722 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -187,7 +187,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
ClearPageChecked(page);
if (!page_has_buffers(page)) {
create_empty_buffers(page, inode->i_sb->s_blocksize,
- (1 << BH_Dirty)|(1 << BH_Uptodate));
+ BIT(BH_Dirty)|BIT(BH_Uptodate));
}
gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
}
@@ -1147,6 +1147,16 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
if (!page_has_buffers(page))
return 0;
+ /*
+ * From xfs_vm_releasepage: mm accommodates an old ext3 case where
+ * clean pages might not have had the dirty bit cleared. Thus, it can
+ * send actual dirty pages to ->releasepage() via shrink_active_list().
+ *
+ * As a workaround, we skip pages that contain dirty buffers below.
+ * Once ->releasepage isn't called on dirty pages anymore, we can warn
+ * on dirty buffers here again, like we used to.
+ */
+
gfs2_log_lock(sdp);
spin_lock(&sdp->sd_ail_lock);
head = bh = page_buffers(page);
@@ -1156,8 +1166,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
bd = bh->b_private;
if (bd && bd->bd_tr)
goto cannot_release;
- if (buffer_pinned(bh) || buffer_dirty(bh))
- goto not_possible;
+ if (buffer_dirty(bh) || WARN_ON(buffer_pinned(bh)))
+ goto cannot_release;
bh = bh->b_this_page;
} while(bh != head);
spin_unlock(&sdp->sd_ail_lock);
@@ -1180,9 +1190,6 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
return try_to_free_buffers(page);
-not_possible: /* Should never happen */
- WARN_ON(buffer_dirty(bh));
- WARN_ON(buffer_pinned(bh));
cannot_release:
spin_unlock(&sdp->sd_ail_lock);
gfs2_log_unlock(sdp);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6e2bec1cd289..645721f3ff00 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -82,8 +82,8 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
}
if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << inode->i_blkbits,
- (1 << BH_Uptodate));
+ create_empty_buffers(page, BIT(inode->i_blkbits),
+ BIT(BH_Uptodate));
bh = page_buffers(page);
@@ -690,7 +690,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
BUG_ON(!dblock);
BUG_ON(!new);
- bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5));
+ bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
ret = gfs2_block_map(inode, lblock, &bh, create);
*extlen = bh.b_size >> inode->i_blkbits;
*dblock = bh.b_blocknr;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index fcb59b23f1e3..db8fbeb62483 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -351,7 +351,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
if (hc)
return hc;
- hsize = 1 << ip->i_depth;
+ hsize = BIT(ip->i_depth);
hsize *= sizeof(__be64);
if (hsize != i_size_read(&ip->i_inode)) {
gfs2_consist_inode(ip);
@@ -819,8 +819,8 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode,
if (ip->i_diskflags & GFS2_DIF_EXHASH) {
struct gfs2_leaf *leaf;
- unsigned hsize = 1 << ip->i_depth;
- unsigned index;
+ unsigned int hsize = BIT(ip->i_depth);
+ unsigned int index;
u64 ln;
if (hsize * sizeof(u64) != i_size_read(inode)) {
gfs2_consist_inode(ip);
@@ -932,7 +932,7 @@ static int dir_make_exhash(struct inode *inode)
return -ENOSPC;
bn = bh->b_blocknr;
- gfs2_assert(sdp, dip->i_entries < (1 << 16));
+ gfs2_assert(sdp, dip->i_entries < BIT(16));
leaf->lf_entries = cpu_to_be16(dip->i_entries);
/* Copy dirents */
@@ -1041,7 +1041,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
bn = nbh->b_blocknr;
/* Compute the start and len of leaf pointers in the hash table. */
- len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth));
+ len = BIT(dip->i_depth - be16_to_cpu(oleaf->lf_depth));
half_len = len >> 1;
if (!half_len) {
pr_warn("i_depth %u lf_depth %u index %u\n",
@@ -1163,7 +1163,7 @@ static int dir_double_exhash(struct gfs2_inode *dip)
int x;
int error = 0;
- hsize = 1 << dip->i_depth;
+ hsize = BIT(dip->i_depth);
hsize_bytes = hsize * sizeof(__be64);
hc = gfs2_dir_get_hash_table(dip);
@@ -1539,7 +1539,7 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
int error = 0;
unsigned depth = 0;
- hsize = 1 << dip->i_depth;
+ hsize = BIT(dip->i_depth);
hash = gfs2_dir_offset2hash(ctx->pos);
index = hash >> (32 - dip->i_depth);
@@ -1558,7 +1558,7 @@ static int dir_e_read(struct inode *inode, struct dir_context *ctx,
if (error)
break;
- len = 1 << (dip->i_depth - depth);
+ len = BIT(dip->i_depth - depth);
index = (index & ~(len - 1)) + len;
}
@@ -2113,7 +2113,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
u64 leaf_no;
int error = 0, last;
- hsize = 1 << dip->i_depth;
+ hsize = BIT(dip->i_depth);
lp = gfs2_dir_get_hash_table(dip);
if (IS_ERR(lp))
@@ -2126,7 +2126,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
if (error)
goto out;
leaf = (struct gfs2_leaf *)bh->b_data;
- len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
+ len = BIT(dip->i_depth - be16_to_cpu(leaf->lf_depth));
next_index = (index & ~(len - 1)) + len;
last = ((next_index >= hsize) ? 1 : 0);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 320e65e61938..360188f162bd 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -395,9 +395,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
- /* Update file times before taking page lock */
- file_update_time(vma->vm_file);
-
ret = gfs2_rsqa_alloc(ip);
if (ret)
goto out;
@@ -409,6 +406,9 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto out_uninit;
+ /* Update file times before taking page lock */
+ file_update_time(vma->vm_file);
+
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3a90b2b5b9bb..14cbf60167a7 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -69,7 +69,7 @@ static atomic_t lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(lru_lock);
#define GFS2_GL_HASH_SHIFT 15
-#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
+#define GFS2_GL_HASH_SIZE BIT(GFS2_GL_HASH_SHIFT)
static struct rhashtable_params ht_parms = {
.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
@@ -1781,7 +1781,13 @@ int __init gfs2_glock_init(void)
return -ENOMEM;
}
- register_shrinker(&glock_shrinker);
+ ret = register_shrinker(&glock_shrinker);
+ if (ret) {
+ destroy_workqueue(gfs2_delete_workqueue);
+ destroy_workqueue(glock_workqueue);
+ rhashtable_destroy(&gl_hash_table);
+ return ret;
+ }
return 0;
}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e4da0ecd3285..fb3a810b506f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -187,6 +187,10 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
}
gfs2_set_iop(inode);
+
+ inode->i_atime.tv_sec = 0;
+ inode->i_atime.tv_nsec = 0;
+
unlock_new_inode(inode);
}
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 7710dfd3af35..aace8ce34a18 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -85,7 +85,7 @@ static inline int gfs2_check_internal_file_size(struct inode *inode,
u64 size = i_size_read(inode);
if (size < minsize || size > maxsize)
goto err;
- if (size & ((1 << inode->i_blkbits) - 1))
+ if (size & (BIT(inode->i_blkbits) - 1))
goto err;
return 0;
err:
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 74fd0139e6c2..67d1fc4668f7 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -145,7 +145,9 @@ static int __init init_gfs2_fs(void)
if (!gfs2_qadata_cachep)
goto fail;
- register_shrinker(&gfs2_qd_shrinker);
+ error = register_shrinker(&gfs2_qd_shrinker);
+ if (error)
+ goto fail;
error = register_filesystem(&gfs2_fs_type);
if (error)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 950b8be68e41..373639a59782 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -216,23 +216,26 @@ static void gfs2_meta_read_endio(struct bio *bio)
static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[],
int num)
{
- struct buffer_head *bh = bhs[0];
- struct bio *bio;
- int i;
-
- if (!num)
- return;
-
- bio = bio_alloc(GFP_NOIO, num);
- bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
- for (i = 0; i < num; i++) {
- bh = bhs[i];
- bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+ while (num > 0) {
+ struct buffer_head *bh = *bhs;
+ struct bio *bio;
+
+ bio = bio_alloc(GFP_NOIO, num);
+ bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_bdev = bh->b_bdev;
+ while (num > 0) {
+ bh = *bhs;
+ if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) {
+ BUG_ON(bio->bi_iter.bi_size == 0);
+ break;
+ }
+ bhs++;
+ num--;
+ }
+ bio->bi_end_io = gfs2_meta_read_endio;
+ bio_set_op_attrs(bio, op, op_flags);
+ submit_bio(bio);
}
- bio->bi_end_io = gfs2_meta_read_endio;
- bio_set_op_attrs(bio, op, op_flags);
- submit_bio(bio);
}
/**
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ef1e1822977f..ff72ac6439c8 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -58,7 +58,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_quota_scale_num = 1;
gt->gt_quota_scale_den = 1;
gt->gt_new_files_jdata = 0;
- gt->gt_max_readahead = 1 << 18;
+ gt->gt_max_readahead = BIT(18);
gt->gt_complain_secs = 10;
}
@@ -284,7 +284,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
GFS2_BASIC_BLOCK_SHIFT;
- sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+ sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_dinode)) / sizeof(u64);
sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
@@ -302,7 +302,7 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
/* Compute maximum reservation required to add a entry to a directory */
- hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
+ hash_blocks = DIV_ROUND_UP(sizeof(u64) * BIT(GFS2_DIR_MAX_DEPTH),
sdp->sd_jbsize);
ind_blocks = 0;
@@ -1089,7 +1089,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
GFS2_BASIC_BLOCK_SHIFT;
- sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+ sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit;
sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 77930ca25303..8af2dfa09236 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -75,7 +75,7 @@
#include "util.h"
#define GFS2_QD_HASH_SHIFT 12
-#define GFS2_QD_HASH_SIZE (1 << GFS2_QD_HASH_SHIFT)
+#define GFS2_QD_HASH_SIZE BIT(GFS2_QD_HASH_SHIFT)
#define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1)
/* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */
@@ -384,7 +384,7 @@ static int bh_get(struct gfs2_quota_data *qd)
block = qd->qd_slot / sdp->sd_qc_per_block;
offset = qd->qd_slot % sdp->sd_qc_per_block;
- bh_map.b_size = 1 << ip->i_inode.i_blkbits;
+ bh_map.b_size = BIT(ip->i_inode.i_blkbits);
error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0);
if (error)
goto fail;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 3a7e60bb39f8..e3ee387a6dfe 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -359,7 +359,7 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd)
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
u64 size = i_size_read(jd->jd_inode);
- if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, 1 << 30))
+ if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30)))
return -EIO;
jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift;
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 2e58978d6f45..4d973524c887 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2893,8 +2893,7 @@ restart:
* on anon_list2. Let's check.
*/
if (!list_empty(&TxAnchor.anon_list2)) {
- list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
- INIT_LIST_HEAD(&TxAnchor.anon_list2);
+ list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
goto restart;
}
TXN_UNLOCK();
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 90b3bc21e9b0..bd9b641ada2c 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -379,8 +379,14 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
* cached in meta-data cache, and not written out
* by txCommit();
*/
- filemap_fdatawait(ipbmap->i_mapping);
- filemap_write_and_wait(ipbmap->i_mapping);
+ rc = filemap_fdatawait(ipbmap->i_mapping);
+ if (rc)
+ goto error_out;
+
+ rc = filemap_write_and_wait(ipbmap->i_mapping);
+ if (rc)
+ goto error_out;
+
diWriteSpecial(ipbmap, 0);
newPage = nPages; /* first new page number */
diff --git a/fs/locks.c b/fs/locks.c
index ee1b15f6fc13..90ec67108b22 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -127,7 +127,6 @@
#include <linux/pid_namespace.h>
#include <linux/hashtable.h>
#include <linux/percpu.h>
-#include <linux/lglock.h>
#define CREATE_TRACE_POINTS
#include <trace/events/filelock.h>
@@ -158,12 +157,18 @@ int lease_break_time = 45;
/*
* The global file_lock_list is only used for displaying /proc/locks, so we
- * keep a list on each CPU, with each list protected by its own spinlock via
- * the file_lock_lglock. Note that alterations to the list also require that
- * the relevant flc_lock is held.
+ * keep a list on each CPU, with each list protected by its own spinlock.
+ * Global serialization is done using file_rwsem.
+ *
+ * Note that alterations to the list also require that the relevant flc_lock is
+ * held.
*/
-DEFINE_STATIC_LGLOCK(file_lock_lglock);
-static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
+struct file_lock_list_struct {
+ spinlock_t lock;
+ struct hlist_head hlist;
+};
+static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
+DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
/*
* The blocked_hash is used to find POSIX lock loops for deadlock detection.
@@ -587,15 +592,23 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
/* Must be called with the flc_lock held! */
static void locks_insert_global_locks(struct file_lock *fl)
{
- lg_local_lock(&file_lock_lglock);
+ struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
+
+ percpu_rwsem_assert_held(&file_rwsem);
+
+ spin_lock(&fll->lock);
fl->fl_link_cpu = smp_processor_id();
- hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
- lg_local_unlock(&file_lock_lglock);
+ hlist_add_head(&fl->fl_link, &fll->hlist);
+ spin_unlock(&fll->lock);
}
/* Must be called with the flc_lock held! */
static void locks_delete_global_locks(struct file_lock *fl)
{
+ struct file_lock_list_struct *fll;
+
+ percpu_rwsem_assert_held(&file_rwsem);
+
/*
* Avoid taking lock if already unhashed. This is safe since this check
* is done while holding the flc_lock, and new insertions into the list
@@ -603,9 +616,11 @@ static void locks_delete_global_locks(struct file_lock *fl)
*/
if (hlist_unhashed(&fl->fl_link))
return;
- lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu);
+
+ fll = per_cpu_ptr(&file_lock_list, fl->fl_link_cpu);
+ spin_lock(&fll->lock);
hlist_del_init(&fl->fl_link);
- lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu);
+ spin_unlock(&fll->lock);
}
static unsigned long
@@ -915,6 +930,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
return -ENOMEM;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
if (request->fl_flags & FL_ACCESS)
goto find_conflict;
@@ -955,6 +971,7 @@ find_conflict:
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
if (new_fl)
locks_free_lock(new_fl);
locks_dispose_list(&dispose);
@@ -991,6 +1008,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
new_fl2 = locks_alloc_lock();
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
/*
* New lock request. Walk all POSIX locks and look for conflicts. If
@@ -1162,6 +1180,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
}
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
/*
* Free any unused locks.
*/
@@ -1436,6 +1455,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
return error;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
@@ -1487,9 +1507,13 @@ restart:
locks_insert_block(fl, new_fl);
trace_break_lease_block(inode, new_fl);
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
+
locks_dispose_list(&dispose);
error = wait_event_interruptible_timeout(new_fl->fl_wait,
!new_fl->fl_next, break_time);
+
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
trace_break_lease_unblock(inode, new_fl);
locks_delete_block(new_fl);
@@ -1506,6 +1530,7 @@ restart:
}
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
locks_dispose_list(&dispose);
locks_free_lock(new_fl);
return error;
@@ -1660,6 +1685,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
return -EINVAL;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
error = check_conflicting_open(dentry, arg, lease->fl_flags);
@@ -1730,6 +1756,7 @@ out_setup:
lease->fl_lmops->lm_setup(lease, priv);
out:
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
locks_dispose_list(&dispose);
if (is_deleg)
inode_unlock(inode);
@@ -1752,6 +1779,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
return error;
}
+ percpu_down_read_preempt_disable(&file_rwsem);
spin_lock(&ctx->flc_lock);
list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
if (fl->fl_file == filp &&
@@ -1764,6 +1792,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
if (victim)
error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
spin_unlock(&ctx->flc_lock);
+ percpu_up_read_preempt_enable(&file_rwsem);
locks_dispose_list(&dispose);
return error;
}
@@ -2574,9 +2603,20 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
struct inode *inode = NULL;
unsigned int fl_pid;
- if (fl->fl_nspid)
- fl_pid = pid_vnr(fl->fl_nspid);
- else
+ if (fl->fl_nspid) {
+ struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
+
+ /* Don't let fl_pid change based on who is reading the file */
+ fl_pid = pid_nr_ns(fl->fl_nspid, proc_pidns);
+
+ /*
+ * If fl_pid is not visible in this pid namespace, don't
+ * display who is waiting on the lock (when called from
+ * locks_show) or skip the lock entirely (__show_fd_info).
+ */
+ if (fl_pid == 0)
+ return;
+ } else
fl_pid = fl->fl_pid;
if (fl->fl_file != NULL)
@@ -2648,9 +2688,13 @@ static int locks_show(struct seq_file *f, void *v)
{
struct locks_iterator *iter = f->private;
struct file_lock *fl, *bfl;
+ struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
fl = hlist_entry(v, struct file_lock, fl_link);
+ if (fl->fl_nspid && !pid_nr_ns(fl->fl_nspid, proc_pidns))
+ return 0;
+
lock_get_status(f, fl, iter->li_pos, "");
list_for_each_entry(bfl, &fl->fl_block, fl_block)
@@ -2703,9 +2747,9 @@ static void *locks_start(struct seq_file *f, loff_t *pos)
struct locks_iterator *iter = f->private;
iter->li_pos = *pos + 1;
- lg_global_lock(&file_lock_lglock);
+ percpu_down_write(&file_rwsem);
spin_lock(&blocked_lock_lock);
- return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
+ return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos);
}
static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
@@ -2713,14 +2757,14 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
struct locks_iterator *iter = f->private;
++iter->li_pos;
- return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos);
+ return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos);
}
static void locks_stop(struct seq_file *f, void *v)
__releases(&blocked_lock_lock)
{
spin_unlock(&blocked_lock_lock);
- lg_global_unlock(&file_lock_lglock);
+ percpu_up_write(&file_rwsem);
}
static const struct seq_operations locks_seq_operations = {
@@ -2761,10 +2805,13 @@ static int __init filelock_init(void)
filelock_cache = kmem_cache_create("file_lock_cache",
sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
- lg_lock_init(&file_lock_lglock, "file_lock_lglock");
- for_each_possible_cpu(i)
- INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));
+ for_each_possible_cpu(i) {
+ struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
+
+ spin_lock_init(&fll->lock);
+ INIT_HLIST_HEAD(&fll->hlist);
+ }
return 0;
}
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d2f97ecca6a5..e0e5f7c3c99f 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- wait_event(group->fanotify_data.access_waitq, event->response ||
- atomic_read(&group->fanotify_data.bypass_perm));
-
- if (!event->response) { /* bypass_perm set */
- /*
- * Event was canceled because group is being destroyed. Remove
- * it from group's event list because we are responsible for
- * freeing the permission event.
- */
- fsnotify_remove_event(group, &event->fae.fse);
- return 0;
- }
+ wait_event(group->fanotify_data.access_waitq, event->response);
/* userspace responded, convert to something usable */
switch (event->response) {
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8e8e6bcd1d43..a64313868d3a 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file)
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
struct fanotify_perm_event_info *event, *next;
+ struct fsnotify_event *fsn_event;
/*
- * There may be still new events arriving in the notification queue
- * but since userspace cannot use fanotify fd anymore, no event can
- * enter or leave access_list by now.
+ * Stop new events from arriving in the notification queue. Since
+ * userspace cannot use fanotify fd anymore, no event can enter or
+ * leave access_list by now either.
*/
- spin_lock(&group->fanotify_data.access_lock);
-
- atomic_inc(&group->fanotify_data.bypass_perm);
+ fsnotify_group_stop_queueing(group);
+ /*
+ * Process all permission events on access_list and notification queue
+ * and simulate reply from userspace.
+ */
+ spin_lock(&group->fanotify_data.access_lock);
list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
fae.fse.list) {
pr_debug("%s: found group=%p event=%p\n", __func__, group,
@@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file)
spin_unlock(&group->fanotify_data.access_lock);
/*
- * Since bypass_perm is set, newly queued events will not wait for
- * access response. Wake up the already sleeping ones now.
- * synchronize_srcu() in fsnotify_destroy_group() will wait for all
- * processes sleeping in fanotify_handle_event() waiting for access
- * response and thus also for all permission events to be freed.
+ * Destroy all non-permission events. For permission events just
+ * dequeue them and set the response. They will be freed once the
+ * response is consumed and fanotify_get_response() returns.
*/
+ mutex_lock(&group->notification_mutex);
+ while (!fsnotify_notify_queue_is_empty(group)) {
+ fsn_event = fsnotify_remove_first_event(group);
+ if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS))
+ fsnotify_destroy_event(group, fsn_event);
+ else
+ FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
+ }
+ mutex_unlock(&group->notification_mutex);
+
+ /* Response for all permission events is set, wake up waiters */
wake_up(&group->fanotify_data.access_waitq);
#endif
@@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
spin_lock_init(&group->fanotify_data.access_lock);
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
- atomic_set(&group->fanotify_data.bypass_perm, 0);
#endif
switch (flags & FAN_ALL_CLASS_BITS) {
case FAN_CLASS_NOTIF:
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 3e2dd85be5dd..b47f7cfdcaa4 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -40,6 +40,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
}
/*
+ * Stop queueing new events for this group. Once this function returns
+ * fsnotify_add_event() will not add any new events to the group's queue.
+ */
+void fsnotify_group_stop_queueing(struct fsnotify_group *group)
+{
+ mutex_lock(&group->notification_mutex);
+ group->shutdown = true;
+ mutex_unlock(&group->notification_mutex);
+}
+
+/*
* Trying to get rid of a group. Remove all marks, flush all events and release
* the group reference.
* Note that another thread calling fsnotify_clear_marks_by_group() may still
@@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
*/
void fsnotify_destroy_group(struct fsnotify_group *group)
{
+ /*
+ * Stop queueing new events. The code below is careful enough to not
+ * require this but fanotify needs to stop queueing events even before
+ * fsnotify_destroy_group() is called and this makes the other callers
+ * of fsnotify_destroy_group() see the same behavior.
+ */
+ fsnotify_group_stop_queueing(group);
+
/* clear all inode marks for this group, attach them to destroy_list */
fsnotify_detach_group_marks(group);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index a95d8e037aeb..e455e83ceeeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
* Add an event to the group notification queue. The group can later pull this
* event off the queue to deal with. The function returns 0 if the event was
* added to the queue, 1 if the event was merged with some other queued event,
- * 2 if the queue of events has overflown.
+ * 2 if the event was not queued - either the queue of events has overflowed
+ * or the group is shutting down.
*/
int fsnotify_add_event(struct fsnotify_group *group,
struct fsnotify_event *event,
@@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group,
mutex_lock(&group->notification_mutex);
+ if (group->shutdown) {
+ mutex_unlock(&group->notification_mutex);
+ return 2;
+ }
+
if (group->q_len >= group->max_events) {
ret = 2;
/* Queue overflow event only if it isn't already queued */
@@ -126,21 +132,6 @@ queue:
}
/*
- * Remove @event from group's notification queue. It is the responsibility of
- * the caller to destroy the event.
- */
-void fsnotify_remove_event(struct fsnotify_group *group,
- struct fsnotify_event *event)
-{
- mutex_lock(&group->notification_mutex);
- if (!list_empty(&event->list)) {
- list_del_init(&event->list);
- group->q_len--;
- }
- mutex_unlock(&group->notification_mutex);
-}
-
-/*
* Remove and return the first event from the notification list. It is the
* responsibility of the caller to destroy the obtained event
*/
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7dabbc31060e..f165f867f332 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5922,7 +5922,6 @@ bail:
}
static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
- handle_t *handle,
struct inode *data_alloc_inode,
struct buffer_head *data_alloc_bh)
{
@@ -5935,11 +5934,19 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
struct ocfs2_truncate_log *tl;
struct inode *tl_inode = osb->osb_tl_inode;
struct buffer_head *tl_bh = osb->osb_tl_bh;
+ handle_t *handle;
di = (struct ocfs2_dinode *) tl_bh->b_data;
tl = &di->id2.i_dealloc;
i = le16_to_cpu(tl->tl_used) - 1;
while (i >= 0) {
+ handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto bail;
+ }
+
/* Caller has given us at least enough credits to
* update the truncate log dinode */
status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
@@ -5974,12 +5981,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
}
}
- status = ocfs2_extend_trans(handle,
- OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
+ ocfs2_commit_trans(osb, handle);
i--;
}
@@ -5994,7 +5996,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
{
int status;
unsigned int num_to_flush;
- handle_t *handle;
struct inode *tl_inode = osb->osb_tl_inode;
struct inode *data_alloc_inode = NULL;
struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -6038,21 +6039,11 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
goto out_mutex;
}
- handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto out_unlock;
- }
-
- status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
+ status = ocfs2_replay_truncate_records(osb, data_alloc_inode,
data_alloc_bh);
if (status < 0)
mlog_errno(status);
- ocfs2_commit_trans(osb, handle);
-
-out_unlock:
brelse(data_alloc_bh);
ocfs2_inode_unlock(data_alloc_inode, 1);
@@ -6413,43 +6404,34 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
goto out_mutex;
}
- handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out_unlock;
- }
-
while (head) {
if (head->free_bg)
bg_blkno = head->free_bg;
else
bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
head->free_bit);
+ handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ goto out_unlock;
+ }
+
trace_ocfs2_free_cached_blocks(
(unsigned long long)head->free_blk, head->free_bit);
ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
head->free_bit, bg_blkno, 1);
- if (ret) {
+ if (ret)
mlog_errno(ret);
- goto out_journal;
- }
- ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
- if (ret) {
- mlog_errno(ret);
- goto out_journal;
- }
+ ocfs2_commit_trans(osb, handle);
tmp = head;
head = head->free_next;
kfree(tmp);
}
-out_journal:
- ocfs2_commit_trans(osb, handle);
-
out_unlock:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 98d36548153d..bbb4b3e5b4ff 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1842,6 +1842,16 @@ out_commit:
ocfs2_commit_trans(osb, handle);
out:
+ /*
+ * The mmapped page won't be unlocked in ocfs2_free_write_ctxt(),
+ * even in case of error here like ENOSPC and ENOMEM. So, we need
+ * to unlock the target page manually to prevent deadlocks when
+ * retrying again on ENOSPC, or when returning non-VM_FAULT_LOCKED
+ * to VM code.
+ */
+ if (wc->w_target_locked)
+ unlock_page(mmap_page);
+
ocfs2_free_write_ctxt(inode, wc);
if (data_ac) {
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 94b18369b1cc..b95e7df5b76a 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,9 +44,6 @@
* version here in tcp_internal.h should not need to be bumped for
* filesystem locking changes.
*
- * New in version 12
- * - Negotiate hb timeout when storage is down.
- *
* New in version 11
* - Negotiation of filesystem locking in the dlm join.
*
@@ -78,7 +75,7 @@
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
-#define O2NET_PROTOCOL_VERSION 12ULL
+#define O2NET_PROTOCOL_VERSION 11ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index cdeafb4e7ed6..0bb128659d4b 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
struct dlm_lock *lock, int flags, int type)
{
enum dlm_status status;
- u8 old_owner = res->owner;
mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
- lock->convert_pending = 0;
/* if it failed, move it back to granted queue.
* if master returns DLM_NORMAL and then down before sending ast,
* it may have already been moved to granted queue, reset to
@@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
if (status != DLM_NOTQUEUED)
dlm_error(status);
dlm_revert_pending_convert(res, lock);
- } else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
- (old_owner != res->owner)) {
- mlog(0, "res %.*s is in recovering or has been recovered.\n",
- res->lockname.len, res->lockname.name);
+ } else if (!lock->convert_pending) {
+ mlog(0, "%s: res %.*s, owner died and lock has been moved back "
+ "to granted list, retry convert.\n",
+ dlm->name, res->lockname.len, res->lockname.name);
status = DLM_RECOVERING;
}
+
+ lock->convert_pending = 0;
bail:
spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4e7b0dc22450..0b055bfb8e86 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1506,7 +1506,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
u64 start, u64 len)
{
int ret = 0;
- u64 tmpend, end = start + len;
+ u64 tmpend = 0;
+ u64 end = start + len;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
unsigned int csize = osb->s_clustersize;
handle_t *handle;
@@ -1538,18 +1539,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
}
/*
- * We want to get the byte offset of the end of the 1st cluster.
+ * If start is on a cluster boundary and end is somewhere in another
+ * cluster, we have not COWed the cluster starting at start, unless
+ * end is also within the same cluster. So, in this case, we skip this
+ * first call to ocfs2_zero_range_for_truncate() and move on
+ * to the next one.
*/
- tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
- if (tmpend > end)
- tmpend = end;
+ if ((start & (csize - 1)) != 0) {
+ /*
+ * We want to get the byte offset of the end of the 1st
+ * cluster.
+ */
+ tmpend = (u64)osb->s_clustersize +
+ (start & ~(osb->s_clustersize - 1));
+ if (tmpend > end)
+ tmpend = end;
- trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
- (unsigned long long)tmpend);
+ trace_ocfs2_zero_partial_clusters_range1(
+ (unsigned long long)start,
+ (unsigned long long)tmpend);
- ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
- if (ret)
- mlog_errno(ret);
+ ret = ocfs2_zero_range_for_truncate(inode, handle, start,
+ tmpend);
+ if (ret)
+ mlog_errno(ret);
+ }
if (tmpend < end) {
/*
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ea47120a85ff..6ad3533940ba 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1199,14 +1199,24 @@ retry:
inode_unlock((*ac)->ac_inode);
ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
- if (ret == 1)
+ if (ret == 1) {
+ iput((*ac)->ac_inode);
+ (*ac)->ac_inode = NULL;
goto retry;
+ }
if (ret < 0)
mlog_errno(ret);
inode_lock((*ac)->ac_inode);
- ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+ ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ inode_unlock((*ac)->ac_inode);
+ iput((*ac)->ac_inode);
+ (*ac)->ac_inode = NULL;
+ goto bail;
+ }
}
if (status < 0) {
if (status != -ENOSPC)
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 43fdc2765aea..db37a0e02d32 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -105,6 +105,13 @@ retry:
goto retry;
}
+ error = security_inode_copy_up_xattr(name);
+ if (error < 0 && error != -EOPNOTSUPP)
+ break;
+ if (error == 1) {
+ error = 0;
+ continue; /* Discard */
+ }
error = vfs_setxattr(new, name, value, size, 0);
if (error)
break;
@@ -248,6 +255,8 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
struct dentry *upper = NULL;
umode_t mode = stat->mode;
int err;
+ const struct cred *old_creds = NULL;
+ struct cred *new_creds = NULL;
newdentry = ovl_lookup_temp(workdir, dentry);
err = PTR_ERR(newdentry);
@@ -260,10 +269,23 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
if (IS_ERR(upper))
goto out1;
+ err = security_inode_copy_up(dentry, &new_creds);
+ if (err < 0)
+ goto out2;
+
+ if (new_creds)
+ old_creds = override_creds(new_creds);
+
/* Can't properly set mode on creation because of the umask */
stat->mode &= S_IFMT;
err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
stat->mode = mode;
+
+ if (new_creds) {
+ revert_creds(old_creds);
+ put_cred(new_creds);
+ }
+
if (err)
goto out2;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 1560fdc09a5f..b0ffa1d1677e 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -489,6 +489,15 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
if (override_cred) {
override_cred->fsuid = inode->i_uid;
override_cred->fsgid = inode->i_gid;
+ if (!hardlink) {
+ err = security_dentry_create_files_as(dentry,
+ stat->mode, &dentry->d_name, old_cred,
+ override_cred);
+ if (err) {
+ put_cred(override_cred);
+ goto out_revert_creds;
+ }
+ }
put_cred(override_creds(override_cred));
put_cred(override_cred);
@@ -499,6 +508,7 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
err = ovl_create_over_whiteout(dentry, inode, stat,
link, hardlink);
}
+out_revert_creds:
revert_creds(old_cred);
if (!err) {
struct inode *realinode = d_inode(ovl_dentry_upper(dentry));
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ac0df4dde823..3b792ab3c0dc 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -483,7 +483,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
save_stack_trace_tsk(task, &trace);
for (i = 0; i < trace.nr_entries; i++) {
- seq_printf(m, "[<%pK>] %pS\n",
+ seq_printf(m, "[<%pK>] %pB\n",
(void *)entries[i], (void *)entries[i]);
}
unlock_trace(task);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index c633476616e0..bca66d83a765 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -390,6 +390,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
atomic_set(&ent->count, 1);
spin_lock_init(&ent->pde_unload_lock);
INIT_LIST_HEAD(&ent->pde_openers);
+ proc_set_user(ent, (*parent)->uid, (*parent)->gid);
+
out:
return ent;
}
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index a939f5ed7f89..5c89a07e3d7f 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
+ char *buf = file->private_data;
ssize_t acc = 0;
size_t size, tsz;
size_t elf_buflen;
@@ -500,23 +501,20 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
if (clear_user(buffer, tsz))
return -EFAULT;
} else if (is_vmalloc_or_module_addr((void *)start)) {
- char * elf_buf;
-
- elf_buf = kzalloc(tsz, GFP_KERNEL);
- if (!elf_buf)
- return -ENOMEM;
- vread(elf_buf, (char *)start, tsz);
+ vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
- if (copy_to_user(buffer, elf_buf, tsz)) {
- kfree(elf_buf);
+ if (copy_to_user(buffer, buf, tsz))
return -EFAULT;
- }
- kfree(elf_buf);
} else {
if (kern_addr_valid(start)) {
unsigned long n;
- n = copy_to_user(buffer, (char *)start, tsz);
+ /*
+ * Using bounce buffer to bypass the
+ * hardened user copy kernel text checks.
+ */
+ memcpy(buf, (char *) start, tsz);
+ n = copy_to_user(buffer, buf, tsz);
/*
* We cannot distinguish between fault on source
* and fault on destination. When this happens
@@ -549,6 +547,11 @@ static int open_kcore(struct inode *inode, struct file *filp)
{
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
+
+ filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!filp->private_data)
+ return -ENOMEM;
+
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
@@ -559,10 +562,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
return 0;
}
+static int release_kcore(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
static const struct file_operations proc_kcore_operations = {
.read = read_kcore,
.open = open_kcore,
+ .release = release_kcore,
.llseek = default_llseek,
};
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index c8bbc68cdb05..7ae6b1da7cab 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -21,6 +21,7 @@
#include <linux/bitops.h>
#include <linux/mount.h>
#include <linux/nsproxy.h>
+#include <linux/uidgid.h>
#include <net/net_namespace.h>
#include <linux/seq_file.h>
@@ -185,6 +186,8 @@ const struct file_operations proc_net_operations = {
static __net_init int proc_net_ns_init(struct net *net)
{
struct proc_dir_entry *netd, *net_statd;
+ kuid_t uid;
+ kgid_t gid;
int err;
err = -ENOMEM;
@@ -199,6 +202,16 @@ static __net_init int proc_net_ns_init(struct net *net)
netd->parent = &proc_root;
memcpy(netd->name, "net", 4);
+ uid = make_kuid(net->user_ns, 0);
+ if (!uid_valid(uid))
+ uid = netd->uid;
+
+ gid = make_kgid(net->user_ns, 0);
+ if (!gid_valid(gid))
+ gid = netd->gid;
+
+ proc_set_user(netd, uid, gid);
+
err = -EEXIST;
net_statd = proc_net_mkdir(net, "stat", netd);
if (!net_statd)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1b93650dda2f..2ed3d71d4767 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -430,6 +430,7 @@ static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, i
static struct inode *proc_sys_make_inode(struct super_block *sb,
struct ctl_table_header *head, struct ctl_table *table)
{
+ struct ctl_table_root *root = head->root;
struct inode *inode;
struct proc_inode *ei;
@@ -457,6 +458,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
if (is_empty_dir(head))
make_empty_dir_inode(inode);
}
+
+ if (root->set_ownership)
+ root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
+
out:
return inode;
}
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 35df08ee9c97..2d445425aad7 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -341,6 +341,7 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
struct qc_state state;
int ret;
+ memset(&state, 0, sizeof (struct qc_state));
ret = sb->s_qcop->get_state(sb, &state);
if (ret < 0)
return ret;
@@ -365,17 +366,19 @@ static int quota_getstate(struct super_block *sb, struct fs_quota_stat *fqs)
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit;
fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit;
- if (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) {
+
+ /* Inodes may be allocated even if inactive; copy out if present */
+ if (state.s_state[USRQUOTA].ino) {
fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino;
fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks;
fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents;
}
- if (state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) {
+ if (state.s_state[GRPQUOTA].ino) {
fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino;
fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks;
fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents;
}
- if (state.s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) {
+ if (state.s_state[PRJQUOTA].ino) {
/*
* Q_XGETQSTAT doesn't have room for both group and project
* quotas. So, allow the project quota values to be copied out
@@ -411,6 +414,7 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
struct qc_state state;
int ret;
+ memset(&state, 0, sizeof (struct qc_state));
ret = sb->s_qcop->get_state(sb, &state);
if (ret < 0)
return ret;
@@ -435,17 +439,19 @@ static int quota_getstatev(struct super_block *sb, struct fs_quota_statv *fqs)
fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit;
fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit;
fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit;
- if (state.s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) {
+
+ /* Inodes may be allocated even if inactive; copy out if present */
+ if (state.s_state[USRQUOTA].ino) {
fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino;
fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks;
fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents;
}
- if (state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) {
+ if (state.s_state[GRPQUOTA].ino) {
fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino;
fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks;
fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents;
}
- if (state.s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) {
+ if (state.s_state[PRJQUOTA].ino) {
fqs->qs_pquota.qfs_ino = state.s_state[PRJQUOTA].ino;
fqs->qs_pquota.qfs_nblks = state.s_state[PRJQUOTA].blocks;
fqs->qs_pquota.qfs_nextents = state.s_state[PRJQUOTA].nextents;
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 183a212694bf..12af0490322f 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -27,9 +27,17 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/ramfs.h>
+#include <linux/sched.h>
#include "internal.h"
+static unsigned long ramfs_mmu_get_unmapped_area(struct file *file,
+ unsigned long addr, unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
const struct file_operations ramfs_file_operations = {
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
@@ -38,6 +46,7 @@ const struct file_operations ramfs_file_operations = {
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.llseek = generic_file_llseek,
+ .get_unmapped_area = ramfs_mmu_get_unmapped_area,
};
const struct inode_operations ramfs_file_inode_operations = {
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7a4a85a6821e..74d5ddd26296 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -190,7 +190,15 @@ static int remove_save_link_only(struct super_block *s,
static int reiserfs_quota_on_mount(struct super_block *, int);
#endif
-/* look for uncompleted unlinks and truncates and complete them */
+/*
+ * Look for uncompleted unlinks and truncates and complete them
+ *
+ * Called with superblock write locked. If quotas are enabled, we have to
+ * release/retake lest we call dquot_quota_on_mount(), proceed to
+ * schedule_on_each_cpu() in invalidate_bdev() and deadlock waiting for the per
+ * cpu worklets to complete flush_async_commits() that in turn wait for the
+ * superblock write lock.
+ */
static int finish_unfinished(struct super_block *s)
{
INITIALIZE_PATH(path);
@@ -237,7 +245,9 @@ static int finish_unfinished(struct super_block *s)
quota_enabled[i] = 0;
continue;
}
+ reiserfs_write_unlock(s);
ret = reiserfs_quota_on_mount(s, i);
+ reiserfs_write_lock(s);
if (ret < 0)
reiserfs_warning(s, "reiserfs-2500",
"cannot turn on journaled "
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index dc1358b5ec95..ac2de0ed69ad 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -233,8 +233,8 @@ void sysfs_remove_group(struct kobject *kobj,
kn = kernfs_find_and_get(parent, grp->name);
if (!kn) {
WARN(!kn, KERN_WARNING
- "sysfs group %p not found for kobject '%s'\n",
- grp, kobject_name(kobj));
+ "sysfs group '%s' not found for kobject '%s'\n",
+ grp->name, kobject_name(kobj));
return;
}
} else {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 632570617327..e855bf8d74b4 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -94,7 +94,7 @@ static int udf_adinicb_write_begin(struct file *file,
return -ENOMEM;
*pagep = page;
- if (!PageUptodate(page) && len != PAGE_SIZE)
+ if (!PageUptodate(page))
__udf_adinicb_readpage(page);
return 0;
}
@@ -105,11 +105,25 @@ static ssize_t udf_adinicb_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return 0;
}
+static int udf_adinicb_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = page->mapping->host;
+ loff_t last_pos = pos + copied;
+ if (last_pos > inode->i_size)
+ i_size_write(inode, last_pos);
+ set_page_dirty(page);
+ unlock_page(page);
+ put_page(page);
+ return copied;
+}
+
const struct address_space_operations udf_adinicb_aops = {
.readpage = udf_adinicb_readpage,
.writepage = udf_adinicb_writepage,
.write_begin = udf_adinicb_write_begin,
- .write_end = simple_write_end,
+ .write_end = udf_adinicb_write_end,
.direct_IO = udf_adinicb_direct_IO,
};