summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorJeff Garzik <jgarzik@pobox.com>2005-09-14 08:19:08 -0400
committerJeff Garzik <jgarzik@pobox.com>2005-09-14 08:19:08 -0400
commit905ec87e93bc9e01b15c60035cd6a50c636cbaef (patch)
tree46fd7618d6511611ffc19eb0dd4d7bc6b90a41c2 /fs
parent1d6ae775d7a948c9575658eb41184fd2e506c0df (diff)
parent2f4ba45a75d6383b4a1201169a808ffea416ffa0 (diff)
downloadlwn-905ec87e93bc9e01b15c60035cd6a50c636cbaef.tar.gz
lwn-905ec87e93bc9e01b15c60035cd6a50c636cbaef.zip
Merge /spare/repo/linux-2.6/
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/9p.c359
-rw-r--r--fs/9p/9p.h341
-rw-r--r--fs/9p/Makefile17
-rw-r--r--fs/9p/conv.c693
-rw-r--r--fs/9p/conv.h36
-rw-r--r--fs/9p/debug.h70
-rw-r--r--fs/9p/error.c93
-rw-r--r--fs/9p/error.h178
-rw-r--r--fs/9p/fid.c241
-rw-r--r--fs/9p/fid.h57
-rw-r--r--fs/9p/mux.c475
-rw-r--r--fs/9p/mux.h41
-rw-r--r--fs/9p/trans_fd.c172
-rw-r--r--fs/9p/trans_sock.c290
-rw-r--r--fs/9p/transport.h46
-rw-r--r--fs/9p/v9fs.c452
-rw-r--r--fs/9p/v9fs.h103
-rw-r--r--fs/9p/v9fs_vfs.h53
-rw-r--r--fs/9p/vfs_dentry.c126
-rw-r--r--fs/9p/vfs_dir.c226
-rw-r--r--fs/9p/vfs_file.c401
-rw-r--r--fs/9p/vfs_inode.c1338
-rw-r--r--fs/9p/vfs_super.c280
-rw-r--r--fs/Kconfig24
-rw-r--r--fs/Makefile2
-rw-r--r--fs/affs/inode.c1
-rw-r--r--fs/aio.c34
-rw-r--r--fs/autofs/autofs_i.h3
-rw-r--r--fs/autofs/dirhash.c5
-rw-r--r--fs/autofs/inode.c3
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/dir.c25
-rw-r--r--fs/bfs/file.c23
-rw-r--r--fs/bfs/inode.c104
-rw-r--r--fs/bio.c2
-rw-r--r--fs/buffer.c1
-rw-r--r--fs/cifs/connect.c6
-rw-r--r--fs/compat.c6
-rw-r--r--fs/compat_ioctl.c7
-rw-r--r--fs/cramfs/uncompress.c1
-rw-r--r--fs/dcache.c16
-rw-r--r--fs/exec.c8
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/xattr.h8
-rw-r--r--fs/ext2/xattr_security.c22
-rw-r--r--fs/ext3/ialloc.c5
-rw-r--r--fs/ext3/inode.c2
-rw-r--r--fs/ext3/xattr.h11
-rw-r--r--fs/ext3/xattr_security.c22
-rw-r--r--fs/fat/inode.c2
-rw-r--r--fs/fcntl.c60
-rw-r--r--fs/file.c387
-rw-r--r--fs/file_table.c40
-rw-r--r--fs/fuse/Makefile7
-rw-r--r--fs/fuse/dev.c877
-rw-r--r--fs/fuse/dir.c982
-rw-r--r--fs/fuse/file.c555
-rw-r--r--fs/fuse/fuse_i.h451
-rw-r--r--fs/fuse/inode.c591
-rw-r--r--fs/hostfs/hostfs_kern.c1
-rw-r--r--fs/hpfs/inode.c1
-rw-r--r--fs/inode.c12
-rw-r--r--fs/jbd/transaction.c3
-rw-r--r--fs/jffs/inode-v23.c1
-rw-r--r--fs/jffs/intrep.c22
-rw-r--r--fs/jfs/acl.c24
-rw-r--r--fs/jfs/inode.c26
-rw-r--r--fs/jfs/jfs_acl.h12
-rw-r--r--fs/jfs/jfs_xattr.h14
-rw-r--r--fs/jfs/namei.c85
-rw-r--r--fs/jfs/xattr.c94
-rw-r--r--fs/lockd/clntproc.c3
-rw-r--r--fs/locks.c8
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/namei.c26
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfs3proc.c3
-rw-r--r--fs/nfs/nfs4proc.c12
-rw-r--r--fs/ntfs/ChangeLog71
-rw-r--r--fs/ntfs/Makefile2
-rw-r--r--fs/ntfs/aops.c294
-rw-r--r--fs/ntfs/attrib.c125
-rw-r--r--fs/ntfs/attrib.h2
-rw-r--r--fs/ntfs/compress.c8
-rw-r--r--fs/ntfs/dir.c3
-rw-r--r--fs/ntfs/file.c9
-rw-r--r--fs/ntfs/index.c1
-rw-r--r--fs/ntfs/inode.c227
-rw-r--r--fs/ntfs/lcnalloc.c39
-rw-r--r--fs/ntfs/lcnalloc.h21
-rw-r--r--fs/ntfs/logfile.c251
-rw-r--r--fs/ntfs/logfile.h8
-rw-r--r--fs/ntfs/malloc.h48
-rw-r--r--fs/ntfs/mft.c4
-rw-r--r--fs/ntfs/runlist.c374
-rw-r--r--fs/ntfs/runlist.h3
-rw-r--r--fs/ntfs/super.c30
-rw-r--r--fs/ntfs/unistr.c3
-rw-r--r--fs/open.c43
-rw-r--r--fs/pipe.c6
-rw-r--r--fs/proc/array.c5
-rw-r--r--fs/proc/base.c33
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/qnx4/inode.c1
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/journal.c3
-rw-r--r--fs/reiserfs/super.c3
-rw-r--r--fs/select.c23
-rw-r--r--fs/smbfs/inode.c1
-rw-r--r--fs/smbfs/proc.c3
-rw-r--r--fs/sysv/inode.c1
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/ufs/inode.c1
-rw-r--r--fs/xfs/Kconfig45
-rw-r--r--fs/xfs/Makefile-linux-2.615
-rw-r--r--fs/xfs/linux-2.6/time.h3
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c12
-rw-r--r--fs/xfs/quota/Makefile-linux-2.68
-rw-r--r--fs/xfs/support/ktrace.c2
-rw-r--r--fs/xfs/xfs_arch.h22
-rw-r--r--fs/xfs/xfs_bmap_btree.c8
-rw-r--r--fs/xfs/xfs_bmap_btree.h12
-rw-r--r--fs/xfs/xfs_dir_leaf.h6
-rw-r--r--fs/xfs/xfs_inode_item.c4
-rw-r--r--fs/xfs/xfs_log_priv.h10
129 files changed, 11604 insertions, 946 deletions
diff --git a/fs/9p/9p.c b/fs/9p/9p.c
new file mode 100644
index 000000000000..e847f504a47c
--- /dev/null
+++ b/fs/9p/9p.c
@@ -0,0 +1,359 @@
+/*
+ * linux/fs/9p/9p.c
+ *
+ * This file contains functions 9P2000 functions
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "mux.h"
+
+/**
+ * v9fs_t_version - negotiate protocol parameters with sever
+ * @v9ses: 9P2000 session information
+ * @msize: requested max size packet
+ * @version: requested version.extension string
+ * @fcall: pointer to response fcall pointer
+ *
+ */
+
+int
+v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
+ char *version, struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+
+ dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
+ msg.id = TVERSION;
+ msg.params.tversion.msize = msize;
+ msg.params.tversion.version = version;
+
+ return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_attach - mount the server
+ * @v9ses: 9P2000 session information
+ * @uname: user name doing the attach
+ * @aname: remote name being attached to
+ * @fid: mount fid to attatch to root node
+ * @afid: authentication fid (in this case result key)
+ * @fcall: pointer to response fcall pointer
+ *
+ */
+
+int
+v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
+ u32 fid, u32 afid, struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+
+ dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
+ aname, fid, afid);
+ msg.id = TATTACH;
+ msg.params.tattach.fid = fid;
+ msg.params.tattach.afid = afid;
+ msg.params.tattach.uname = uname;
+ msg.params.tattach.aname = aname;
+
+ return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_clunk - release a fid (finish a transaction)
+ * @v9ses: 9P2000 session information
+ * @fid: fid to release
+ * @fcall: pointer to response fcall pointer
+ *
+ */
+
+int
+v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+
+ dprintk(DEBUG_9P, "fid %d\n", fid);
+ msg.id = TCLUNK;
+ msg.params.tclunk.fid = fid;
+
+ return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_v9fs_t_flush - flush a pending transaction
+ * @v9ses: 9P2000 session information
+ * @tag: tid to release
+ *
+ */
+
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
+{
+ struct v9fs_fcall msg;
+
+ dprintk(DEBUG_9P, "oldtag %d\n", tag);
+ msg.id = TFLUSH;
+ msg.params.tflush.oldtag = tag;
+ return v9fs_mux_rpc(v9ses, &msg, NULL);
+}
+
+/**
+ * v9fs_t_stat - read a file's meta-data
+ * @v9ses: 9P2000 session information
+ * @fid: fid pointing to file or directory to get info about
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+
+ dprintk(DEBUG_9P, "fid %d\n", fid);
+ if (fcall)
+ *fcall = NULL;
+
+ msg.id = TSTAT;
+ msg.params.tstat.fid = fid;
+ return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_wstat - write a file's meta-data
+ * @v9ses: 9P2000 session information
+ * @fid: fid pointing to file or directory to write info about
+ * @stat: metadata
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_stat *stat, struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+
+ dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length);
+ msg.id = TWSTAT;
+ msg.params.twstat.fid = fid;
+ msg.params.twstat.stat = stat;
+
+ return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_walk - walk a fid to a new file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to walk
+ * @newfid: new fid (for clone operations)
+ * @name: path to walk fid to
+ * @fcall: pointer to response fcall
+ *
+ */
+
+/* TODO: support multiple walk */
+
+int
+v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
+ char *name, struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+
+ dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
+ msg.id = TWALK;
+ msg.params.twalk.fid = fid;
+ msg.params.twalk.newfid = newfid;
+
+ if (name) {
+ msg.params.twalk.nwname = 1;
+ msg.params.twalk.wnames = &name;
+ } else {
+ msg.params.twalk.nwname = 0;
+ }
+
+ return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_open - open a file
+ *
+ * @v9ses - 9P2000 session information
+ * @fid - fid to open
+ * @mode - mode to open file (R, RW, etc)
+ * @fcall - pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
+ struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+ long errorno = -1;
+
+ dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
+ msg.id = TOPEN;
+ msg.params.topen.fid = fid;
+ msg.params.topen.mode = mode;
+
+ errorno = v9fs_mux_rpc(v9ses, &msg, fcall);
+
+ return errorno;
+}
+
+/**
+ * v9fs_t_remove - remove a file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to remove
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+
+ dprintk(DEBUG_9P, "fid %d\n", fid);
+ msg.id = TREMOVE;
+ msg.params.tremove.fid = fid;
+ return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_create - create a file or directory
+ * @v9ses: 9P2000 session information
+ * @fid: fid to create
+ * @name: name of the file or directory to create
+ * @perm: permissions to create with
+ * @mode: mode to open file (R, RW, etc)
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
+ u32 perm, u8 mode, struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+
+ dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
+ fid, name, perm, mode);
+
+ msg.id = TCREATE;
+ msg.params.tcreate.fid = fid;
+ msg.params.tcreate.name = name;
+ msg.params.tcreate.perm = perm;
+ msg.params.tcreate.mode = mode;
+
+ return v9fs_mux_rpc(v9ses, &msg, fcall);
+}
+
+/**
+ * v9fs_t_read - read data
+ * @v9ses: 9P2000 session information
+ * @fid: fid to read from
+ * @offset: offset to start read at
+ * @count: how many bytes to read
+ * @fcall: pointer to response fcall (with data)
+ *
+ */
+
+int
+v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
+ u32 count, struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+ struct v9fs_fcall *rc = NULL;
+ long errorno = -1;
+
+ dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid,
+ (long unsigned int)offset, count);
+ msg.id = TREAD;
+ msg.params.tread.fid = fid;
+ msg.params.tread.offset = offset;
+ msg.params.tread.count = count;
+ errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
+
+ if (!errorno) {
+ errorno = rc->params.rread.count;
+ dump_data(rc->params.rread.data, rc->params.rread.count);
+ }
+
+ if (fcall)
+ *fcall = rc;
+ else
+ kfree(rc);
+
+ return errorno;
+}
+
+/**
+ * v9fs_t_write - write data
+ * @v9ses: 9P2000 session information
+ * @fid: fid to write to
+ * @offset: offset to start write at
+ * @count: how many bytes to write
+ * @fcall: pointer to response fcall
+ *
+ */
+
+int
+v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid,
+ u64 offset, u32 count, void *data, struct v9fs_fcall **fcall)
+{
+ struct v9fs_fcall msg;
+ struct v9fs_fcall *rc = NULL;
+ long errorno = -1;
+
+ dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
+ (unsigned long long)offset, count);
+ dump_data(data, count);
+
+ msg.id = TWRITE;
+ msg.params.twrite.fid = fid;
+ msg.params.twrite.offset = offset;
+ msg.params.twrite.count = count;
+ msg.params.twrite.data = data;
+
+ errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
+
+ if (!errorno)
+ errorno = rc->params.rwrite.count;
+
+ if (fcall)
+ *fcall = rc;
+ else
+ kfree(rc);
+
+ return errorno;
+}
diff --git a/fs/9p/9p.h b/fs/9p/9p.h
new file mode 100644
index 000000000000..f55424216be2
--- /dev/null
+++ b/fs/9p/9p.h
@@ -0,0 +1,341 @@
+/*
+ * linux/fs/9p/9p.h
+ *
+ * 9P protocol definitions.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/* Message Types */
+enum {
+ TVERSION = 100,
+ RVERSION,
+ TAUTH = 102,
+ RAUTH,
+ TATTACH = 104,
+ RATTACH,
+ TERROR = 106,
+ RERROR,
+ TFLUSH = 108,
+ RFLUSH,
+ TWALK = 110,
+ RWALK,
+ TOPEN = 112,
+ ROPEN,
+ TCREATE = 114,
+ RCREATE,
+ TREAD = 116,
+ RREAD,
+ TWRITE = 118,
+ RWRITE,
+ TCLUNK = 120,
+ RCLUNK,
+ TREMOVE = 122,
+ RREMOVE,
+ TSTAT = 124,
+ RSTAT,
+ TWSTAT = 126,
+ RWSTAT,
+};
+
+/* modes */
+enum {
+ V9FS_OREAD = 0x00,
+ V9FS_OWRITE = 0x01,
+ V9FS_ORDWR = 0x02,
+ V9FS_OEXEC = 0x03,
+ V9FS_OEXCL = 0x04,
+ V9FS_OTRUNC = 0x10,
+ V9FS_OREXEC = 0x20,
+ V9FS_ORCLOSE = 0x40,
+ V9FS_OAPPEND = 0x80,
+};
+
+/* permissions */
+enum {
+ V9FS_DMDIR = 0x80000000,
+ V9FS_DMAPPEND = 0x40000000,
+ V9FS_DMEXCL = 0x20000000,
+ V9FS_DMMOUNT = 0x10000000,
+ V9FS_DMAUTH = 0x08000000,
+ V9FS_DMTMP = 0x04000000,
+ V9FS_DMSYMLINK = 0x02000000,
+ V9FS_DMLINK = 0x01000000,
+ /* 9P2000.u extensions */
+ V9FS_DMDEVICE = 0x00800000,
+ V9FS_DMNAMEDPIPE = 0x00200000,
+ V9FS_DMSOCKET = 0x00100000,
+ V9FS_DMSETUID = 0x00080000,
+ V9FS_DMSETGID = 0x00040000,
+};
+
+/* qid.types */
+enum {
+ V9FS_QTDIR = 0x80,
+ V9FS_QTAPPEND = 0x40,
+ V9FS_QTEXCL = 0x20,
+ V9FS_QTMOUNT = 0x10,
+ V9FS_QTAUTH = 0x08,
+ V9FS_QTTMP = 0x04,
+ V9FS_QTSYMLINK = 0x02,
+ V9FS_QTLINK = 0x01,
+ V9FS_QTFILE = 0x00,
+};
+
+/* ample room for Twrite/Rread header (iounit) */
+#define V9FS_IOHDRSZ 24
+
+/* qids are the unique ID for a file (like an inode */
+struct v9fs_qid {
+ u8 type;
+ u32 version;
+ u64 path;
+};
+
+/* Plan 9 file metadata (stat) structure */
+struct v9fs_stat {
+ u16 size;
+ u16 type;
+ u32 dev;
+ struct v9fs_qid qid;
+ u32 mode;
+ u32 atime;
+ u32 mtime;
+ u64 length;
+ char *name;
+ char *uid;
+ char *gid;
+ char *muid;
+ char *extension; /* 9p2000.u extensions */
+ u32 n_uid; /* 9p2000.u extensions */
+ u32 n_gid; /* 9p2000.u extensions */
+ u32 n_muid; /* 9p2000.u extensions */
+ char data[0];
+};
+
+/* Structures for Protocol Operations */
+
+struct Tversion {
+ u32 msize;
+ char *version;
+};
+
+struct Rversion {
+ u32 msize;
+ char *version;
+};
+
+struct Tauth {
+ u32 afid;
+ char *uname;
+ char *aname;
+};
+
+struct Rauth {
+ struct v9fs_qid qid;
+};
+
+struct Rerror {
+ char *error;
+ u32 errno; /* 9p2000.u extension */
+};
+
+struct Tflush {
+ u32 oldtag;
+};
+
+struct Rflush {
+};
+
+struct Tattach {
+ u32 fid;
+ u32 afid;
+ char *uname;
+ char *aname;
+};
+
+struct Rattach {
+ struct v9fs_qid qid;
+};
+
+struct Twalk {
+ u32 fid;
+ u32 newfid;
+ u32 nwname;
+ char **wnames;
+};
+
+struct Rwalk {
+ u32 nwqid;
+ struct v9fs_qid *wqids;
+};
+
+struct Topen {
+ u32 fid;
+ u8 mode;
+};
+
+struct Ropen {
+ struct v9fs_qid qid;
+ u32 iounit;
+};
+
+struct Tcreate {
+ u32 fid;
+ char *name;
+ u32 perm;
+ u8 mode;
+};
+
+struct Rcreate {
+ struct v9fs_qid qid;
+ u32 iounit;
+};
+
+struct Tread {
+ u32 fid;
+ u64 offset;
+ u32 count;
+};
+
+struct Rread {
+ u32 count;
+ u8 *data;
+};
+
+struct Twrite {
+ u32 fid;
+ u64 offset;
+ u32 count;
+ u8 *data;
+};
+
+struct Rwrite {
+ u32 count;
+};
+
+struct Tclunk {
+ u32 fid;
+};
+
+struct Rclunk {
+};
+
+struct Tremove {
+ u32 fid;
+};
+
+struct Rremove {
+};
+
+struct Tstat {
+ u32 fid;
+};
+
+struct Rstat {
+ struct v9fs_stat *stat;
+};
+
+struct Twstat {
+ u32 fid;
+ struct v9fs_stat *stat;
+};
+
+struct Rwstat {
+};
+
+/*
+ * fcall is the primary packet structure
+ *
+ */
+
+struct v9fs_fcall {
+ u32 size;
+ u8 id;
+ u16 tag;
+
+ union {
+ struct Tversion tversion;
+ struct Rversion rversion;
+ struct Tauth tauth;
+ struct Rauth rauth;
+ struct Rerror rerror;
+ struct Tflush tflush;
+ struct Rflush rflush;
+ struct Tattach tattach;
+ struct Rattach rattach;
+ struct Twalk twalk;
+ struct Rwalk rwalk;
+ struct Topen topen;
+ struct Ropen ropen;
+ struct Tcreate tcreate;
+ struct Rcreate rcreate;
+ struct Tread tread;
+ struct Rread rread;
+ struct Twrite twrite;
+ struct Rwrite rwrite;
+ struct Tclunk tclunk;
+ struct Rclunk rclunk;
+ struct Tremove tremove;
+ struct Rremove rremove;
+ struct Tstat tstat;
+ struct Rstat rstat;
+ struct Twstat twstat;
+ struct Rwstat rwstat;
+ } params;
+};
+
+#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "")
+
+int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
+ char *version, struct v9fs_fcall **rcall);
+
+int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
+ u32 fid, u32 afid, struct v9fs_fcall **rcall);
+
+int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_fcall **rcall);
+
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
+
+int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_fcall **rcall);
+
+int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_stat *stat, struct v9fs_fcall **rcall);
+
+int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
+ char *name, struct v9fs_fcall **rcall);
+
+int v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
+ struct v9fs_fcall **rcall);
+
+int v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
+ struct v9fs_fcall **rcall);
+
+int v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
+ u32 perm, u8 mode, struct v9fs_fcall **rcall);
+
+int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
+ u64 offset, u32 count, struct v9fs_fcall **rcall);
+
+int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
+ u32 count, void *data, struct v9fs_fcall **rcall);
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
new file mode 100644
index 000000000000..e4e4ffe5a7dc
--- /dev/null
+++ b/fs/9p/Makefile
@@ -0,0 +1,17 @@
+obj-$(CONFIG_9P_FS) := 9p2000.o
+
+9p2000-objs := \
+ vfs_super.o \
+ vfs_inode.o \
+ vfs_file.o \
+ vfs_dir.o \
+ vfs_dentry.o \
+ error.o \
+ mux.o \
+ trans_fd.o \
+ trans_sock.o \
+ 9p.o \
+ conv.o \
+ v9fs.o \
+ fid.o
+
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
new file mode 100644
index 000000000000..1554731bd653
--- /dev/null
+++ b/fs/9p/conv.c
@@ -0,0 +1,693 @@
+/*
+ * linux/fs/9p/conv.c
+ *
+ * 9P protocol conversion functions
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "conv.h"
+
+/*
+ * Buffer to help with string parsing
+ */
+struct cbuf {
+ unsigned char *sp;
+ unsigned char *p;
+ unsigned char *ep;
+};
+
+static inline void buf_init(struct cbuf *buf, void *data, int datalen)
+{
+ buf->sp = buf->p = data;
+ buf->ep = data + datalen;
+}
+
+static inline int buf_check_overflow(struct cbuf *buf)
+{
+ return buf->p > buf->ep;
+}
+
+static inline void buf_check_size(struct cbuf *buf, int len)
+{
+ if (buf->p+len > buf->ep) {
+ if (buf->p < buf->ep) {
+ eprintk(KERN_ERR, "buffer overflow\n");
+ buf->p = buf->ep + 1;
+ }
+ }
+}
+
+static inline void *buf_alloc(struct cbuf *buf, int len)
+{
+ void *ret = NULL;
+
+ buf_check_size(buf, len);
+ ret = buf->p;
+ buf->p += len;
+
+ return ret;
+}
+
+static inline void buf_put_int8(struct cbuf *buf, u8 val)
+{
+ buf_check_size(buf, 1);
+
+ buf->p[0] = val;
+ buf->p++;
+}
+
+static inline void buf_put_int16(struct cbuf *buf, u16 val)
+{
+ buf_check_size(buf, 2);
+
+ *(__le16 *) buf->p = cpu_to_le16(val);
+ buf->p += 2;
+}
+
+static inline void buf_put_int32(struct cbuf *buf, u32 val)
+{
+ buf_check_size(buf, 4);
+
+ *(__le32 *)buf->p = cpu_to_le32(val);
+ buf->p += 4;
+}
+
+static inline void buf_put_int64(struct cbuf *buf, u64 val)
+{
+ buf_check_size(buf, 8);
+
+ *(__le64 *)buf->p = cpu_to_le64(val);
+ buf->p += 8;
+}
+
+static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
+{
+ buf_check_size(buf, slen + 2);
+
+ buf_put_int16(buf, slen);
+ memcpy(buf->p, s, slen);
+ buf->p += slen;
+}
+
+static inline void buf_put_string(struct cbuf *buf, const char *s)
+{
+ buf_put_stringn(buf, s, strlen(s));
+}
+
+static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
+{
+ buf_check_size(buf, datalen);
+
+ memcpy(buf->p, data, datalen);
+ buf->p += datalen;
+}
+
+static inline u8 buf_get_int8(struct cbuf *buf)
+{
+ u8 ret = 0;
+
+ buf_check_size(buf, 1);
+ ret = buf->p[0];
+
+ buf->p++;
+
+ return ret;
+}
+
+static inline u16 buf_get_int16(struct cbuf *buf)
+{
+ u16 ret = 0;
+
+ buf_check_size(buf, 2);
+ ret = le16_to_cpu(*(__le16 *)buf->p);
+
+ buf->p += 2;
+
+ return ret;
+}
+
+static inline u32 buf_get_int32(struct cbuf *buf)
+{
+ u32 ret = 0;
+
+ buf_check_size(buf, 4);
+ ret = le32_to_cpu(*(__le32 *)buf->p);
+
+ buf->p += 4;
+
+ return ret;
+}
+
+static inline u64 buf_get_int64(struct cbuf *buf)
+{
+ u64 ret = 0;
+
+ buf_check_size(buf, 8);
+ ret = le64_to_cpu(*(__le64 *)buf->p);
+
+ buf->p += 8;
+
+ return ret;
+}
+
+static inline int
+buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
+{
+
+ u16 len = buf_get_int16(buf);
+ buf_check_size(buf, len);
+ if (len + 1 > datalen)
+ return 0;
+
+ memcpy(data, buf->p, len);
+ data[len] = 0;
+ buf->p += len;
+
+ return len + 1;
+}
+
+static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
+{
+ char *ret = NULL;
+ int n = buf_get_string(buf, sbuf->p, sbuf->ep - sbuf->p);
+
+ if (n > 0) {
+ ret = sbuf->p;
+ sbuf->p += n;
+ }
+
+ return ret;
+}
+
+static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
+{
+ buf_check_size(buf, datalen);
+
+ memcpy(data, buf->p, datalen);
+ buf->p += datalen;
+
+ return datalen;
+}
+
+static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
+ int datalen)
+{
+ char *ret = NULL;
+ int n = 0;
+
+ buf_check_size(dbuf, datalen);
+
+ n = buf_get_data(buf, dbuf->p, datalen);
+
+ if (n > 0) {
+ ret = dbuf->p;
+ dbuf->p += n;
+ }
+
+ return ret;
+}
+
+/**
+ * v9fs_size_stat - calculate the size of a variable length stat struct
+ * @v9ses: session information
+ * @stat: metadata (stat) structure
+ *
+ */
+
+static int v9fs_size_stat(struct v9fs_session_info *v9ses,
+ struct v9fs_stat *stat)
+{
+ int size = 0;
+
+ if (stat == NULL) {
+ eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
+ return 0;
+ }
+
+ size = /* 2 + *//* size[2] */
+ 2 + /* type[2] */
+ 4 + /* dev[4] */
+ 1 + /* qid.type[1] */
+ 4 + /* qid.vers[4] */
+ 8 + /* qid.path[8] */
+ 4 + /* mode[4] */
+ 4 + /* atime[4] */
+ 4 + /* mtime[4] */
+ 8 + /* length[8] */
+ 8; /* minimum sum of string lengths */
+
+ if (stat->name)
+ size += strlen(stat->name);
+ if (stat->uid)
+ size += strlen(stat->uid);
+ if (stat->gid)
+ size += strlen(stat->gid);
+ if (stat->muid)
+ size += strlen(stat->muid);
+
+ if (v9ses->extended) {
+ size += 4 + /* n_uid[4] */
+ 4 + /* n_gid[4] */
+ 4 + /* n_muid[4] */
+ 2; /* string length of extension[4] */
+ if (stat->extension)
+ size += strlen(stat->extension);
+ }
+
+ return size;
+}
+
+/**
+ * serialize_stat - safely format a stat structure for transmission
+ * @v9ses: session info
+ * @stat: metadata (stat) structure
+ * @bufp: buffer to serialize structure into
+ *
+ */
+
+static int
+serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
+ struct cbuf *bufp)
+{
+ buf_put_int16(bufp, stat->size);
+ buf_put_int16(bufp, stat->type);
+ buf_put_int32(bufp, stat->dev);
+ buf_put_int8(bufp, stat->qid.type);
+ buf_put_int32(bufp, stat->qid.version);
+ buf_put_int64(bufp, stat->qid.path);
+ buf_put_int32(bufp, stat->mode);
+ buf_put_int32(bufp, stat->atime);
+ buf_put_int32(bufp, stat->mtime);
+ buf_put_int64(bufp, stat->length);
+
+ buf_put_string(bufp, stat->name);
+ buf_put_string(bufp, stat->uid);
+ buf_put_string(bufp, stat->gid);
+ buf_put_string(bufp, stat->muid);
+
+ if (v9ses->extended) {
+ buf_put_string(bufp, stat->extension);
+ buf_put_int32(bufp, stat->n_uid);
+ buf_put_int32(bufp, stat->n_gid);
+ buf_put_int32(bufp, stat->n_muid);
+ }
+
+ if (buf_check_overflow(bufp))
+ return 0;
+
+ return stat->size;
+}
+
+/**
+ * deserialize_stat - safely decode a recieved metadata (stat) structure
+ * @v9ses: session info
+ * @bufp: buffer to deserialize
+ * @stat: metadata (stat) structure
+ * @dbufp: buffer to deserialize variable strings into
+ *
+ */
+
+static inline int
+deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
+ struct v9fs_stat *stat, struct cbuf *dbufp)
+{
+
+ stat->size = buf_get_int16(bufp);
+ stat->type = buf_get_int16(bufp);
+ stat->dev = buf_get_int32(bufp);
+ stat->qid.type = buf_get_int8(bufp);
+ stat->qid.version = buf_get_int32(bufp);
+ stat->qid.path = buf_get_int64(bufp);
+ stat->mode = buf_get_int32(bufp);
+ stat->atime = buf_get_int32(bufp);
+ stat->mtime = buf_get_int32(bufp);
+ stat->length = buf_get_int64(bufp);
+ stat->name = buf_get_stringb(bufp, dbufp);
+ stat->uid = buf_get_stringb(bufp, dbufp);
+ stat->gid = buf_get_stringb(bufp, dbufp);
+ stat->muid = buf_get_stringb(bufp, dbufp);
+
+ if (v9ses->extended) {
+ stat->extension = buf_get_stringb(bufp, dbufp);
+ stat->n_uid = buf_get_int32(bufp);
+ stat->n_gid = buf_get_int32(bufp);
+ stat->n_muid = buf_get_int32(bufp);
+ }
+
+ if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
+ return 0;
+
+ return stat->size + 2;
+}
+
+/**
+ * deserialize_statb - wrapper for decoding a received metadata structure
+ * @v9ses: session info
+ * @bufp: buffer to deserialize
+ * @dbufp: buffer to deserialize variable strings into
+ *
+ */
+
+static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
+ *v9ses, struct cbuf *bufp,
+ struct cbuf *dbufp)
+{
+ struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat));
+
+ if (ret) {
+ int n = deserialize_stat(v9ses, bufp, ret, dbufp);
+ if (n <= 0)
+ return NULL;
+ }
+
+ return ret;
+}
+
+/**
+ * v9fs_deserialize_stat - decode a received metadata structure
+ * @v9ses: session info
+ * @buf: buffer to deserialize
+ * @buflen: length of received buffer
+ * @stat: metadata structure to decode into
+ * @statlen: length of destination metadata structure
+ *
+ */
+
+int
+v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf,
+ u32 buflen, struct v9fs_stat *stat, u32 statlen)
+{
+ struct cbuf buffer;
+ struct cbuf *bufp = &buffer;
+ struct cbuf dbuffer;
+ struct cbuf *dbufp = &dbuffer;
+
+ buf_init(bufp, buf, buflen);
+ buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat),
+ statlen - sizeof(struct v9fs_stat));
+
+ return deserialize_stat(v9ses, bufp, stat, dbufp);
+}
+
+static inline int
+v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
+{
+ int size = 4 + 1 + 2; /* size[4] msg[1] tag[2] */
+ int i = 0;
+
+ switch (fcall->id) {
+ default:
+ eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
+ return 0;
+ case TVERSION: /* msize[4] version[s] */
+ size += 4 + 2 + strlen(fcall->params.tversion.version);
+ break;
+ case TAUTH: /* afid[4] uname[s] aname[s] */
+ size += 4 + 2 + strlen(fcall->params.tauth.uname) +
+ 2 + strlen(fcall->params.tauth.aname);
+ break;
+ case TFLUSH: /* oldtag[2] */
+ size += 2;
+ break;
+ case TATTACH: /* fid[4] afid[4] uname[s] aname[s] */
+ size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
+ 2 + strlen(fcall->params.tattach.aname);
+ break;
+ case TWALK: /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
+ size += 4 + 4 + 2;
+ /* now compute total for the array of names */
+ for (i = 0; i < fcall->params.twalk.nwname; i++)
+ size += 2 + strlen(fcall->params.twalk.wnames[i]);
+ break;
+ case TOPEN: /* fid[4] mode[1] */
+ size += 4 + 1;
+ break;
+ case TCREATE: /* fid[4] name[s] perm[4] mode[1] */
+ size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
+ break;
+ case TREAD: /* fid[4] offset[8] count[4] */
+ size += 4 + 8 + 4;
+ break;
+ case TWRITE: /* fid[4] offset[8] count[4] data[count] */
+ size += 4 + 8 + 4 + fcall->params.twrite.count;
+ break;
+ case TCLUNK: /* fid[4] */
+ size += 4;
+ break;
+ case TREMOVE: /* fid[4] */
+ size += 4;
+ break;
+ case TSTAT: /* fid[4] */
+ size += 4;
+ break;
+ case TWSTAT: /* fid[4] stat[n] */
+ fcall->params.twstat.stat->size =
+ v9fs_size_stat(v9ses, fcall->params.twstat.stat);
+ size += 4 + 2 + 2 + fcall->params.twstat.stat->size;
+ }
+ return size;
+}
+
+/*
+ * v9fs_serialize_fcall - marshall fcall struct into a packet
+ * @v9ses: session information
+ * @fcall: structure to convert
+ * @data: buffer to serialize fcall into
+ * @datalen: length of buffer to serialize fcall into
+ *
+ */
+
+int
+v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
+ void *data, u32 datalen)
+{
+ int i = 0;
+ struct v9fs_stat *stat = NULL;
+ struct cbuf buffer;
+ struct cbuf *bufp = &buffer;
+
+ buf_init(bufp, data, datalen);
+
+ if (!fcall) {
+ eprintk(KERN_ERR, "no fcall\n");
+ return -EINVAL;
+ }
+
+ fcall->size = v9fs_size_fcall(v9ses, fcall);
+
+ buf_put_int32(bufp, fcall->size);
+ buf_put_int8(bufp, fcall->id);
+ buf_put_int16(bufp, fcall->tag);
+
+ dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
+ fcall->tag);
+
+ /* now encode it */
+ switch (fcall->id) {
+ default:
+ eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
+ return -EPROTO;
+ case TVERSION:
+ buf_put_int32(bufp, fcall->params.tversion.msize);
+ buf_put_string(bufp, fcall->params.tversion.version);
+ break;
+ case TAUTH:
+ buf_put_int32(bufp, fcall->params.tauth.afid);
+ buf_put_string(bufp, fcall->params.tauth.uname);
+ buf_put_string(bufp, fcall->params.tauth.aname);
+ break;
+ case TFLUSH:
+ buf_put_int16(bufp, fcall->params.tflush.oldtag);
+ break;
+ case TATTACH:
+ buf_put_int32(bufp, fcall->params.tattach.fid);
+ buf_put_int32(bufp, fcall->params.tattach.afid);
+ buf_put_string(bufp, fcall->params.tattach.uname);
+ buf_put_string(bufp, fcall->params.tattach.aname);
+ break;
+ case TWALK:
+ buf_put_int32(bufp, fcall->params.twalk.fid);
+ buf_put_int32(bufp, fcall->params.twalk.newfid);
+ buf_put_int16(bufp, fcall->params.twalk.nwname);
+ for (i = 0; i < fcall->params.twalk.nwname; i++)
+ buf_put_string(bufp, fcall->params.twalk.wnames[i]);
+ break;
+ case TOPEN:
+ buf_put_int32(bufp, fcall->params.topen.fid);
+ buf_put_int8(bufp, fcall->params.topen.mode);
+ break;
+ case TCREATE:
+ buf_put_int32(bufp, fcall->params.tcreate.fid);
+ buf_put_string(bufp, fcall->params.tcreate.name);
+ buf_put_int32(bufp, fcall->params.tcreate.perm);
+ buf_put_int8(bufp, fcall->params.tcreate.mode);
+ break;
+ case TREAD:
+ buf_put_int32(bufp, fcall->params.tread.fid);
+ buf_put_int64(bufp, fcall->params.tread.offset);
+ buf_put_int32(bufp, fcall->params.tread.count);
+ break;
+ case TWRITE:
+ buf_put_int32(bufp, fcall->params.twrite.fid);
+ buf_put_int64(bufp, fcall->params.twrite.offset);
+ buf_put_int32(bufp, fcall->params.twrite.count);
+ buf_put_data(bufp, fcall->params.twrite.data,
+ fcall->params.twrite.count);
+ break;
+ case TCLUNK:
+ buf_put_int32(bufp, fcall->params.tclunk.fid);
+ break;
+ case TREMOVE:
+ buf_put_int32(bufp, fcall->params.tremove.fid);
+ break;
+ case TSTAT:
+ buf_put_int32(bufp, fcall->params.tstat.fid);
+ break;
+ case TWSTAT:
+ buf_put_int32(bufp, fcall->params.twstat.fid);
+ stat = fcall->params.twstat.stat;
+
+ buf_put_int16(bufp, stat->size + 2);
+ serialize_stat(v9ses, stat, bufp);
+ break;
+ }
+
+ if (buf_check_overflow(bufp))
+ return -EIO;
+
+ return fcall->size;
+}
+
+/**
+ * deserialize_fcall - unmarshal a response
+ * @v9ses: session information
+ * @msgsize: size of rcall message
+ * @buf: recieved buffer
+ * @buflen: length of received buffer
+ * @rcall: fcall structure to populate
+ * @rcalllen: length of fcall structure to populate
+ *
+ */
+
+int
+v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
+ void *buf, u32 buflen, struct v9fs_fcall *rcall,
+ int rcalllen)
+{
+
+ struct cbuf buffer;
+ struct cbuf *bufp = &buffer;
+ struct cbuf dbuffer;
+ struct cbuf *dbufp = &dbuffer;
+ int i = 0;
+
+ buf_init(bufp, buf, buflen);
+ buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
+ rcalllen - sizeof(struct v9fs_fcall));
+
+ rcall->size = msgsize;
+ rcall->id = buf_get_int8(bufp);
+ rcall->tag = buf_get_int16(bufp);
+
+ dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
+ rcall->tag);
+ switch (rcall->id) {
+ default:
+ eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
+ return -EPROTO;
+ case RVERSION:
+ rcall->params.rversion.msize = buf_get_int32(bufp);
+ rcall->params.rversion.version = buf_get_stringb(bufp, dbufp);
+ break;
+ case RFLUSH:
+ break;
+ case RATTACH:
+ rcall->params.rattach.qid.type = buf_get_int8(bufp);
+ rcall->params.rattach.qid.version = buf_get_int32(bufp);
+ rcall->params.rattach.qid.path = buf_get_int64(bufp);
+ break;
+ case RWALK:
+ rcall->params.rwalk.nwqid = buf_get_int16(bufp);
+ rcall->params.rwalk.wqids = buf_alloc(bufp,
+ rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
+ if (rcall->params.rwalk.wqids)
+ for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
+ rcall->params.rwalk.wqids[i].type =
+ buf_get_int8(bufp);
+ rcall->params.rwalk.wqids[i].version =
+ buf_get_int16(bufp);
+ rcall->params.rwalk.wqids[i].path =
+ buf_get_int64(bufp);
+ }
+ break;
+ case ROPEN:
+ rcall->params.ropen.qid.type = buf_get_int8(bufp);
+ rcall->params.ropen.qid.version = buf_get_int32(bufp);
+ rcall->params.ropen.qid.path = buf_get_int64(bufp);
+ rcall->params.ropen.iounit = buf_get_int32(bufp);
+ break;
+ case RCREATE:
+ rcall->params.rcreate.qid.type = buf_get_int8(bufp);
+ rcall->params.rcreate.qid.version = buf_get_int32(bufp);
+ rcall->params.rcreate.qid.path = buf_get_int64(bufp);
+ rcall->params.rcreate.iounit = buf_get_int32(bufp);
+ break;
+ case RREAD:
+ rcall->params.rread.count = buf_get_int32(bufp);
+ rcall->params.rread.data = buf_get_datab(bufp, dbufp,
+ rcall->params.rread.count);
+ break;
+ case RWRITE:
+ rcall->params.rwrite.count = buf_get_int32(bufp);
+ break;
+ case RCLUNK:
+ break;
+ case RREMOVE:
+ break;
+ case RSTAT:
+ buf_get_int16(bufp);
+ rcall->params.rstat.stat =
+ deserialize_statb(v9ses, bufp, dbufp);
+ break;
+ case RWSTAT:
+ break;
+ case RERROR:
+ rcall->params.rerror.error = buf_get_stringb(bufp, dbufp);
+ if (v9ses->extended)
+ rcall->params.rerror.errno = buf_get_int16(bufp);
+ break;
+ }
+
+ if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
+ return -EIO;
+
+ return rcall->size;
+}
diff --git a/fs/9p/conv.h b/fs/9p/conv.h
new file mode 100644
index 000000000000..ee849613c61a
--- /dev/null
+++ b/fs/9p/conv.h
@@ -0,0 +1,36 @@
+/*
+ * linux/fs/9p/conv.h
+ *
+ * 9P protocol conversion definitions
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf,
+ u32 buflen, struct v9fs_stat *stat, u32 statlen);
+int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall,
+ void *buf, u32 buflen);
+int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
+ void *buf, u32 buflen, struct v9fs_fcall *rcall,
+ int rcalllen);
+
+/* this one is actually in error.c right now */
+int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/debug.h b/fs/9p/debug.h
new file mode 100644
index 000000000000..4445f06919d9
--- /dev/null
+++ b/fs/9p/debug.h
@@ -0,0 +1,70 @@
+/*
+ * linux/fs/9p/debug.h - V9FS Debug Definitions
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#define DEBUG_ERROR (1<<0)
+#define DEBUG_CURRENT (1<<1)
+#define DEBUG_9P (1<<2)
+#define DEBUG_VFS (1<<3)
+#define DEBUG_CONV (1<<4)
+#define DEBUG_MUX (1<<5)
+#define DEBUG_TRANS (1<<6)
+#define DEBUG_SLABS (1<<7)
+
+#define DEBUG_DUMP_PKT 0
+
+extern int v9fs_debug_level;
+
+#define dprintk(level, format, arg...) \
+do { \
+ if((v9fs_debug_level & level)==level) \
+ printk(KERN_NOTICE "-- %s (%d): " \
+ format , __FUNCTION__, current->pid , ## arg); \
+} while(0)
+
+#define eprintk(level, format, arg...) \
+do { \
+ printk(level "v9fs: %s (%d): " \
+ format , __FUNCTION__, current->pid , ## arg); \
+} while(0)
+
+#if DEBUG_DUMP_PKT
+static inline void dump_data(const unsigned char *data, unsigned int datalen)
+{
+ int i, j;
+ int len = datalen;
+
+ printk(KERN_DEBUG "data ");
+ for (i = 0; i < len; i += 4) {
+ for (j = 0; (j < 4) && (i + j < len); j++)
+ printk(KERN_DEBUG "%02x", data[i + j]);
+ printk(KERN_DEBUG " ");
+ }
+ printk(KERN_DEBUG "\n");
+}
+#else /* DEBUG_DUMP_PKT */
+static inline void dump_data(const unsigned char *data, unsigned int datalen)
+{
+
+}
+#endif /* DEBUG_DUMP_PKT */
diff --git a/fs/9p/error.c b/fs/9p/error.c
new file mode 100644
index 000000000000..fee5d19179c5
--- /dev/null
+++ b/fs/9p/error.c
@@ -0,0 +1,93 @@
+/*
+ * linux/fs/9p/error.c
+ *
+ * Error string handling
+ *
+ * Plan 9 uses error strings, Unix uses error numbers. These functions
+ * try to help manage that and provide for dynamically adding error
+ * mappings.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/list.h>
+#include <linux/jhash.h>
+
+#include "debug.h"
+#include "error.h"
+
+/**
+ * v9fs_error_init - preload
+ * @errstr: error string
+ *
+ */
+
+int v9fs_error_init(void)
+{
+ struct errormap *c;
+ int bucket;
+
+ /* initialize hash table */
+ for (bucket = 0; bucket < ERRHASHSZ; bucket++)
+ INIT_HLIST_HEAD(&hash_errmap[bucket]);
+
+ /* load initial error map into hash table */
+ for (c = errmap; c->name != NULL; c++) {
+ bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ;
+ INIT_HLIST_NODE(&c->list);
+ hlist_add_head(&c->list, &hash_errmap[bucket]);
+ }
+
+ return 1;
+}
+
+/**
+ * errstr2errno - convert error string to error number
+ * @errstr: error string
+ *
+ */
+
+int v9fs_errstr2errno(char *errstr)
+{
+ int errno = 0;
+ struct hlist_node *p = NULL;
+ struct errormap *c = NULL;
+ int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ;
+
+ hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
+ if (!strcmp(c->name, errstr)) {
+ errno = c->val;
+ break;
+ }
+ }
+
+ if (errno == 0) {
+ /* TODO: if error isn't found, add it dynamically */
+ printk(KERN_ERR "%s: errstr :%s: not found\n", __FUNCTION__,
+ errstr);
+ errno = 1;
+ }
+
+ return -errno;
+}
diff --git a/fs/9p/error.h b/fs/9p/error.h
new file mode 100644
index 000000000000..78f89acf7c9a
--- /dev/null
+++ b/fs/9p/error.h
@@ -0,0 +1,178 @@
+/*
+ * linux/fs/9p/error.h
+ *
+ * Huge Nasty Error Table
+ *
+ * Plan 9 uses error strings, Unix uses error numbers. This table tries to
+ * match UNIX strings and Plan 9 strings to unix error numbers. It is used
+ * to preload the dynamic error table which can also track user-specific error
+ * strings.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/errno.h>
+#include <asm/errno.h>
+
+struct errormap {
+ char *name;
+ int val;
+
+ struct hlist_node list;
+};
+
+#define ERRHASHSZ 32
+static struct hlist_head hash_errmap[ERRHASHSZ];
+
+/* FixMe - reduce to a reasonable size */
+static struct errormap errmap[] = {
+ {"Operation not permitted", EPERM},
+ {"wstat prohibited", EPERM},
+ {"No such file or directory", ENOENT},
+ {"directory entry not found", ENOENT},
+ {"file not found", ENOENT},
+ {"Interrupted system call", EINTR},
+ {"Input/output error", EIO},
+ {"No such device or address", ENXIO},
+ {"Argument list too long", E2BIG},
+ {"Bad file descriptor", EBADF},
+ {"Resource temporarily unavailable", EAGAIN},
+ {"Cannot allocate memory", ENOMEM},
+ {"Permission denied", EACCES},
+ {"Bad address", EFAULT},
+ {"Block device required", ENOTBLK},
+ {"Device or resource busy", EBUSY},
+ {"File exists", EEXIST},
+ {"Invalid cross-device link", EXDEV},
+ {"No such device", ENODEV},
+ {"Not a directory", ENOTDIR},
+ {"Is a directory", EISDIR},
+ {"Invalid argument", EINVAL},
+ {"Too many open files in system", ENFILE},
+ {"Too many open files", EMFILE},
+ {"Text file busy", ETXTBSY},
+ {"File too large", EFBIG},
+ {"No space left on device", ENOSPC},
+ {"Illegal seek", ESPIPE},
+ {"Read-only file system", EROFS},
+ {"Too many links", EMLINK},
+ {"Broken pipe", EPIPE},
+ {"Numerical argument out of domain", EDOM},
+ {"Numerical result out of range", ERANGE},
+ {"Resource deadlock avoided", EDEADLK},
+ {"File name too long", ENAMETOOLONG},
+ {"No locks available", ENOLCK},
+ {"Function not implemented", ENOSYS},
+ {"Directory not empty", ENOTEMPTY},
+ {"Too many levels of symbolic links", ELOOP},
+ {"No message of desired type", ENOMSG},
+ {"Identifier removed", EIDRM},
+ {"No data available", ENODATA},
+ {"Machine is not on the network", ENONET},
+ {"Package not installed", ENOPKG},
+ {"Object is remote", EREMOTE},
+ {"Link has been severed", ENOLINK},
+ {"Communication error on send", ECOMM},
+ {"Protocol error", EPROTO},
+ {"Bad message", EBADMSG},
+ {"File descriptor in bad state", EBADFD},
+ {"Streams pipe error", ESTRPIPE},
+ {"Too many users", EUSERS},
+ {"Socket operation on non-socket", ENOTSOCK},
+ {"Message too long", EMSGSIZE},
+ {"Protocol not available", ENOPROTOOPT},
+ {"Protocol not supported", EPROTONOSUPPORT},
+ {"Socket type not supported", ESOCKTNOSUPPORT},
+ {"Operation not supported", EOPNOTSUPP},
+ {"Protocol family not supported", EPFNOSUPPORT},
+ {"Network is down", ENETDOWN},
+ {"Network is unreachable", ENETUNREACH},
+ {"Network dropped connection on reset", ENETRESET},
+ {"Software caused connection abort", ECONNABORTED},
+ {"Connection reset by peer", ECONNRESET},
+ {"No buffer space available", ENOBUFS},
+ {"Transport endpoint is already connected", EISCONN},
+ {"Transport endpoint is not connected", ENOTCONN},
+ {"Cannot send after transport endpoint shutdown", ESHUTDOWN},
+ {"Connection timed out", ETIMEDOUT},
+ {"Connection refused", ECONNREFUSED},
+ {"Host is down", EHOSTDOWN},
+ {"No route to host", EHOSTUNREACH},
+ {"Operation already in progress", EALREADY},
+ {"Operation now in progress", EINPROGRESS},
+ {"Is a named type file", EISNAM},
+ {"Remote I/O error", EREMOTEIO},
+ {"Disk quota exceeded", EDQUOT},
+/* errors from fossil, vacfs, and u9fs */
+ {"fid unknown or out of range", EBADF},
+ {"permission denied", EACCES},
+ {"file does not exist", ENOENT},
+ {"authentication failed", ECONNREFUSED},
+ {"bad offset in directory read", ESPIPE},
+ {"bad use of fid", EBADF},
+ {"wstat can't convert between files and directories", EPERM},
+ {"directory is not empty", ENOTEMPTY},
+ {"file exists", EEXIST},
+ {"file already exists", EEXIST},
+ {"file or directory already exists", EEXIST},
+ {"fid already in use", EBADF},
+ {"file in use", ETXTBSY},
+ {"i/o error", EIO},
+ {"file already open for I/O", ETXTBSY},
+ {"illegal mode", EINVAL},
+ {"illegal name", ENAMETOOLONG},
+ {"not a directory", ENOTDIR},
+ {"not a member of proposed group", EPERM},
+ {"not owner", EACCES},
+ {"only owner can change group in wstat", EACCES},
+ {"read only file system", EROFS},
+ {"no access to special file", EPERM},
+ {"i/o count too large", EIO},
+ {"unknown group", EINVAL},
+ {"unknown user", EINVAL},
+ {"bogus wstat buffer", EPROTO},
+ {"exclusive use file already open", EAGAIN},
+ {"corrupted directory entry", EIO},
+ {"corrupted file entry", EIO},
+ {"corrupted block label", EIO},
+ {"corrupted meta data", EIO},
+ {"illegal offset", EINVAL},
+ {"illegal path element", ENOENT},
+ {"root of file system is corrupted", EIO},
+ {"corrupted super block", EIO},
+ {"protocol botch", EPROTO},
+ {"file system is full", ENOSPC},
+ {"file is in use", EAGAIN},
+ {"directory entry is not allocated", ENOENT},
+ {"file is read only", EROFS},
+ {"file has been removed", EIDRM},
+ {"only support truncation to zero length", EPERM},
+ {"cannot remove root", EPERM},
+ {"file too big", EFBIG},
+ {"venti i/o error", EIO},
+ /* these are not errors */
+ {"u9fs rhostsauth: no authentication required", 0},
+ {"u9fs authnone: no authentication required", 0},
+ {NULL, -1}
+};
+
+extern int v9fs_error_init(void);
+extern int v9fs_errstr2errno(char *errstr);
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
new file mode 100644
index 000000000000..821c9c4d76aa
--- /dev/null
+++ b/fs/9p/fid.c
@@ -0,0 +1,241 @@
+/*
+ * V9FS FID Management
+ *
+ * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "transport.h"
+#include "mux.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * v9fs_fid_insert - add a fid to a dentry
+ * @fid: fid to add
+ * @dentry: dentry that it is being added to
+ *
+ */
+
+static int v9fs_fid_insert(struct v9fs_fid *fid, struct dentry *dentry)
+{
+ struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
+ dprintk(DEBUG_9P, "fid %d (%p) dentry %s (%p)\n", fid->fid, fid,
+ dentry->d_iname, dentry);
+ if (dentry->d_fsdata == NULL) {
+ dentry->d_fsdata =
+ kmalloc(sizeof(struct list_head), GFP_KERNEL);
+ if (dentry->d_fsdata == NULL) {
+ dprintk(DEBUG_ERROR, "Out of memory\n");
+ return -ENOMEM;
+ }
+ fid_list = (struct list_head *)dentry->d_fsdata;
+ INIT_LIST_HEAD(fid_list); /* Initialize list head */
+ }
+
+ fid->uid = current->uid;
+ fid->pid = current->pid;
+ list_add(&fid->list, fid_list);
+ return 0;
+}
+
+/**
+ * v9fs_fid_create - allocate a FID structure
+ * @dentry - dentry to link newly created fid to
+ *
+ */
+
+struct v9fs_fid *v9fs_fid_create(struct dentry *dentry)
+{
+ struct v9fs_fid *new;
+
+ new = kmalloc(sizeof(struct v9fs_fid), GFP_KERNEL);
+ if (new == NULL) {
+ dprintk(DEBUG_ERROR, "Out of Memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ new->fid = -1;
+ new->fidopen = 0;
+ new->fidcreate = 0;
+ new->fidclunked = 0;
+ new->iounit = 0;
+
+ if (v9fs_fid_insert(new, dentry) == 0)
+ return new;
+ else {
+ dprintk(DEBUG_ERROR, "Problems inserting to dentry\n");
+ kfree(new);
+ return NULL;
+ }
+}
+
+/**
+ * v9fs_fid_destroy - deallocate a FID structure
+ * @fid: fid to destroy
+ *
+ */
+
+void v9fs_fid_destroy(struct v9fs_fid *fid)
+{
+ list_del(&fid->list);
+ kfree(fid);
+}
+
+/**
+ * v9fs_fid_lookup - retrieve the right fid from a particular dentry
+ * @dentry: dentry to look for fid in
+ * @type: intent of lookup (operation or traversal)
+ *
+ * search list of fids associated with a dentry for a fid with a matching
+ * thread id or uid. If that fails, look up the dentry's parents to see if you
+ * can find a matching fid.
+ *
+ */
+
+struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type)
+{
+ struct list_head *fid_list = (struct list_head *)dentry->d_fsdata;
+ struct v9fs_fid *current_fid = NULL;
+ struct v9fs_fid *temp = NULL;
+ struct v9fs_fid *return_fid = NULL;
+ int found_parent = 0;
+ int found_user = 0;
+
+ dprintk(DEBUG_9P, " dentry: %s (%p) type %d\n", dentry->d_iname, dentry,
+ type);
+
+ if (fid_list && !list_empty(fid_list)) {
+ list_for_each_entry_safe(current_fid, temp, fid_list, list) {
+ if (current_fid->uid == current->uid) {
+ if (return_fid == NULL) {
+ if ((type == FID_OP)
+ || (!current_fid->fidopen)) {
+ return_fid = current_fid;
+ found_user = 1;
+ }
+ }
+ }
+ if (current_fid->pid == current->real_parent->pid) {
+ if ((return_fid == NULL) || (found_parent)
+ || (found_user)) {
+ if ((type == FID_OP)
+ || (!current_fid->fidopen)) {
+ return_fid = current_fid;
+ found_parent = 1;
+ found_user = 0;
+ }
+ }
+ }
+ if (current_fid->pid == current->pid) {
+ if ((type == FID_OP) ||
+ (!current_fid->fidopen)) {
+ return_fid = current_fid;
+ found_parent = 0;
+ found_user = 0;
+ }
+ }
+ }
+ }
+
+ /* we are at the root but didn't match */
+ if ((!return_fid) && (dentry->d_parent == dentry)) {
+ /* TODO: clone attach with new uid */
+ return_fid = current_fid;
+ }
+
+ if (!return_fid) {
+ struct dentry *par = current->fs->pwd->d_parent;
+ int count = 1;
+ while (par != NULL) {
+ if (par == dentry)
+ break;
+ count++;
+ if (par == par->d_parent) {
+ dprintk(DEBUG_ERROR,
+ "got to root without finding dentry\n");
+ break;
+ }
+ par = par->d_parent;
+ }
+
+/* XXX - there may be some duplication we can get rid of */
+ if (par == dentry) {
+ /* we need to fid_lookup the starting point */
+ int fidnum = -1;
+ int oldfid = -1;
+ int result = -1;
+ struct v9fs_session_info *v9ses =
+ v9fs_inode2v9ses(current->fs->pwd->d_inode);
+
+ current_fid =
+ v9fs_fid_lookup(current->fs->pwd, FID_WALK);
+ if (current_fid == NULL) {
+ dprintk(DEBUG_ERROR,
+ "process cwd doesn't have a fid\n");
+ return return_fid;
+ }
+ oldfid = current_fid->fid;
+ par = current->fs->pwd;
+ /* TODO: take advantage of multiwalk */
+
+ fidnum = v9fs_get_idpool(&v9ses->fidpool);
+ if (fidnum < 0) {
+ dprintk(DEBUG_ERROR,
+ "could not get a new fid num\n");
+ return return_fid;
+ }
+
+ while (par != dentry) {
+ result =
+ v9fs_t_walk(v9ses, oldfid, fidnum, "..",
+ NULL);
+ if (result < 0) {
+ dprintk(DEBUG_ERROR,
+ "problem walking to parent\n");
+
+ break;
+ }
+ oldfid = fidnum;
+ if (par == par->d_parent) {
+ dprintk(DEBUG_ERROR,
+ "can't find dentry\n");
+ break;
+ }
+ par = par->d_parent;
+ }
+ if (par == dentry) {
+ return_fid = v9fs_fid_create(dentry);
+ return_fid->fid = fidnum;
+ }
+ }
+ }
+
+ return return_fid;
+}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
new file mode 100644
index 000000000000..7db478ccca36
--- /dev/null
+++ b/fs/9p/fid.h
@@ -0,0 +1,57 @@
+/*
+ * V9FS FID Management
+ *
+ * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/list.h>
+
+#define FID_OP 0
+#define FID_WALK 1
+
+struct v9fs_fid {
+ struct list_head list; /* list of fids associated with a dentry */
+ struct list_head active; /* XXX - debug */
+
+ u32 fid;
+ unsigned char fidopen; /* set when fid is opened */
+ unsigned char fidcreate; /* set when fid was just created */
+ unsigned char fidclunked; /* set when fid has already been clunked */
+
+ struct v9fs_qid qid;
+ u32 iounit;
+
+ /* readdir stuff */
+ int rdir_fpos;
+ loff_t rdir_pos;
+ struct v9fs_fcall *rdir_fcall;
+
+ /* management stuff */
+ pid_t pid; /* thread associated with this fid */
+ uid_t uid; /* user associated with this fid */
+
+ /* private data */
+ struct file *filp; /* backpointer to File struct for open files */
+ struct v9fs_session_info *v9ses; /* session info for this FID */
+};
+
+struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry, int type);
+void v9fs_fid_destroy(struct v9fs_fid *fid);
+struct v9fs_fid *v9fs_fid_create(struct dentry *);
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
new file mode 100644
index 000000000000..8835b576f744
--- /dev/null
+++ b/fs/9p/mux.c
@@ -0,0 +1,475 @@
+/*
+ * linux/fs/9p/mux.c
+ *
+ * Protocol Multiplexer
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kthread.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "transport.h"
+#include "conv.h"
+#include "mux.h"
+
+/**
+ * dprintcond - print condition of session info
+ * @v9ses: session info structure
+ * @req: RPC request structure
+ *
+ */
+
+static inline int
+dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+ dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status,
+ req->rcall);
+ return 0;
+}
+
+/**
+ * xread - force read of a certain number of bytes
+ * @v9ses: session info structure
+ * @ptr: pointer to buffer
+ * @sz: number of bytes to read
+ *
+ * Chuck Cranor CS-533 project1
+ */
+
+static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz)
+{
+ int rd = 0;
+ int ret = 0;
+ while (rd < sz) {
+ ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd);
+ if (ret <= 0) {
+ dprintk(DEBUG_ERROR, "xread errno %d\n", ret);
+ return ret;
+ }
+ rd += ret;
+ ptr += ret;
+ }
+ return (rd);
+}
+
+/**
+ * read_message - read a full 9P2000 fcall packet
+ * @v9ses: session info structure
+ * @rcall: fcall structure to read into
+ * @rcalllen: size of fcall buffer
+ *
+ */
+
+static int
+read_message(struct v9fs_session_info *v9ses,
+ struct v9fs_fcall *rcall, int rcalllen)
+{
+ unsigned char buf[4];
+ void *data;
+ int size = 0;
+ int res = 0;
+
+ res = xread(v9ses, buf, sizeof(buf));
+ if (res < 0) {
+ dprintk(DEBUG_ERROR,
+ "Reading of count field failed returned: %d\n", res);
+ return res;
+ }
+
+ if (res < 4) {
+ dprintk(DEBUG_ERROR,
+ "Reading of count field failed returned: %d\n", res);
+ return -EIO;
+ }
+
+ size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
+ dprintk(DEBUG_MUX, "got a packet count: %d\n", size);
+
+ /* adjust for the four bytes of size */
+ size -= 4;
+
+ if (size > v9ses->maxdata) {
+ dprintk(DEBUG_ERROR, "packet too big: %d\n", size);
+ return -E2BIG;
+ }
+
+ data = kmalloc(size, GFP_KERNEL);
+ if (!data) {
+ eprintk(KERN_WARNING, "out of memory\n");
+ return -ENOMEM;
+ }
+
+ res = xread(v9ses, data, size);
+ if (res < size) {
+ dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n",
+ res);
+ kfree(data);
+ return res;
+ }
+
+ /* we now have an in-memory string that is the reply.
+ * deserialize it. There is very little to go wrong at this point
+ * save for v9fs_alloc errors.
+ */
+ res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata,
+ rcall, rcalllen);
+
+ kfree(data);
+
+ if (res < 0)
+ return res;
+
+ return 0;
+}
+
+/**
+ * v9fs_recv - receive an RPC response for a particular tag
+ * @v9ses: session info structure
+ * @req: RPC request structure
+ *
+ */
+
+static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+ int ret = 0;
+
+ dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag);
+ ret = wait_event_interruptible(v9ses->read_wait,
+ ((v9ses->transport->status != Connected) ||
+ (req->rcall != 0) || (req->err < 0) ||
+ dprintcond(v9ses, req)));
+
+ dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall);
+
+ spin_lock(&v9ses->muxlock);
+ list_del(&req->next);
+ spin_unlock(&v9ses->muxlock);
+
+ if (req->err < 0)
+ return req->err;
+
+ if (v9ses->transport->status == Disconnected)
+ return -ECONNRESET;
+
+ return ret;
+}
+
+/**
+ * v9fs_send - send a 9P request
+ * @v9ses: session info structure
+ * @req: RPC request to send
+ *
+ */
+
+static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+{
+ int ret = -1;
+ void *data = NULL;
+ struct v9fs_fcall *tcall = req->tcall;
+
+ data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ tcall->size = 0; /* enforce size recalculation */
+ ret =
+ v9fs_serialize_fcall(v9ses, tcall, data,
+ v9ses->maxdata + V9FS_IOHDRSZ);
+ if (ret < 0)
+ goto free_data;
+
+ spin_lock(&v9ses->muxlock);
+ list_add(&req->next, &v9ses->mux_fcalls);
+ spin_unlock(&v9ses->muxlock);
+
+ dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag,
+ tcall->size);
+ ret = v9ses->transport->write(v9ses->transport, data, tcall->size);
+
+ if (ret != tcall->size) {
+ spin_lock(&v9ses->muxlock);
+ list_del(&req->next);
+ kfree(req->rcall);
+
+ spin_unlock(&v9ses->muxlock);
+ if (ret >= 0)
+ ret = -EREMOTEIO;
+ } else
+ ret = 0;
+
+ free_data:
+ kfree(data);
+ return ret;
+}
+
+/**
+ * v9fs_mux_rpc - send a request, receive a response
+ * @v9ses: session info structure
+ * @tcall: fcall to send
+ * @rcall: buffer to place response into
+ *
+ */
+
+long
+v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
+ struct v9fs_fcall **rcall)
+{
+ int tid = -1;
+ struct v9fs_fcall *fcall = NULL;
+ struct v9fs_rpcreq req;
+ int ret = -1;
+
+ if (!v9ses)
+ return -EINVAL;
+
+ if (!v9ses->transport || v9ses->transport->status != Connected)
+ return -EIO;
+
+ if (rcall)
+ *rcall = NULL;
+
+ if (tcall->id != TVERSION) {
+ tid = v9fs_get_idpool(&v9ses->tidpool);
+ if (tid < 0)
+ return -ENOMEM;
+ }
+
+ tcall->tag = tid;
+
+ req.tcall = tcall;
+ req.err = 0;
+ req.rcall = NULL;
+
+ ret = v9fs_send(v9ses, &req);
+
+ if (ret < 0) {
+ if (tcall->id != TVERSION)
+ v9fs_put_idpool(tid, &v9ses->tidpool);
+ dprintk(DEBUG_MUX, "error %d\n", ret);
+ return ret;
+ }
+
+ ret = v9fs_recv(v9ses, &req);
+
+ fcall = req.rcall;
+
+ dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret);
+ if (ret == -ERESTARTSYS) {
+ if (v9ses->transport->status != Disconnected
+ && tcall->id != TFLUSH) {
+ unsigned long flags;
+
+ dprintk(DEBUG_MUX, "flushing the tag: %d\n",
+ tcall->tag);
+ clear_thread_flag(TIF_SIGPENDING);
+ v9fs_t_flush(v9ses, tcall->tag);
+ spin_lock_irqsave(&current->sighand->siglock, flags);
+ recalc_sigpending();
+ spin_unlock_irqrestore(&current->sighand->siglock,
+ flags);
+ dprintk(DEBUG_MUX, "flushing done\n");
+ }
+
+ goto release_req;
+ } else if (ret < 0)
+ goto release_req;
+
+ if (!fcall)
+ ret = -EIO;
+ else {
+ if (fcall->id == RERROR) {
+ ret = v9fs_errstr2errno(fcall->params.rerror.error);
+ if (ret == 0) { /* string match failed */
+ if (fcall->params.rerror.errno)
+ ret = -(fcall->params.rerror.errno);
+ else
+ ret = -ESERVERFAULT;
+ }
+ } else if (fcall->id != tcall->id + 1) {
+ dprintk(DEBUG_ERROR,
+ "fcall mismatch: expected %d, got %d\n",
+ tcall->id + 1, fcall->id);
+ ret = -EIO;
+ }
+ }
+
+ release_req:
+ if (tcall->id != TVERSION)
+ v9fs_put_idpool(tid, &v9ses->tidpool);
+ if (rcall)
+ *rcall = fcall;
+ else
+ kfree(fcall);
+
+ return ret;
+}
+
+/**
+ * v9fs_mux_cancel_requests - cancels all pending requests
+ *
+ * @v9ses: session info structure
+ * @err: error code to return to the requests
+ */
+void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err)
+{
+ struct v9fs_rpcreq *rptr;
+ struct v9fs_rpcreq *rreq;
+
+ dprintk(DEBUG_MUX, " %d\n", err);
+ spin_lock(&v9ses->muxlock);
+ list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+ rreq->err = err;
+ }
+ spin_unlock(&v9ses->muxlock);
+ wake_up_all(&v9ses->read_wait);
+}
+
+/**
+ * v9fs_recvproc - kproc to handle demultiplexing responses
+ * @data: session info structure
+ *
+ */
+
+static int v9fs_recvproc(void *data)
+{
+ struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data;
+ struct v9fs_fcall *rcall = NULL;
+ struct v9fs_rpcreq *rptr;
+ struct v9fs_rpcreq *req;
+ struct v9fs_rpcreq *rreq;
+ int err = 0;
+
+ allow_signal(SIGKILL);
+ set_current_state(TASK_INTERRUPTIBLE);
+ complete(&v9ses->proccmpl);
+ while (!kthread_should_stop() && err >= 0) {
+ req = rptr = rreq = NULL;
+
+ rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
+ if (!rcall) {
+ eprintk(KERN_ERR, "no memory for buffers\n");
+ break;
+ }
+
+ err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ);
+ spin_lock(&v9ses->muxlock);
+ if (err < 0) {
+ list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+ rreq->err = err;
+ }
+ if(err != -ERESTARTSYS)
+ eprintk(KERN_ERR,
+ "Transport error while reading message %d\n", err);
+ } else {
+ list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
+ if (rreq->tcall->tag == rcall->tag) {
+ req = rreq;
+ req->rcall = rcall;
+ break;
+ }
+ }
+ }
+
+ if (req && (req->tcall->id == TFLUSH)) {
+ struct v9fs_rpcreq *treq = NULL;
+ list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) {
+ if (treq->tcall->tag ==
+ req->tcall->params.tflush.oldtag) {
+ list_del(&rptr->next);
+ kfree(treq->rcall);
+ break;
+ }
+ }
+ }
+
+ spin_unlock(&v9ses->muxlock);
+
+ if (!req) {
+ if (err >= 0)
+ dprintk(DEBUG_ERROR,
+ "unexpected response: id %d tag %d\n",
+ rcall->id, rcall->tag);
+
+ kfree(rcall);
+ }
+
+ wake_up_all(&v9ses->read_wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+
+ v9ses->transport->close(v9ses->transport);
+
+ /* Inform all pending processes about the failure */
+ wake_up_all(&v9ses->read_wait);
+
+ if (signal_pending(current))
+ complete(&v9ses->proccmpl);
+
+ dprintk(DEBUG_MUX, "recvproc: end\n");
+ v9ses->recvproc = NULL;
+
+ return err >= 0;
+}
+
+/**
+ * v9fs_mux_init - initialize multiplexer (spawn kproc)
+ * @v9ses: session info structure
+ * @dev_name: mount device information (to create unique kproc)
+ *
+ */
+
+int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name)
+{
+ char procname[60];
+
+ strncpy(procname, dev_name, sizeof(procname));
+ procname[sizeof(procname) - 1] = 0;
+
+ init_waitqueue_head(&v9ses->read_wait);
+ init_completion(&v9ses->fcread);
+ init_completion(&v9ses->proccmpl);
+ spin_lock_init(&v9ses->muxlock);
+ INIT_LIST_HEAD(&v9ses->mux_fcalls);
+ v9ses->recvproc = NULL;
+ v9ses->curfcall = NULL;
+
+ v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
+ "v9fs_recvproc %s", procname);
+
+ if (IS_ERR(v9ses->recvproc)) {
+ eprintk(KERN_ERR, "cannot create receiving thread\n");
+ v9fs_session_close(v9ses);
+ return -ECONNABORTED;
+ }
+
+ wake_up_process(v9ses->recvproc);
+ wait_for_completion(&v9ses->proccmpl);
+
+ return 0;
+}
diff --git a/fs/9p/mux.h b/fs/9p/mux.h
new file mode 100644
index 000000000000..4994cb10badf
--- /dev/null
+++ b/fs/9p/mux.h
@@ -0,0 +1,41 @@
+/*
+ * linux/fs/9p/mux.h
+ *
+ * Multiplexer Definitions
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/* structure to manage each RPC transaction */
+
+struct v9fs_rpcreq {
+ struct v9fs_fcall *tcall;
+ struct v9fs_fcall *rcall;
+ int err; /* error code if response failed */
+
+ /* XXX - could we put scatter/gather buffers here? */
+
+ struct list_head next;
+};
+
+int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name);
+long v9fs_mux_rpc(struct v9fs_session_info *v9ses,
+ struct v9fs_fcall *tcall, struct v9fs_fcall **rcall);
+void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
diff --git a/fs/9p/trans_fd.c b/fs/9p/trans_fd.c
new file mode 100644
index 000000000000..63b58ce98ff4
--- /dev/null
+++ b/fs/9p/trans_fd.c
@@ -0,0 +1,172 @@
+/*
+ * linux/fs/9p/trans_fd.c
+ *
+ * File Descriptor Transport Layer
+ *
+ * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/un.h>
+#include <asm/uaccess.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+#include <linux/file.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "transport.h"
+
+struct v9fs_trans_fd {
+ struct file *in_file;
+ struct file *out_file;
+};
+
+/**
+ * v9fs_fd_recv - receive from a socket
+ * @v9ses: session information
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
+ *
+ */
+
+static int v9fs_fd_recv(struct v9fs_transport *trans, void *v, int len)
+{
+ struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
+
+ if (!trans || trans->status != Connected || !ts)
+ return -EIO;
+
+ return kernel_read(ts->in_file, ts->in_file->f_pos, v, len);
+}
+
+/**
+ * v9fs_fd_send - send to a socket
+ * @v9ses: session information
+ * @v: buffer to send data from
+ * @len: size of send buffer
+ *
+ */
+
+static int v9fs_fd_send(struct v9fs_transport *trans, void *v, int len)
+{
+ struct v9fs_trans_fd *ts = trans ? trans->priv : NULL;
+ mm_segment_t oldfs = get_fs();
+ int ret = 0;
+
+ if (!trans || trans->status != Connected || !ts)
+ return -EIO;
+
+ set_fs(get_ds());
+ /* The cast to a user pointer is valid due to the set_fs() */
+ ret = vfs_write(ts->out_file, (void __user *)v, len, &ts->out_file->f_pos);
+ set_fs(oldfs);
+
+ return ret;
+}
+
+/**
+ * v9fs_fd_init - initialize file descriptor transport
+ * @v9ses: session information
+ * @addr: address of server to mount
+ * @data: mount options
+ *
+ */
+
+static int
+v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+{
+ struct v9fs_trans_fd *ts = NULL;
+ struct v9fs_transport *trans = v9ses->transport;
+
+ if((v9ses->wfdno == ~0) || (v9ses->rfdno == ~0)) {
+ printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n");
+ return -ENOPROTOOPT;
+ }
+
+ sema_init(&trans->writelock, 1);
+ sema_init(&trans->readlock, 1);
+
+ ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
+
+ if (!ts)
+ return -ENOMEM;
+
+ ts->in_file = fget( v9ses->rfdno );
+ ts->out_file = fget( v9ses->wfdno );
+
+ if (!ts->in_file || !ts->out_file) {
+ if (ts->in_file)
+ fput(ts->in_file);
+
+ if (ts->out_file)
+ fput(ts->out_file);
+
+ kfree(ts);
+ return -EIO;
+ }
+
+ trans->priv = ts;
+ trans->status = Connected;
+
+ return 0;
+}
+
+
+/**
+ * v9fs_fd_close - shutdown file descriptor
+ * @trans: private socket structure
+ *
+ */
+
+static void v9fs_fd_close(struct v9fs_transport *trans)
+{
+ struct v9fs_trans_fd *ts;
+
+ if (!trans)
+ return;
+
+ trans->status = Disconnected;
+ ts = trans->priv;
+
+ if (!ts)
+ return;
+
+ if (ts->in_file)
+ fput(ts->in_file);
+
+ if (ts->out_file)
+ fput(ts->out_file);
+
+ kfree(ts);
+}
+
+struct v9fs_transport v9fs_trans_fd = {
+ .init = v9fs_fd_init,
+ .write = v9fs_fd_send,
+ .read = v9fs_fd_recv,
+ .close = v9fs_fd_close,
+};
+
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
new file mode 100644
index 000000000000..01e26f0013ac
--- /dev/null
+++ b/fs/9p/trans_sock.c
@@ -0,0 +1,290 @@
+/*
+ * linux/fs/9p/trans_socket.c
+ *
+ * Socket Transport Layer
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
+ * Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/un.h>
+#include <asm/uaccess.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "transport.h"
+
+#define V9FS_PORT 564
+
+struct v9fs_trans_sock {
+ struct socket *s;
+};
+
+/**
+ * v9fs_sock_recv - receive from a socket
+ * @v9ses: session information
+ * @v: buffer to receive data into
+ * @len: size of receive buffer
+ *
+ */
+
+static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
+{
+ struct msghdr msg;
+ struct kvec iov;
+ int result;
+ mm_segment_t oldfs;
+ struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+
+ if (trans->status == Disconnected)
+ return -EREMOTEIO;
+
+ result = -EINVAL;
+
+ oldfs = get_fs();
+ set_fs(get_ds());
+
+ iov.iov_base = v;
+ iov.iov_len = len;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_iovlen = 1;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_namelen = 0;
+ msg.msg_flags = MSG_NOSIGNAL;
+
+ result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0);
+
+ dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state);
+ set_fs(oldfs);
+
+ if (result <= 0) {
+ if (result != -ERESTARTSYS)
+ trans->status = Disconnected;
+ }
+
+ return result;
+}
+
+/**
+ * v9fs_sock_send - send to a socket
+ * @v9ses: session information
+ * @v: buffer to send data from
+ * @len: size of send buffer
+ *
+ */
+
+static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
+{
+ struct kvec iov;
+ struct msghdr msg;
+ int result = -1;
+ mm_segment_t oldfs;
+ struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+
+ dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len);
+ dump_data(v, len);
+
+ down(&trans->writelock);
+
+ oldfs = get_fs();
+ set_fs(get_ds());
+ iov.iov_base = v;
+ iov.iov_len = len;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_iovlen = 1;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_namelen = 0;
+ msg.msg_flags = MSG_NOSIGNAL;
+ result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
+ set_fs(oldfs);
+
+ if (result < 0) {
+ if (result != -ERESTARTSYS)
+ trans->status = Disconnected;
+ }
+
+ up(&trans->writelock);
+ return result;
+}
+
+/**
+ * v9fs_tcp_init - initialize TCP socket
+ * @v9ses: session information
+ * @addr: address of server to mount
+ * @data: mount options
+ *
+ */
+
+static int
+v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
+{
+ struct socket *csocket = NULL;
+ struct sockaddr_in sin_server;
+ int rc = 0;
+ struct v9fs_trans_sock *ts = NULL;
+ struct v9fs_transport *trans = v9ses->transport;
+
+ sema_init(&trans->writelock, 1);
+ sema_init(&trans->readlock, 1);
+
+ ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
+
+ if (!ts)
+ return -ENOMEM;
+
+ trans->priv = ts;
+ ts->s = NULL;
+
+ if (!addr)
+ return -EINVAL;
+
+ dprintk(DEBUG_TRANS, "Connecting to %s\n", addr);
+
+ sin_server.sin_family = AF_INET;
+ sin_server.sin_addr.s_addr = in_aton(addr);
+ sin_server.sin_port = htons(v9ses->port);
+ sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
+ rc = csocket->ops->connect(csocket,
+ (struct sockaddr *)&sin_server,
+ sizeof(struct sockaddr_in), 0);
+ if (rc < 0) {
+ eprintk(KERN_ERR,
+ "v9fs_trans_tcp: problem connecting socket to %s\n",
+ addr);
+ return rc;
+ }
+ csocket->sk->sk_allocation = GFP_NOIO;
+ ts->s = csocket;
+ trans->status = Connected;
+
+ return 0;
+}
+
+/**
+ * v9fs_unix_init - initialize UNIX domain socket
+ * @v9ses: session information
+ * @dev_name: path to named pipe
+ * @data: mount options
+ *
+ */
+
+static int
+v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
+ char *data)
+{
+ int rc;
+ struct socket *csocket;
+ struct sockaddr_un sun_server;
+ struct v9fs_transport *trans;
+ struct v9fs_trans_sock *ts;
+
+ rc = 0;
+ csocket = NULL;
+ trans = v9ses->transport;
+
+ if (strlen(dev_name) > UNIX_PATH_MAX) {
+ eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
+ dev_name);
+ return -ENOMEM;
+ }
+
+ ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
+ if (!ts)
+ return -ENOMEM;
+
+ trans->priv = ts;
+ ts->s = NULL;
+
+ sema_init(&trans->writelock, 1);
+ sema_init(&trans->readlock, 1);
+
+ sun_server.sun_family = PF_UNIX;
+ strcpy(sun_server.sun_path, dev_name);
+ sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
+ rc = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,
+ sizeof(struct sockaddr_un) - 1, 0); /* -1 *is* important */
+ if (rc < 0) {
+ eprintk(KERN_ERR,
+ "v9fs_trans_unix: problem connecting socket: %s: %d\n",
+ dev_name, rc);
+ return rc;
+ }
+ csocket->sk->sk_allocation = GFP_NOIO;
+ ts->s = csocket;
+ trans->status = Connected;
+
+ return 0;
+}
+
+/**
+ * v9fs_sock_close - shutdown socket
+ * @trans: private socket structure
+ *
+ */
+
+static void v9fs_sock_close(struct v9fs_transport *trans)
+{
+ struct v9fs_trans_sock *ts;
+
+ if (!trans)
+ return;
+
+ ts = trans->priv;
+
+ if ((ts) && (ts->s)) {
+ dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s);
+ sock_release(ts->s);
+ ts->s = NULL;
+ trans->status = Disconnected;
+ dprintk(DEBUG_TRANS, "socket closed\n");
+ }
+
+ if (ts)
+ kfree(ts);
+
+ trans->priv = NULL;
+}
+
+struct v9fs_transport v9fs_trans_tcp = {
+ .init = v9fs_tcp_init,
+ .write = v9fs_sock_send,
+ .read = v9fs_sock_recv,
+ .close = v9fs_sock_close,
+};
+
+struct v9fs_transport v9fs_trans_unix = {
+ .init = v9fs_unix_init,
+ .write = v9fs_sock_send,
+ .read = v9fs_sock_recv,
+ .close = v9fs_sock_close,
+};
diff --git a/fs/9p/transport.h b/fs/9p/transport.h
new file mode 100644
index 000000000000..9e9cd418efd5
--- /dev/null
+++ b/fs/9p/transport.h
@@ -0,0 +1,46 @@
+/*
+ * linux/fs/9p/transport.h
+ *
+ * Transport Definition
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+enum v9fs_transport_status {
+ Connected,
+ Disconnected,
+ Hung,
+};
+
+struct v9fs_transport {
+ enum v9fs_transport_status status;
+ struct semaphore writelock;
+ struct semaphore readlock;
+ void *priv;
+
+ int (*init) (struct v9fs_session_info *, const char *, char *);
+ int (*write) (struct v9fs_transport *, void *, int);
+ int (*read) (struct v9fs_transport *, void *, int);
+ void (*close) (struct v9fs_transport *);
+};
+
+extern struct v9fs_transport v9fs_trans_tcp;
+extern struct v9fs_transport v9fs_trans_unix;
+extern struct v9fs_transport v9fs_trans_fd;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
new file mode 100644
index 000000000000..13bdbbab4387
--- /dev/null
+++ b/fs/9p/v9fs.c
@@ -0,0 +1,452 @@
+/*
+ * linux/fs/9p/v9fs.c
+ *
+ * This file contains functions assisting in mapping VFS to 9P2000
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/parser.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "transport.h"
+#include "mux.h"
+#include "conv.h"
+
+/* TODO: sysfs or debugfs interface */
+int v9fs_debug_level = 0; /* feature-rific global debug level */
+
+/*
+ * Option Parsing (code inspired by NFS code)
+ *
+ */
+
+enum {
+ /* Options that take integer arguments */
+ Opt_port, Opt_msize, Opt_uid, Opt_gid, Opt_afid, Opt_debug,
+ Opt_rfdno, Opt_wfdno,
+ /* String options */
+ Opt_name, Opt_remotename,
+ /* Options that take no arguments */
+ Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd,
+ /* Error token */
+ Opt_err
+};
+
+static match_table_t tokens = {
+ {Opt_port, "port=%u"},
+ {Opt_msize, "msize=%u"},
+ {Opt_uid, "uid=%u"},
+ {Opt_gid, "gid=%u"},
+ {Opt_afid, "afid=%u"},
+ {Opt_rfdno, "rfdno=%u"},
+ {Opt_wfdno, "wfdno=%u"},
+ {Opt_debug, "debug=%u"},
+ {Opt_name, "name=%s"},
+ {Opt_remotename, "aname=%s"},
+ {Opt_unix, "proto=unix"},
+ {Opt_tcp, "proto=tcp"},
+ {Opt_fd, "proto=fd"},
+ {Opt_tcp, "tcp"},
+ {Opt_unix, "unix"},
+ {Opt_fd, "fd"},
+ {Opt_legacy, "noextend"},
+ {Opt_nodevmap, "nodevmap"},
+ {Opt_err, NULL}
+};
+
+/*
+ * Parse option string.
+ */
+
+/**
+ * v9fs_parse_options - parse mount options into session structure
+ * @options: options string passed from mount
+ * @v9ses: existing v9fs session information
+ *
+ */
+
+static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
+{
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int option;
+ int ret;
+
+ /* setup defaults */
+ v9ses->port = V9FS_PORT;
+ v9ses->maxdata = 9000;
+ v9ses->proto = PROTO_TCP;
+ v9ses->extended = 1;
+ v9ses->afid = ~0;
+ v9ses->debug = 0;
+ v9ses->rfdno = ~0;
+ v9ses->wfdno = ~0;
+
+ if (!options)
+ return;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+ if (!*p)
+ continue;
+ token = match_token(p, tokens, args);
+ if (token < Opt_name) {
+ if ((ret = match_int(&args[0], &option)) < 0) {
+ dprintk(DEBUG_ERROR,
+ "integer field, but no integer?\n");
+ continue;
+ }
+
+ }
+ switch (token) {
+ case Opt_port:
+ v9ses->port = option;
+ break;
+ case Opt_msize:
+ v9ses->maxdata = option;
+ break;
+ case Opt_uid:
+ v9ses->uid = option;
+ break;
+ case Opt_gid:
+ v9ses->gid = option;
+ break;
+ case Opt_afid:
+ v9ses->afid = option;
+ break;
+ case Opt_rfdno:
+ v9ses->rfdno = option;
+ break;
+ case Opt_wfdno:
+ v9ses->wfdno = option;
+ break;
+ case Opt_debug:
+ v9ses->debug = option;
+ break;
+ case Opt_tcp:
+ v9ses->proto = PROTO_TCP;
+ break;
+ case Opt_unix:
+ v9ses->proto = PROTO_UNIX;
+ break;
+ case Opt_fd:
+ v9ses->proto = PROTO_FD;
+ break;
+ case Opt_name:
+ match_strcpy(v9ses->name, &args[0]);
+ break;
+ case Opt_remotename:
+ match_strcpy(v9ses->remotename, &args[0]);
+ break;
+ case Opt_legacy:
+ v9ses->extended = 0;
+ break;
+ case Opt_nodevmap:
+ v9ses->nodev = 1;
+ break;
+ default:
+ continue;
+ }
+ }
+}
+
+/**
+ * v9fs_inode2v9ses - safely extract v9fs session info from super block
+ * @inode: inode to extract information from
+ *
+ * Paranoid function to extract v9ses information from superblock,
+ * if anything is missing it will report an error.
+ *
+ */
+
+struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
+{
+ return (inode->i_sb->s_fs_info);
+}
+
+/**
+ * v9fs_get_idpool - allocate numeric id from pool
+ * @p - pool to allocate from
+ *
+ * XXX - This seems to be an awful generic function, should it be in idr.c with
+ * the lock included in struct idr?
+ */
+
+int v9fs_get_idpool(struct v9fs_idpool *p)
+{
+ int i = 0;
+ int error;
+
+retry:
+ if (idr_pre_get(&p->pool, GFP_KERNEL) == 0)
+ return 0;
+
+ if (down_interruptible(&p->lock) == -EINTR) {
+ eprintk(KERN_WARNING, "Interrupted while locking\n");
+ return -1;
+ }
+
+ error = idr_get_new(&p->pool, NULL, &i);
+ up(&p->lock);
+
+ if (error == -EAGAIN)
+ goto retry;
+ else if (error)
+ return -1;
+
+ return i;
+}
+
+/**
+ * v9fs_put_idpool - release numeric id from pool
+ * @p - pool to allocate from
+ *
+ * XXX - This seems to be an awful generic function, should it be in idr.c with
+ * the lock included in struct idr?
+ */
+
+void v9fs_put_idpool(int id, struct v9fs_idpool *p)
+{
+ if (down_interruptible(&p->lock) == -EINTR) {
+ eprintk(KERN_WARNING, "Interrupted while locking\n");
+ return;
+ }
+ idr_remove(&p->pool, id);
+ up(&p->lock);
+}
+
+/**
+ * v9fs_session_init - initialize session
+ * @v9ses: session information structure
+ * @dev_name: device being mounted
+ * @data: options
+ *
+ */
+
+int
+v9fs_session_init(struct v9fs_session_info *v9ses,
+ const char *dev_name, char *data)
+{
+ struct v9fs_fcall *fcall = NULL;
+ struct v9fs_transport *trans_proto;
+ int n = 0;
+ int newfid = -1;
+ int retval = -EINVAL;
+
+ v9ses->name = __getname();
+ if (!v9ses->name)
+ return -ENOMEM;
+
+ v9ses->remotename = __getname();
+ if (!v9ses->remotename) {
+ putname(v9ses->name);
+ return -ENOMEM;
+ }
+
+ strcpy(v9ses->name, V9FS_DEFUSER);
+ strcpy(v9ses->remotename, V9FS_DEFANAME);
+
+ v9fs_parse_options(data, v9ses);
+
+ /* set global debug level */
+ v9fs_debug_level = v9ses->debug;
+
+ /* id pools that are session-dependent: FIDs and TIDs */
+ idr_init(&v9ses->fidpool.pool);
+ init_MUTEX(&v9ses->fidpool.lock);
+ idr_init(&v9ses->tidpool.pool);
+ init_MUTEX(&v9ses->tidpool.lock);
+
+
+ switch (v9ses->proto) {
+ case PROTO_TCP:
+ trans_proto = &v9fs_trans_tcp;
+ break;
+ case PROTO_UNIX:
+ trans_proto = &v9fs_trans_unix;
+ *v9ses->remotename = 0;
+ break;
+ case PROTO_FD:
+ trans_proto = &v9fs_trans_fd;
+ *v9ses->remotename = 0;
+ break;
+ default:
+ printk(KERN_ERR "v9fs: Bad mount protocol %d\n", v9ses->proto);
+ retval = -ENOPROTOOPT;
+ goto SessCleanUp;
+ };
+
+ v9ses->transport = trans_proto;
+
+ if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) {
+ eprintk(KERN_ERR, "problem initializing transport\n");
+ goto SessCleanUp;
+ }
+
+ v9ses->inprogress = 0;
+ v9ses->shutdown = 0;
+ v9ses->session_hung = 0;
+
+ if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) {
+ dprintk(DEBUG_ERROR, "problem initializing mux\n");
+ goto SessCleanUp;
+ }
+
+ if (v9ses->afid == ~0) {
+ if (v9ses->extended)
+ retval =
+ v9fs_t_version(v9ses, v9ses->maxdata, "9P2000.u",
+ &fcall);
+ else
+ retval = v9fs_t_version(v9ses, v9ses->maxdata, "9P2000",
+ &fcall);
+
+ if (retval < 0) {
+ dprintk(DEBUG_ERROR, "v9fs_t_version failed\n");
+ goto FreeFcall;
+ }
+
+ /* Really should check for 9P1 and report error */
+ if (!strcmp(fcall->params.rversion.version, "9P2000.u")) {
+ dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
+ v9ses->extended = 1;
+ } else {
+ dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
+ v9ses->extended = 0;
+ }
+
+ n = fcall->params.rversion.msize;
+ kfree(fcall);
+
+ if (n < v9ses->maxdata)
+ v9ses->maxdata = n;
+ }
+
+ newfid = v9fs_get_idpool(&v9ses->fidpool);
+ if (newfid < 0) {
+ eprintk(KERN_WARNING, "couldn't allocate FID\n");
+ retval = -ENOMEM;
+ goto SessCleanUp;
+ }
+ /* it is a little bit ugly, but we have to prevent newfid */
+ /* being the same as afid, so if it is, get a new fid */
+ if (v9ses->afid != ~0 && newfid == v9ses->afid) {
+ newfid = v9fs_get_idpool(&v9ses->fidpool);
+ if (newfid < 0) {
+ eprintk(KERN_WARNING, "couldn't allocate FID\n");
+ retval = -ENOMEM;
+ goto SessCleanUp;
+ }
+ }
+
+ if ((retval =
+ v9fs_t_attach(v9ses, v9ses->name, v9ses->remotename, newfid,
+ v9ses->afid, NULL))
+ < 0) {
+ dprintk(DEBUG_ERROR, "cannot attach\n");
+ goto SessCleanUp;
+ }
+
+ if (v9ses->afid != ~0) {
+ if (v9fs_t_clunk(v9ses, v9ses->afid, NULL))
+ dprintk(DEBUG_ERROR, "clunk failed\n");
+ }
+
+ return newfid;
+
+ FreeFcall:
+ kfree(fcall);
+
+ SessCleanUp:
+ v9fs_session_close(v9ses);
+ return retval;
+}
+
+/**
+ * v9fs_session_close - shutdown a session
+ * @v9ses: session information structure
+ *
+ */
+
+void v9fs_session_close(struct v9fs_session_info *v9ses)
+{
+ if (v9ses->recvproc) {
+ send_sig(SIGKILL, v9ses->recvproc, 1);
+ wait_for_completion(&v9ses->proccmpl);
+ }
+
+ if (v9ses->transport)
+ v9ses->transport->close(v9ses->transport);
+
+ putname(v9ses->name);
+ putname(v9ses->remotename);
+}
+
+/**
+ * v9fs_session_cancel - mark transport as disconnected
+ * and cancel all pending requests.
+ */
+void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
+ v9ses->transport->status = Disconnected;
+ v9fs_mux_cancel_requests(v9ses, -EIO);
+}
+
+extern int v9fs_error_init(void);
+
+/**
+ * v9fs_init - Initialize module
+ *
+ */
+
+static int __init init_v9fs(void)
+{
+ v9fs_error_init();
+
+ printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
+
+ return register_filesystem(&v9fs_fs_type);
+}
+
+/**
+ * v9fs_init - shutdown module
+ *
+ */
+
+static void __exit exit_v9fs(void)
+{
+ unregister_filesystem(&v9fs_fs_type);
+}
+
+module_init(init_v9fs)
+module_exit(exit_v9fs)
+
+MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
+MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>");
+MODULE_LICENSE("GPL");
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
new file mode 100644
index 000000000000..45dcef42bdd6
--- /dev/null
+++ b/fs/9p/v9fs.h
@@ -0,0 +1,103 @@
+/*
+ * V9FS definitions.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/*
+ * Idpool structure provides lock and id management
+ *
+ */
+
+struct v9fs_idpool {
+ struct semaphore lock;
+ struct idr pool;
+};
+
+/*
+ * Session structure provides information for an opened session
+ *
+ */
+
+struct v9fs_session_info {
+ /* options */
+ unsigned int maxdata;
+ unsigned char extended; /* set to 1 if we are using UNIX extensions */
+ unsigned char nodev; /* set to 1 if no disable device mapping */
+ unsigned short port; /* port to connect to */
+ unsigned short debug; /* debug level */
+ unsigned short proto; /* protocol to use */
+ unsigned int afid; /* authentication fid */
+ unsigned int rfdno; /* read file descriptor number */
+ unsigned int wfdno; /* write file descriptor number */
+
+
+ char *name; /* user name to mount as */
+ char *remotename; /* name of remote hierarchy being mounted */
+ unsigned int uid; /* default uid/muid for legacy support */
+ unsigned int gid; /* default gid for legacy support */
+
+ /* book keeping */
+ struct v9fs_idpool fidpool; /* The FID pool for file descriptors */
+ struct v9fs_idpool tidpool; /* The TID pool for transactions ids */
+
+ /* transport information */
+ struct v9fs_transport *transport;
+
+ int inprogress; /* session in progress => true */
+ int shutdown; /* session shutting down. no more attaches. */
+ unsigned char session_hung;
+
+ /* mux private data */
+ struct v9fs_fcall *curfcall;
+ wait_queue_head_t read_wait;
+ struct completion fcread;
+ struct completion proccmpl;
+ struct task_struct *recvproc;
+
+ spinlock_t muxlock;
+ struct list_head mux_fcalls;
+};
+
+/* possible values of ->proto */
+enum {
+ PROTO_TCP,
+ PROTO_UNIX,
+ PROTO_FD,
+};
+
+int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
+struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
+void v9fs_session_close(struct v9fs_session_info *v9ses);
+int v9fs_get_idpool(struct v9fs_idpool *p);
+void v9fs_put_idpool(int id, struct v9fs_idpool *p);
+void v9fs_session_cancel(struct v9fs_session_info *v9ses);
+
+#define V9FS_MAGIC 0x01021997
+
+/* other default globals */
+#define V9FS_PORT 564
+#define V9FS_DEFUSER "nobody"
+#define V9FS_DEFANAME ""
+
+/* inital pool sizes for fids and tags */
+#define V9FS_START_FIDS 8192
+#define V9FS_START_TIDS 256
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
new file mode 100644
index 000000000000..2f2cea7ee3e7
--- /dev/null
+++ b/fs/9p/v9fs_vfs.h
@@ -0,0 +1,53 @@
+/*
+ * V9FS VFS extensions.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+/* plan9 semantics are that created files are implicitly opened.
+ * But linux semantics are that you call create, then open.
+ * the plan9 approach is superior as it provides an atomic
+ * open.
+ * we track the create fid here. When the file is opened, if fidopen is
+ * non-zero, we use the fid and can skip some steps.
+ * there may be a better way to do this, but I don't know it.
+ * one BAD way is to clunk the fid on create, then open it again:
+ * you lose the atomicity of file open
+ */
+
+/* special case:
+ * unlink calls remove, which is an implicit clunk. So we have to track
+ * that kind of thing so that we don't try to clunk a dead fid.
+ */
+
+extern struct file_system_type v9fs_fs_type;
+extern struct file_operations v9fs_file_operations;
+extern struct file_operations v9fs_dir_operations;
+extern struct dentry_operations v9fs_dentry_operations;
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode);
+ino_t v9fs_qid2ino(struct v9fs_qid *qid);
+void v9fs_mistat2inode(struct v9fs_stat *, struct inode *,
+ struct super_block *);
+int v9fs_dir_release(struct inode *inode, struct file *filp);
+int v9fs_file_open(struct inode *inode, struct file *file);
+void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat);
+void v9fs_dentry_release(struct dentry *);
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
new file mode 100644
index 000000000000..306c96741f81
--- /dev/null
+++ b/fs/9p/vfs_dentry.c
@@ -0,0 +1,126 @@
+/*
+ * linux/fs/9p/vfs_dentry.c
+ *
+ * This file contians vfs dentry ops for the 9P2000 protocol.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * v9fs_dentry_validate - VFS dcache hook to validate cache
+ * @dentry: dentry that is being validated
+ * @nd: path data
+ *
+ * dcache really shouldn't be used for 9P2000 as at all due to
+ * potential attached semantics to directory traversal (walk).
+ *
+ * FUTURE: look into how to use dcache to allow multi-stage
+ * walks in Plan 9 & potential for better dcache operation which
+ * would remain valid for Plan 9 semantics. Older versions
+ * had validation via stat for those interested. However, since
+ * stat has the same approximate overhead as walk there really
+ * is no difference. The only improvement would be from a
+ * time-decay cache like NFS has and that undermines the
+ * synchronous nature of 9P2000.
+ *
+ */
+
+static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
+{
+ struct dentry *dc = current->fs->pwd;
+
+ dprintk(DEBUG_VFS, "dentry: %s (%p)\n", dentry->d_iname, dentry);
+ if (v9fs_fid_lookup(dentry, FID_OP)) {
+ dprintk(DEBUG_VFS, "VALID\n");
+ return 1;
+ }
+
+ while (dc != NULL) {
+ if (dc == dentry) {
+ dprintk(DEBUG_VFS, "VALID\n");
+ return 1;
+ }
+ if (dc == dc->d_parent)
+ break;
+
+ dc = dc->d_parent;
+ }
+
+ dprintk(DEBUG_VFS, "INVALID\n");
+ return 0;
+}
+
+/**
+ * v9fs_dentry_release - called when dentry is going to be freed
+ * @dentry: dentry that is being release
+ *
+ */
+
+void v9fs_dentry_release(struct dentry *dentry)
+{
+ dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
+ if (dentry->d_fsdata != NULL) {
+ struct list_head *fid_list = dentry->d_fsdata;
+ struct v9fs_fid *temp = NULL;
+ struct v9fs_fid *current_fid = NULL;
+ struct v9fs_fcall *fcall = NULL;
+
+ list_for_each_entry_safe(current_fid, temp, fid_list, list) {
+ if (v9fs_t_clunk
+ (current_fid->v9ses, current_fid->fid, &fcall))
+ dprintk(DEBUG_ERROR, "clunk failed: %s\n",
+ FCALL_ERROR(fcall));
+
+ v9fs_put_idpool(current_fid->fid,
+ &current_fid->v9ses->fidpool);
+
+ kfree(fcall);
+ v9fs_fid_destroy(current_fid);
+ }
+
+ kfree(dentry->d_fsdata); /* free the list_head */
+ }
+}
+
+struct dentry_operations v9fs_dentry_operations = {
+ .d_revalidate = v9fs_dentry_validate,
+ .d_release = v9fs_dentry_release,
+};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
new file mode 100644
index 000000000000..c478a7384186
--- /dev/null
+++ b/fs/9p/vfs_dir.c
@@ -0,0 +1,226 @@
+/*
+ * linux/fs/9p/vfs_dir.c
+ *
+ * This file contains vfs directory ops for the 9P2000 protocol.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+/**
+ * dt_type - return file type
+ * @mistat: mistat structure
+ *
+ */
+
+static inline int dt_type(struct v9fs_stat *mistat)
+{
+ unsigned long perm = mistat->mode;
+ int rettype = DT_REG;
+
+ if (perm & V9FS_DMDIR)
+ rettype = DT_DIR;
+ if (perm & V9FS_DMSYMLINK)
+ rettype = DT_LNK;
+
+ return rettype;
+}
+
+/**
+ * v9fs_dir_readdir - read a directory
+ * @filep: opened file structure
+ * @dirent: directory structure ???
+ * @filldir: function to populate directory structure ???
+ *
+ */
+
+static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ struct v9fs_fcall *fcall = NULL;
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+ struct v9fs_fid *file = filp->private_data;
+ unsigned int i, n;
+ int fid = -1;
+ int ret = 0;
+ struct v9fs_stat *mi = NULL;
+ int over = 0;
+
+ dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
+
+ fid = file->fid;
+
+ mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
+ if (!mi)
+ return -ENOMEM;
+
+ if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
+ kfree(file->rdir_fcall);
+ file->rdir_fcall = NULL;
+ }
+
+ if (file->rdir_fcall) {
+ n = file->rdir_fcall->params.rread.count;
+ i = file->rdir_fpos;
+ while (i < n) {
+ int s = v9fs_deserialize_stat(v9ses,
+ file->rdir_fcall->params.rread.data + i,
+ n - i, mi, v9ses->maxdata);
+
+ if (s == 0) {
+ dprintk(DEBUG_ERROR,
+ "error while deserializing mistat\n");
+ ret = -EIO;
+ goto FreeStructs;
+ }
+
+ over = filldir(dirent, mi->name, strlen(mi->name),
+ filp->f_pos, v9fs_qid2ino(&mi->qid),
+ dt_type(mi));
+
+ if (over) {
+ file->rdir_fpos = i;
+ file->rdir_pos = filp->f_pos;
+ break;
+ }
+
+ i += s;
+ filp->f_pos += s;
+ }
+
+ if (!over) {
+ kfree(file->rdir_fcall);
+ file->rdir_fcall = NULL;
+ }
+ }
+
+ while (!over) {
+ ret = v9fs_t_read(v9ses, fid, filp->f_pos,
+ v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
+ if (ret < 0) {
+ dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
+ ret, fcall);
+ goto FreeStructs;
+ } else if (ret == 0)
+ break;
+
+ n = ret;
+ i = 0;
+ while (i < n) {
+ int s = v9fs_deserialize_stat(v9ses,
+ fcall->params.rread.data + i, n - i, mi,
+ v9ses->maxdata);
+
+ if (s == 0) {
+ dprintk(DEBUG_ERROR,
+ "error while deserializing mistat\n");
+ return -EIO;
+ }
+
+ over = filldir(dirent, mi->name, strlen(mi->name),
+ filp->f_pos, v9fs_qid2ino(&mi->qid),
+ dt_type(mi));
+
+ if (over) {
+ file->rdir_fcall = fcall;
+ file->rdir_fpos = i;
+ file->rdir_pos = filp->f_pos;
+ fcall = NULL;
+ break;
+ }
+
+ i += s;
+ filp->f_pos += s;
+ }
+
+ kfree(fcall);
+ }
+
+ FreeStructs:
+ kfree(fcall);
+ kfree(mi);
+ return ret;
+}
+
+/**
+ * v9fs_dir_release - close a directory
+ * @inode: inode of the directory
+ * @filp: file pointer to a directory
+ *
+ */
+
+int v9fs_dir_release(struct inode *inode, struct file *filp)
+{
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+ struct v9fs_fid *fid = filp->private_data;
+ int fidnum = -1;
+
+ dprintk(DEBUG_VFS, "inode: %p filp: %p fid: %d\n", inode, filp,
+ fid->fid);
+ fidnum = fid->fid;
+
+ filemap_fdatawrite(inode->i_mapping);
+ filemap_fdatawait(inode->i_mapping);
+
+ if (fidnum >= 0) {
+ fid->fidopen--;
+ dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
+ fid->fid);
+
+ if (fid->fidopen == 0) {
+ if (v9fs_t_clunk(v9ses, fidnum, NULL))
+ dprintk(DEBUG_ERROR, "clunk failed\n");
+
+ v9fs_put_idpool(fid->fid, &v9ses->fidpool);
+ }
+
+ kfree(fid->rdir_fcall);
+
+ filp->private_data = NULL;
+ v9fs_fid_destroy(fid);
+ }
+
+ d_drop(filp->f_dentry);
+ return 0;
+}
+
+struct file_operations v9fs_dir_operations = {
+ .read = generic_read_dir,
+ .readdir = v9fs_dir_readdir,
+ .open = v9fs_file_open,
+ .release = v9fs_dir_release,
+};
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
new file mode 100644
index 000000000000..1f8ae7d580ab
--- /dev/null
+++ b/fs/9p/vfs_file.c
@@ -0,0 +1,401 @@
+/*
+ * linux/fs/9p/vfs_file.c
+ *
+ * This file contians vfs file ops for 9P2000.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/version.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "fid.h"
+
+/**
+ * v9fs_file_open - open a file (or directory)
+ * @inode: inode to be opened
+ * @file: file being opened
+ *
+ */
+
+int v9fs_file_open(struct inode *inode, struct file *file)
+{
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+ struct v9fs_fid *v9fid = v9fs_fid_lookup(file->f_dentry, FID_WALK);
+ struct v9fs_fid *v9newfid = NULL;
+ struct v9fs_fcall *fcall = NULL;
+ int open_mode = 0;
+ unsigned int iounit = 0;
+ int newfid = -1;
+ long result = -1;
+
+ dprintk(DEBUG_VFS, "inode: %p file: %p v9fid= %p\n", inode, file,
+ v9fid);
+
+ if (!v9fid) {
+ struct dentry *dentry = file->f_dentry;
+ dprintk(DEBUG_ERROR, "Couldn't resolve fid from dentry\n");
+
+ /* XXX - some duplication from lookup, generalize later */
+ /* basically vfs_lookup is too heavy weight */
+ v9fid = v9fs_fid_lookup(file->f_dentry, FID_OP);
+ if (!v9fid)
+ return -EBADF;
+
+ v9fid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
+ if (!v9fid)
+ return -EBADF;
+
+ newfid = v9fs_get_idpool(&v9ses->fidpool);
+ if (newfid < 0) {
+ eprintk(KERN_WARNING, "newfid fails!\n");
+ return -ENOSPC;
+ }
+
+ result =
+ v9fs_t_walk(v9ses, v9fid->fid, newfid,
+ (char *)file->f_dentry->d_name.name, NULL);
+ if (result < 0) {
+ v9fs_put_idpool(newfid, &v9ses->fidpool);
+ dprintk(DEBUG_ERROR, "rewalk didn't work\n");
+ return -EBADF;
+ }
+
+ v9fid = v9fs_fid_create(dentry);
+ if (v9fid == NULL) {
+ dprintk(DEBUG_ERROR, "couldn't insert\n");
+ return -ENOMEM;
+ }
+ v9fid->fid = newfid;
+ }
+
+ if (v9fid->fidcreate) {
+ /* create case */
+ newfid = v9fid->fid;
+ iounit = v9fid->iounit;
+ v9fid->fidcreate = 0;
+ } else {
+ if (!S_ISDIR(inode->i_mode))
+ newfid = v9fid->fid;
+ else {
+ newfid = v9fs_get_idpool(&v9ses->fidpool);
+ if (newfid < 0) {
+ eprintk(KERN_WARNING, "allocation failed\n");
+ return -ENOSPC;
+ }
+ /* This would be a somewhat critical clone */
+ result =
+ v9fs_t_walk(v9ses, v9fid->fid, newfid, NULL,
+ &fcall);
+ if (result < 0) {
+ dprintk(DEBUG_ERROR, "clone error: %s\n",
+ FCALL_ERROR(fcall));
+ kfree(fcall);
+ return result;
+ }
+
+ v9newfid = v9fs_fid_create(file->f_dentry);
+ v9newfid->fid = newfid;
+ v9newfid->qid = v9fid->qid;
+ v9newfid->iounit = v9fid->iounit;
+ v9newfid->fidopen = 0;
+ v9newfid->fidclunked = 0;
+ v9newfid->v9ses = v9ses;
+ v9fid = v9newfid;
+ kfree(fcall);
+ }
+
+ /* TODO: do special things for O_EXCL, O_NOFOLLOW, O_SYNC */
+ /* translate open mode appropriately */
+ open_mode = file->f_flags & 0x3;
+
+ if (file->f_flags & O_EXCL)
+ open_mode |= V9FS_OEXCL;
+
+ if (v9ses->extended) {
+ if (file->f_flags & O_TRUNC)
+ open_mode |= V9FS_OTRUNC;
+
+ if (file->f_flags & O_APPEND)
+ open_mode |= V9FS_OAPPEND;
+ }
+
+ result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
+ if (result < 0) {
+ dprintk(DEBUG_ERROR,
+ "open failed, open_mode 0x%x: %s\n", open_mode,
+ FCALL_ERROR(fcall));
+ kfree(fcall);
+ return result;
+ }
+
+ iounit = fcall->params.ropen.iounit;
+ kfree(fcall);
+ }
+
+
+ file->private_data = v9fid;
+
+ v9fid->rdir_pos = 0;
+ v9fid->rdir_fcall = NULL;
+ v9fid->fidopen = 1;
+ v9fid->filp = file;
+ v9fid->iounit = iounit;
+
+ return 0;
+}
+
+/**
+ * v9fs_file_lock - lock a file (or directory)
+ * @inode: inode to be opened
+ * @file: file being opened
+ *
+ * XXX - this looks like a local only lock, we should extend into 9P
+ * by using open exclusive
+ */
+
+static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+ int res = 0;
+ struct inode *inode = filp->f_dentry->d_inode;
+
+ dprintk(DEBUG_VFS, "filp: %p lock: %p\n", filp, fl);
+
+ /* No mandatory locks */
+ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+ return -ENOLCK;
+
+ if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
+ filemap_fdatawrite(inode->i_mapping);
+ filemap_fdatawait(inode->i_mapping);
+ invalidate_inode_pages(&inode->i_data);
+ }
+
+ return res;
+}
+
+/**
+ * v9fs_read - read from a file (internal)
+ * @filep: file pointer to read
+ * @data: data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+
+static ssize_t
+v9fs_read(struct file *filp, char *buffer, size_t count, loff_t * offset)
+{
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+ struct v9fs_fid *v9f = filp->private_data;
+ struct v9fs_fcall *fcall = NULL;
+ int fid = v9f->fid;
+ int rsize = 0;
+ int result = 0;
+ int total = 0;
+
+ dprintk(DEBUG_VFS, "\n");
+
+ rsize = v9ses->maxdata - V9FS_IOHDRSZ;
+ if (v9f->iounit != 0 && rsize > v9f->iounit)
+ rsize = v9f->iounit;
+
+ do {
+ if (count < rsize)
+ rsize = count;
+
+ result = v9fs_t_read(v9ses, fid, *offset, rsize, &fcall);
+
+ if (result < 0) {
+ printk(KERN_ERR "9P2000: v9fs_t_read returned %d\n",
+ result);
+
+ kfree(fcall);
+ return total;
+ } else
+ *offset += result;
+
+ /* XXX - extra copy */
+ memcpy(buffer, fcall->params.rread.data, result);
+ count -= result;
+ buffer += result;
+ total += result;
+
+ kfree(fcall);
+
+ if (result < rsize)
+ break;
+ } while (count);
+
+ return total;
+}
+
+/**
+ * v9fs_file_read - read from a file
+ * @filep: file pointer to read
+ * @data: data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+
+static ssize_t
+v9fs_file_read(struct file *filp, char __user * data, size_t count,
+ loff_t * offset)
+{
+ int retval = -1;
+ int ret = 0;
+ char *buffer;
+
+ buffer = kmalloc(count, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+ retval = v9fs_read(filp, buffer, count, offset);
+ if (retval > 0) {
+ if ((ret = copy_to_user(data, buffer, retval)) != 0) {
+ dprintk(DEBUG_ERROR, "Problem copying to user %d\n",
+ ret);
+ retval = ret;
+ }
+ }
+
+ kfree(buffer);
+
+ return retval;
+}
+
+/**
+ * v9fs_write - write to a file
+ * @filep: file pointer to write
+ * @data: data buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+
+static ssize_t
+v9fs_write(struct file *filp, char *buffer, size_t count, loff_t * offset)
+{
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
+ struct v9fs_fid *v9fid = filp->private_data;
+ struct v9fs_fcall *fcall;
+ int fid = v9fid->fid;
+ int result = -EIO;
+ int rsize = 0;
+ int total = 0;
+
+ dprintk(DEBUG_VFS, "data %p count %d offset %x\n", buffer, (int)count,
+ (int)*offset);
+ rsize = v9ses->maxdata - V9FS_IOHDRSZ;
+ if (v9fid->iounit != 0 && rsize > v9fid->iounit)
+ rsize = v9fid->iounit;
+
+ dump_data(buffer, count);
+
+ do {
+ if (count < rsize)
+ rsize = count;
+
+ result =
+ v9fs_t_write(v9ses, fid, *offset, rsize, buffer, &fcall);
+ if (result < 0) {
+ eprintk(KERN_ERR, "error while writing: %s(%d)\n",
+ FCALL_ERROR(fcall), result);
+ kfree(fcall);
+ return result;
+ } else
+ *offset += result;
+
+ kfree(fcall);
+
+ if (result != rsize) {
+ eprintk(KERN_ERR,
+ "short write: v9fs_t_write returned %d\n",
+ result);
+ break;
+ }
+
+ count -= result;
+ buffer += result;
+ total += result;
+ } while (count);
+
+ return total;
+}
+
+/**
+ * v9fs_file_write - write to a file
+ * @filep: file pointer to write
+ * @data: data buffer to write data from
+ * @count: size of buffer
+ * @offset: offset at which to write data
+ *
+ */
+
+static ssize_t
+v9fs_file_write(struct file *filp, const char __user * data,
+ size_t count, loff_t * offset)
+{
+ int ret = -1;
+ char *buffer;
+
+ buffer = kmalloc(count, GFP_KERNEL);
+ if (buffer == NULL)
+ return -ENOMEM;
+
+ ret = copy_from_user(buffer, data, count);
+ if (ret) {
+ dprintk(DEBUG_ERROR, "Problem copying from user\n");
+ ret = -EFAULT;
+ } else {
+ ret = v9fs_write(filp, buffer, count, offset);
+ }
+
+ kfree(buffer);
+
+ return ret;
+}
+
+struct file_operations v9fs_file_operations = {
+ .llseek = generic_file_llseek,
+ .read = v9fs_file_read,
+ .write = v9fs_file_write,
+ .open = v9fs_file_open,
+ .release = v9fs_dir_release,
+ .lock = v9fs_file_lock,
+};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
new file mode 100644
index 000000000000..0c13fc600049
--- /dev/null
+++ b/fs/9p/vfs_inode.c
@@ -0,0 +1,1338 @@
+/*
+ * linux/fs/9p/vfs_inode.c
+ *
+ * This file contains vfs inode ops for the 9P2000 protocol.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/namei.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+static struct inode_operations v9fs_dir_inode_operations;
+static struct inode_operations v9fs_dir_inode_operations_ext;
+static struct inode_operations v9fs_file_inode_operations;
+static struct inode_operations v9fs_symlink_inode_operations;
+
+/**
+ * unixmode2p9mode - convert unix mode bits to plan 9
+ * @v9ses: v9fs session information
+ * @mode: mode to convert
+ *
+ */
+
+static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
+{
+ int res;
+ res = mode & 0777;
+ if (S_ISDIR(mode))
+ res |= V9FS_DMDIR;
+ if (v9ses->extended) {
+ if (S_ISLNK(mode))
+ res |= V9FS_DMSYMLINK;
+ if (v9ses->nodev == 0) {
+ if (S_ISSOCK(mode))
+ res |= V9FS_DMSOCKET;
+ if (S_ISFIFO(mode))
+ res |= V9FS_DMNAMEDPIPE;
+ if (S_ISBLK(mode))
+ res |= V9FS_DMDEVICE;
+ if (S_ISCHR(mode))
+ res |= V9FS_DMDEVICE;
+ }
+
+ if ((mode & S_ISUID) == S_ISUID)
+ res |= V9FS_DMSETUID;
+ if ((mode & S_ISGID) == S_ISGID)
+ res |= V9FS_DMSETGID;
+ if ((mode & V9FS_DMLINK))
+ res |= V9FS_DMLINK;
+ }
+
+ return res;
+}
+
+/**
+ * p9mode2unixmode- convert plan9 mode bits to unix mode bits
+ * @v9ses: v9fs session information
+ * @mode: mode to convert
+ *
+ */
+
+static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
+{
+ int res;
+
+ res = mode & 0777;
+
+ if ((mode & V9FS_DMDIR) == V9FS_DMDIR)
+ res |= S_IFDIR;
+ else if ((mode & V9FS_DMSYMLINK) && (v9ses->extended))
+ res |= S_IFLNK;
+ else if ((mode & V9FS_DMSOCKET) && (v9ses->extended)
+ && (v9ses->nodev == 0))
+ res |= S_IFSOCK;
+ else if ((mode & V9FS_DMNAMEDPIPE) && (v9ses->extended)
+ && (v9ses->nodev == 0))
+ res |= S_IFIFO;
+ else if ((mode & V9FS_DMDEVICE) && (v9ses->extended)
+ && (v9ses->nodev == 0))
+ res |= S_IFBLK;
+ else
+ res |= S_IFREG;
+
+ if (v9ses->extended) {
+ if ((mode & V9FS_DMSETUID) == V9FS_DMSETUID)
+ res |= S_ISUID;
+
+ if ((mode & V9FS_DMSETGID) == V9FS_DMSETGID)
+ res |= S_ISGID;
+ }
+
+ return res;
+}
+
+/**
+ * v9fs_blank_mistat - helper function to setup a 9P stat structure
+ * @v9ses: 9P session info (for determining extended mode)
+ * @mistat: structure to initialize
+ *
+ */
+
+static void
+v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
+{
+ mistat->type = ~0;
+ mistat->dev = ~0;
+ mistat->qid.type = ~0;
+ mistat->qid.version = ~0;
+ *((long long *)&mistat->qid.path) = ~0;
+ mistat->mode = ~0;
+ mistat->atime = ~0;
+ mistat->mtime = ~0;
+ mistat->length = ~0;
+ mistat->name = mistat->data;
+ mistat->uid = mistat->data;
+ mistat->gid = mistat->data;
+ mistat->muid = mistat->data;
+ if (v9ses->extended) {
+ mistat->n_uid = ~0;
+ mistat->n_gid = ~0;
+ mistat->n_muid = ~0;
+ mistat->extension = mistat->data;
+ }
+ *mistat->data = 0;
+}
+
+/**
+ * v9fs_mistat2unix - convert mistat to unix stat
+ * @mistat: Plan 9 metadata (mistat) structure
+ * @buf: unix metadata (stat) structure to populate
+ * @sb: superblock
+ *
+ */
+
+static void
+v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
+ struct super_block *sb)
+{
+ struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL;
+
+ buf->st_nlink = 1;
+
+ buf->st_atime = mistat->atime;
+ buf->st_mtime = mistat->mtime;
+ buf->st_ctime = mistat->mtime;
+
+ buf->st_uid = (unsigned short)-1;
+ buf->st_gid = (unsigned short)-1;
+
+ if (v9ses && v9ses->extended) {
+ /* TODO: string to uid mapping via user-space daemon */
+ if (mistat->n_uid != -1)
+ sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
+
+ if (mistat->n_gid != -1)
+ sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid);
+ }
+
+ if (buf->st_uid == (unsigned short)-1)
+ buf->st_uid = v9ses->uid;
+ if (buf->st_gid == (unsigned short)-1)
+ buf->st_gid = v9ses->gid;
+
+ buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
+ if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
+ char type = 0;
+ int major = -1;
+ int minor = -1;
+ sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+ switch (type) {
+ case 'c':
+ buf->st_mode &= ~S_IFBLK;
+ buf->st_mode |= S_IFCHR;
+ break;
+ case 'b':
+ break;
+ default:
+ dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
+ type, mistat->extension);
+ };
+ buf->st_rdev = MKDEV(major, minor);
+ } else
+ buf->st_rdev = 0;
+
+ buf->st_size = mistat->length;
+
+ buf->st_blksize = sb->s_blocksize;
+ buf->st_blocks =
+ (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
+}
+
+/**
+ * v9fs_get_inode - helper function to setup an inode
+ * @sb: superblock
+ * @mode: mode to setup inode with
+ *
+ */
+
+struct inode *v9fs_get_inode(struct super_block *sb, int mode)
+{
+ struct inode *inode = NULL;
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ dprintk(DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
+
+ inode = new_inode(sb);
+ if (inode) {
+ inode->i_mode = mode;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_blksize = sb->s_blocksize;
+ inode->i_blocks = 0;
+ inode->i_rdev = 0;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+ switch (mode & S_IFMT) {
+ case S_IFIFO:
+ case S_IFBLK:
+ case S_IFCHR:
+ case S_IFSOCK:
+ if(!v9ses->extended) {
+ dprintk(DEBUG_ERROR, "special files without extended mode\n");
+ return ERR_PTR(-EINVAL);
+ }
+ init_special_inode(inode, inode->i_mode,
+ inode->i_rdev);
+ break;
+ case S_IFREG:
+ inode->i_op = &v9fs_file_inode_operations;
+ inode->i_fop = &v9fs_file_operations;
+ break;
+ case S_IFLNK:
+ if(!v9ses->extended) {
+ dprintk(DEBUG_ERROR, "extended modes used w/o 9P2000.u\n");
+ return ERR_PTR(-EINVAL);
+ }
+ inode->i_op = &v9fs_symlink_inode_operations;
+ break;
+ case S_IFDIR:
+ inode->i_nlink++;
+ if(v9ses->extended)
+ inode->i_op = &v9fs_dir_inode_operations_ext;
+ else
+ inode->i_op = &v9fs_dir_inode_operations;
+ inode->i_fop = &v9fs_dir_operations;
+ break;
+ default:
+ dprintk(DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n",
+ mode, mode & S_IFMT);
+ return ERR_PTR(-EINVAL);
+ }
+ } else {
+ eprintk(KERN_WARNING, "Problem allocating inode\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ return inode;
+}
+
+/**
+ * v9fs_create - helper function to create files and directories
+ * @dir: directory inode file is being created in
+ * @file_dentry: dentry file is being created in
+ * @perm: permissions file is being created with
+ * @open_mode: resulting open mode for file
+ *
+ */
+
+static int
+v9fs_create(struct inode *dir,
+ struct dentry *file_dentry,
+ unsigned int perm, unsigned int open_mode)
+{
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+ struct super_block *sb = dir->i_sb;
+ struct v9fs_fid *dirfid =
+ v9fs_fid_lookup(file_dentry->d_parent, FID_WALK);
+ struct v9fs_fid *fid = NULL;
+ struct inode *file_inode = NULL;
+ struct v9fs_fcall *fcall = NULL;
+ struct v9fs_qid qid;
+ struct stat newstat;
+ int dirfidnum = -1;
+ long newfid = -1;
+ int result = 0;
+ unsigned int iounit = 0;
+
+ perm = unixmode2p9mode(v9ses, perm);
+
+ dprintk(DEBUG_VFS, "dir: %p dentry: %p perm: %o mode: %o\n", dir,
+ file_dentry, perm, open_mode);
+
+ if (!dirfid)
+ return -EBADF;
+
+ dirfidnum = dirfid->fid;
+ if (dirfidnum < 0) {
+ dprintk(DEBUG_ERROR, "No fid for the directory #%lu\n",
+ dir->i_ino);
+ return -EBADF;
+ }
+
+ if (file_dentry->d_inode) {
+ dprintk(DEBUG_ERROR,
+ "Odd. There is an inode for dir %lu, name :%s:\n",
+ dir->i_ino, file_dentry->d_name.name);
+ return -EEXIST;
+ }
+
+ newfid = v9fs_get_idpool(&v9ses->fidpool);
+ if (newfid < 0) {
+ eprintk(KERN_WARNING, "no free fids available\n");
+ return -ENOSPC;
+ }
+
+ result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
+ if (result < 0) {
+ dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+ v9fs_put_idpool(newfid, &v9ses->fidpool);
+ newfid = 0;
+ goto CleanUpFid;
+ }
+
+ kfree(fcall);
+
+ result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
+ perm, open_mode, &fcall);
+ if (result < 0) {
+ dprintk(DEBUG_ERROR, "create fails: %s(%d)\n",
+ FCALL_ERROR(fcall), result);
+
+ goto CleanUpFid;
+ }
+
+ iounit = fcall->params.rcreate.iounit;
+ qid = fcall->params.rcreate.qid;
+ kfree(fcall);
+
+ fid = v9fs_fid_create(file_dentry);
+ if (!fid) {
+ result = -ENOMEM;
+ goto CleanUpFid;
+ }
+
+ fid->fid = newfid;
+ fid->fidopen = 0;
+ fid->fidcreate = 1;
+ fid->qid = qid;
+ fid->iounit = iounit;
+ fid->rdir_pos = 0;
+ fid->rdir_fcall = NULL;
+ fid->v9ses = v9ses;
+
+ if ((perm & V9FS_DMSYMLINK) || (perm & V9FS_DMLINK) ||
+ (perm & V9FS_DMNAMEDPIPE) || (perm & V9FS_DMSOCKET) ||
+ (perm & V9FS_DMDEVICE))
+ return 0;
+
+ result = v9fs_t_stat(v9ses, newfid, &fcall);
+ if (result < 0) {
+ dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall),
+ result);
+ goto CleanUpFid;
+ }
+
+ v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
+
+ file_inode = v9fs_get_inode(sb, newstat.st_mode);
+ if ((!file_inode) || IS_ERR(file_inode)) {
+ dprintk(DEBUG_ERROR, "create inode failed\n");
+ result = -EBADF;
+ goto CleanUpFid;
+ }
+
+ v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb);
+ kfree(fcall);
+ d_instantiate(file_dentry, file_inode);
+
+ if (perm & V9FS_DMDIR) {
+ if (v9fs_t_clunk(v9ses, newfid, &fcall))
+ dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
+ FCALL_ERROR(fcall));
+
+ v9fs_put_idpool(newfid, &v9ses->fidpool);
+ kfree(fcall);
+ fid->fidopen = 0;
+ fid->fidcreate = 0;
+ d_drop(file_dentry);
+ }
+
+ return 0;
+
+ CleanUpFid:
+ kfree(fcall);
+
+ if (newfid) {
+ if (v9fs_t_clunk(v9ses, newfid, &fcall))
+ dprintk(DEBUG_ERROR, "clunk failed: %s\n",
+ FCALL_ERROR(fcall));
+
+ v9fs_put_idpool(newfid, &v9ses->fidpool);
+ kfree(fcall);
+ }
+ return result;
+}
+
+/**
+ * v9fs_remove - helper function to remove files and directories
+ * @dir: directory inode that is being deleted
+ * @file: dentry that is being deleted
+ * @rmdir: removing a directory
+ *
+ */
+
+static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
+{
+ struct v9fs_fcall *fcall = NULL;
+ struct super_block *sb = NULL;
+ struct v9fs_session_info *v9ses = NULL;
+ struct v9fs_fid *v9fid = NULL;
+ struct inode *file_inode = NULL;
+ int fid = -1;
+ int result = 0;
+
+ dprintk(DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
+ rmdir);
+
+ file_inode = file->d_inode;
+ sb = file_inode->i_sb;
+ v9ses = v9fs_inode2v9ses(file_inode);
+ v9fid = v9fs_fid_lookup(file, FID_OP);
+
+ if (!v9fid) {
+ dprintk(DEBUG_ERROR,
+ "no v9fs_fid\n");
+ return -EBADF;
+ }
+
+ fid = v9fid->fid;
+ if (fid < 0) {
+ dprintk(DEBUG_ERROR, "inode #%lu, no fid!\n",
+ file_inode->i_ino);
+ return -EBADF;
+ }
+
+ result = v9fs_t_remove(v9ses, fid, &fcall);
+ if (result < 0)
+ dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n",
+ FCALL_ERROR(fcall), result);
+ else {
+ v9fs_put_idpool(fid, &v9ses->fidpool);
+ v9fs_fid_destroy(v9fid);
+ }
+
+ kfree(fcall);
+ return result;
+}
+
+/**
+ * v9fs_vfs_create - VFS hook to create files
+ * @inode: directory inode that is being deleted
+ * @dentry: dentry that is being deleted
+ * @perm: create permissions
+ * @nd: path information
+ *
+ */
+
+static int
+v9fs_vfs_create(struct inode *inode, struct dentry *dentry, int perm,
+ struct nameidata *nd)
+{
+ return v9fs_create(inode, dentry, perm, O_RDWR);
+}
+
+/**
+ * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
+ * @inode: inode that is being unlinked
+ * @dentry: dentry that is being unlinked
+ * @mode: mode for new directory
+ *
+ */
+
+static int v9fs_vfs_mkdir(struct inode *inode, struct dentry *dentry, int mode)
+{
+ return v9fs_create(inode, dentry, mode | S_IFDIR, O_RDONLY);
+}
+
+/**
+ * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode
+ * @dir: inode that is being walked from
+ * @dentry: dentry that is being walked to?
+ * @nameidata: path data
+ *
+ */
+
+static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nameidata)
+{
+ struct super_block *sb;
+ struct v9fs_session_info *v9ses;
+ struct v9fs_fid *dirfid;
+ struct v9fs_fid *fid;
+ struct inode *inode;
+ struct v9fs_fcall *fcall = NULL;
+ struct stat newstat;
+ int dirfidnum = -1;
+ int newfid = -1;
+ int result = 0;
+
+ dprintk(DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n",
+ dir, dentry->d_iname, dentry, nameidata);
+
+ sb = dir->i_sb;
+ v9ses = v9fs_inode2v9ses(dir);
+ dirfid = v9fs_fid_lookup(dentry->d_parent, FID_WALK);
+
+ if (!dirfid) {
+ dprintk(DEBUG_ERROR, "no dirfid\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ dirfidnum = dirfid->fid;
+
+ if (dirfidnum < 0) {
+ dprintk(DEBUG_ERROR, "no dirfid for inode %p, #%lu\n",
+ dir, dir->i_ino);
+ return ERR_PTR(-EBADF);
+ }
+
+ newfid = v9fs_get_idpool(&v9ses->fidpool);
+ if (newfid < 0) {
+ eprintk(KERN_WARNING, "newfid fails!\n");
+ return ERR_PTR(-ENOSPC);
+ }
+
+ result =
+ v9fs_t_walk(v9ses, dirfidnum, newfid, (char *)dentry->d_name.name,
+ NULL);
+ if (result < 0) {
+ v9fs_put_idpool(newfid, &v9ses->fidpool);
+ if (result == -ENOENT) {
+ d_add(dentry, NULL);
+ dprintk(DEBUG_ERROR,
+ "Return negative dentry %p count %d\n",
+ dentry, atomic_read(&dentry->d_count));
+ return NULL;
+ }
+ dprintk(DEBUG_ERROR, "walk error:%d\n", result);
+ goto FreeFcall;
+ }
+
+ result = v9fs_t_stat(v9ses, newfid, &fcall);
+ if (result < 0) {
+ dprintk(DEBUG_ERROR, "stat error\n");
+ goto FreeFcall;
+ }
+
+ v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
+ inode = v9fs_get_inode(sb, newstat.st_mode);
+
+ if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
+ eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
+ PTR_ERR(inode));
+
+ result = -ENOSPC;
+ goto FreeFcall;
+ }
+
+ inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+
+ fid = v9fs_fid_create(dentry);
+ if (fid == NULL) {
+ dprintk(DEBUG_ERROR, "couldn't insert\n");
+ result = -ENOMEM;
+ goto FreeFcall;
+ }
+
+ fid->fid = newfid;
+ fid->fidopen = 0;
+ fid->v9ses = v9ses;
+ fid->qid = fcall->params.rstat.stat->qid;
+
+ dentry->d_op = &v9fs_dentry_operations;
+ v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb);
+
+ d_add(dentry, inode);
+ kfree(fcall);
+
+ return NULL;
+
+ FreeFcall:
+ kfree(fcall);
+ return ERR_PTR(result);
+}
+
+/**
+ * v9fs_vfs_unlink - VFS unlink hook to delete an inode
+ * @i: inode that is being unlinked
+ * @d: dentry that is being unlinked
+ *
+ */
+
+static int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
+{
+ return v9fs_remove(i, d, 0);
+}
+
+/**
+ * v9fs_vfs_rmdir - VFS unlink hook to delete a directory
+ * @i: inode that is being unlinked
+ * @d: dentry that is being unlinked
+ *
+ */
+
+static int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
+{
+ return v9fs_remove(i, d, 1);
+}
+
+/**
+ * v9fs_vfs_rename - VFS hook to rename an inode
+ * @old_dir: old dir inode
+ * @old_dentry: old dentry
+ * @new_dir: new dir inode
+ * @new_dentry: new dentry
+ *
+ */
+
+static int
+v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct inode *old_inode = old_dentry->d_inode;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(old_inode);
+ struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_WALK);
+ struct v9fs_fid *olddirfid =
+ v9fs_fid_lookup(old_dentry->d_parent, FID_WALK);
+ struct v9fs_fid *newdirfid =
+ v9fs_fid_lookup(new_dentry->d_parent, FID_WALK);
+ struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+ struct v9fs_fcall *fcall = NULL;
+ int fid = -1;
+ int olddirfidnum = -1;
+ int newdirfidnum = -1;
+ int retval = 0;
+
+ dprintk(DEBUG_VFS, "\n");
+
+ if (!mistat)
+ return -ENOMEM;
+
+ if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
+ dprintk(DEBUG_ERROR, "problem with arguments\n");
+ return -EBADF;
+ }
+
+ /* 9P can only handle file rename in the same directory */
+ if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
+ dprintk(DEBUG_ERROR, "old dir and new dir are different\n");
+ retval = -EPERM;
+ goto FreeFcallnBail;
+ }
+
+ fid = oldfid->fid;
+ olddirfidnum = olddirfid->fid;
+ newdirfidnum = newdirfid->fid;
+
+ if (fid < 0) {
+ dprintk(DEBUG_ERROR, "no fid for old file #%lu\n",
+ old_inode->i_ino);
+ retval = -EBADF;
+ goto FreeFcallnBail;
+ }
+
+ v9fs_blank_mistat(v9ses, mistat);
+
+ strcpy(mistat->data + 1, v9ses->name);
+ mistat->name = mistat->data + 1 + strlen(v9ses->name);
+
+ if (new_dentry->d_name.len >
+ (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
+ dprintk(DEBUG_ERROR, "new name too long\n");
+ goto FreeFcallnBail;
+ }
+
+ strcpy(mistat->name, new_dentry->d_name.name);
+ retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
+
+ FreeFcallnBail:
+ kfree(mistat);
+
+ if (retval < 0)
+ dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+ FCALL_ERROR(fcall));
+
+ kfree(fcall);
+ return retval;
+}
+
+/**
+ * v9fs_vfs_getattr - retreive file metadata
+ * @mnt - mount information
+ * @dentry - file to get attributes on
+ * @stat - metadata structure to populate
+ *
+ */
+
+static int
+v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ struct v9fs_fcall *fcall = NULL;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+ int err = -EPERM;
+
+ dprintk(DEBUG_VFS, "dentry: %p\n", dentry);
+ if (!fid) {
+ dprintk(DEBUG_ERROR,
+ "couldn't find fid associated with dentry\n");
+ return -EBADF;
+ }
+
+ err = v9fs_t_stat(v9ses, fid->fid, &fcall);
+
+ if (err < 0)
+ dprintk(DEBUG_ERROR, "stat error\n");
+ else {
+ v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode,
+ dentry->d_inode->i_sb);
+ generic_fillattr(dentry->d_inode, stat);
+ }
+
+ kfree(fcall);
+ return err;
+}
+
+/**
+ * v9fs_vfs_setattr - set file metadata
+ * @dentry: file whose metadata to set
+ * @iattr: metadata assignment structure
+ *
+ */
+
+static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+ struct v9fs_fcall *fcall = NULL;
+ struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+ int res = -EPERM;
+
+ dprintk(DEBUG_VFS, "\n");
+
+ if (!mistat)
+ return -ENOMEM;
+
+ if (!fid) {
+ dprintk(DEBUG_ERROR,
+ "Couldn't find fid associated with dentry\n");
+ return -EBADF;
+ }
+
+ v9fs_blank_mistat(v9ses, mistat);
+ if (iattr->ia_valid & ATTR_MODE)
+ mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode);
+
+ if (iattr->ia_valid & ATTR_MTIME)
+ mistat->mtime = iattr->ia_mtime.tv_sec;
+
+ if (iattr->ia_valid & ATTR_ATIME)
+ mistat->atime = iattr->ia_atime.tv_sec;
+
+ if (iattr->ia_valid & ATTR_SIZE)
+ mistat->length = iattr->ia_size;
+
+ if (v9ses->extended) {
+ char *ptr = mistat->data+1;
+
+ if (iattr->ia_valid & ATTR_UID) {
+ mistat->uid = ptr;
+ ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
+ mistat->n_uid = iattr->ia_uid;
+ }
+
+ if (iattr->ia_valid & ATTR_GID) {
+ mistat->gid = ptr;
+ ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
+ mistat->n_gid = iattr->ia_gid;
+ }
+ }
+
+ res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall);
+
+ if (res < 0)
+ dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall));
+
+ kfree(mistat);
+ kfree(fcall);
+
+ if (res >= 0)
+ res = inode_setattr(dentry->d_inode, iattr);
+
+ return res;
+}
+
+/**
+ * v9fs_mistat2inode - populate an inode structure with mistat info
+ * @mistat: Plan 9 metadata (mistat) structure
+ * @inode: inode to populate
+ * @sb: superblock of filesystem
+ *
+ */
+
+void
+v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
+ struct super_block *sb)
+{
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ inode->i_nlink = 1;
+
+ inode->i_atime.tv_sec = mistat->atime;
+ inode->i_mtime.tv_sec = mistat->mtime;
+ inode->i_ctime.tv_sec = mistat->mtime;
+
+ inode->i_uid = -1;
+ inode->i_gid = -1;
+
+ if (v9ses->extended) {
+ /* TODO: string to uid mapping via user-space daemon */
+ inode->i_uid = mistat->n_uid;
+ inode->i_gid = mistat->n_gid;
+
+ if (mistat->n_uid == -1)
+ sscanf(mistat->uid, "%x", &inode->i_uid);
+
+ if (mistat->n_gid == -1)
+ sscanf(mistat->gid, "%x", &inode->i_gid);
+ }
+
+ if (inode->i_uid == -1)
+ inode->i_uid = v9ses->uid;
+ if (inode->i_gid == -1)
+ inode->i_gid = v9ses->gid;
+
+ inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
+ if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
+ char type = 0;
+ int major = -1;
+ int minor = -1;
+ sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+ switch (type) {
+ case 'c':
+ inode->i_mode &= ~S_IFBLK;
+ inode->i_mode |= S_IFCHR;
+ break;
+ case 'b':
+ break;
+ default:
+ dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
+ type, mistat->extension);
+ };
+ inode->i_rdev = MKDEV(major, minor);
+ } else
+ inode->i_rdev = 0;
+
+ inode->i_size = mistat->length;
+
+ inode->i_blksize = sb->s_blocksize;
+ inode->i_blocks =
+ (inode->i_size + inode->i_blksize - 1) >> sb->s_blocksize_bits;
+}
+
+/**
+ * v9fs_qid2ino - convert qid into inode number
+ * @qid: qid to hash
+ *
+ * BUG: potential for inode number collisions?
+ */
+
+ino_t v9fs_qid2ino(struct v9fs_qid *qid)
+{
+ u64 path = qid->path + 2;
+ ino_t i = 0;
+
+ if (sizeof(ino_t) == sizeof(path))
+ memcpy(&i, &path, sizeof(ino_t));
+ else
+ i = (ino_t) (path ^ (path >> 32));
+
+ return i;
+}
+
+/**
+ * v9fs_vfs_symlink - helper function to create symlinks
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * See 9P2000.u RFC for more information
+ *
+ */
+
+static int
+v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+ int retval = -EPERM;
+ struct v9fs_fid *newfid;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+ struct v9fs_fcall *fcall = NULL;
+ struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+
+ dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+ symname);
+
+ if (!mistat)
+ return -ENOMEM;
+
+ if (!v9ses->extended) {
+ dprintk(DEBUG_ERROR, "not extended\n");
+ goto FreeFcall;
+ }
+
+ /* issue a create */
+ retval = v9fs_create(dir, dentry, S_IFLNK, 0);
+ if (retval != 0)
+ goto FreeFcall;
+
+ newfid = v9fs_fid_lookup(dentry, FID_OP);
+
+ /* issue a twstat */
+ v9fs_blank_mistat(v9ses, mistat);
+ strcpy(mistat->data + 1, symname);
+ mistat->extension = mistat->data + 1;
+ retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+ if (retval < 0) {
+ dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+ FCALL_ERROR(fcall));
+ goto FreeFcall;
+ }
+
+ kfree(fcall);
+
+ if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+ dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+ FCALL_ERROR(fcall));
+ goto FreeFcall;
+ }
+
+ d_drop(dentry); /* FID - will this also clunk? */
+
+ FreeFcall:
+ kfree(mistat);
+ kfree(fcall);
+
+ return retval;
+}
+
+/**
+ * v9fs_readlink - read a symlink's location (internal version)
+ * @dentry: dentry for symlink
+ * @buffer: buffer to load symlink location into
+ * @buflen: length of buffer
+ *
+ */
+
+static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
+{
+ int retval = -EPERM;
+
+ struct v9fs_fcall *fcall = NULL;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
+ struct v9fs_fid *fid = v9fs_fid_lookup(dentry, FID_OP);
+
+ if (!fid) {
+ dprintk(DEBUG_ERROR, "could not resolve fid from dentry\n");
+ retval = -EBADF;
+ goto FreeFcall;
+ }
+
+ if (!v9ses->extended) {
+ retval = -EBADF;
+ dprintk(DEBUG_ERROR, "not extended\n");
+ goto FreeFcall;
+ }
+
+ dprintk(DEBUG_VFS, " %s\n", dentry->d_name.name);
+ retval = v9fs_t_stat(v9ses, fid->fid, &fcall);
+
+ if (retval < 0) {
+ dprintk(DEBUG_ERROR, "stat error\n");
+ goto FreeFcall;
+ }
+
+ if (!fcall)
+ return -EIO;
+
+ if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) {
+ retval = -EINVAL;
+ goto FreeFcall;
+ }
+
+ /* copy extension buffer into buffer */
+ if (strlen(fcall->params.rstat.stat->extension) < buflen)
+ buflen = strlen(fcall->params.rstat.stat->extension);
+
+ memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1);
+
+ retval = buflen;
+
+ FreeFcall:
+ kfree(fcall);
+
+ return retval;
+}
+
+/**
+ * v9fs_vfs_readlink - read a symlink's location
+ * @dentry: dentry for symlink
+ * @buf: buffer to load symlink location into
+ * @buflen: length of buffer
+ *
+ */
+
+static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
+ int buflen)
+{
+ int retval;
+ int ret;
+ char *link = __getname();
+
+ if (strlen(link) < buflen)
+ buflen = strlen(link);
+
+ dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
+ retval = v9fs_readlink(dentry, link, buflen);
+
+ if (retval > 0) {
+ if ((ret = copy_to_user(buffer, link, retval)) != 0) {
+ dprintk(DEBUG_ERROR, "problem copying to user: %d\n",
+ ret);
+ retval = ret;
+ }
+ }
+
+ putname(link);
+ return retval;
+}
+
+/**
+ * v9fs_vfs_follow_link - follow a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata
+ *
+ */
+
+static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ int len = 0;
+ char *link = __getname();
+
+ dprintk(DEBUG_VFS, "%s n", dentry->d_name.name);
+
+ if (!link)
+ link = ERR_PTR(-ENOMEM);
+ else {
+ len = v9fs_readlink(dentry, link, strlen(link));
+
+ if (len < 0) {
+ putname(link);
+ link = ERR_PTR(len);
+ } else
+ link[len] = 0;
+ }
+ nd_set_link(nd, link);
+
+ return NULL;
+}
+
+/**
+ * v9fs_vfs_put_link - release a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata
+ *
+ */
+
+static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+{
+ char *s = nd_get_link(nd);
+
+ dprintk(DEBUG_VFS, " %s %s\n", dentry->d_name.name, s);
+ if (!IS_ERR(s))
+ putname(s);
+}
+
+/**
+ * v9fs_vfs_link - create a hardlink
+ * @old_dentry: dentry for file to link to
+ * @dir: inode destination for new link
+ * @dentry: dentry for link
+ *
+ */
+
+/* XXX - lots of code dup'd from symlink and creates,
+ * figure out a better reuse strategy
+ */
+
+static int
+v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *dentry)
+{
+ int retval = -EPERM;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+ struct v9fs_fcall *fcall = NULL;
+ struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+ struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry, FID_OP);
+ struct v9fs_fid *newfid = NULL;
+ char *symname = __getname();
+
+ dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+ old_dentry->d_name.name);
+
+ if (!v9ses->extended) {
+ dprintk(DEBUG_ERROR, "not extended\n");
+ goto FreeMem;
+ }
+
+ /* get fid of old_dentry */
+ sprintf(symname, "hardlink(%d)\n", oldfid->fid);
+
+ /* issue a create */
+ retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
+ if (retval != 0)
+ goto FreeMem;
+
+ newfid = v9fs_fid_lookup(dentry, FID_OP);
+ if (!newfid) {
+ dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
+ goto FreeMem;
+ }
+
+ /* issue a twstat */
+ v9fs_blank_mistat(v9ses, mistat);
+ strcpy(mistat->data + 1, symname);
+ mistat->extension = mistat->data + 1;
+ retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+ if (retval < 0) {
+ dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+ FCALL_ERROR(fcall));
+ goto FreeMem;
+ }
+
+ kfree(fcall);
+
+ if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+ dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+ FCALL_ERROR(fcall));
+ goto FreeMem;
+ }
+
+ d_drop(dentry); /* FID - will this also clunk? */
+
+ kfree(fcall);
+ fcall = NULL;
+
+ FreeMem:
+ kfree(mistat);
+ kfree(fcall);
+ putname(symname);
+ return retval;
+}
+
+/**
+ * v9fs_vfs_mknod - create a special file
+ * @dir: inode destination for new link
+ * @dentry: dentry for file
+ * @mode: mode for creation
+ * @dev_t: device associated with special file
+ *
+ */
+
+static int
+v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
+{
+ int retval = -EPERM;
+ struct v9fs_fid *newfid;
+ struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
+ struct v9fs_fcall *fcall = NULL;
+ struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+ char *symname = __getname();
+
+ dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
+ dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+
+ if (!mistat)
+ return -ENOMEM;
+
+ if (!new_valid_dev(rdev)) {
+ retval = -EINVAL;
+ goto FreeMem;
+ }
+
+ if (!v9ses->extended) {
+ dprintk(DEBUG_ERROR, "not extended\n");
+ goto FreeMem;
+ }
+
+ /* issue a create */
+ retval = v9fs_create(dir, dentry, mode, 0);
+
+ if (retval != 0)
+ goto FreeMem;
+
+ newfid = v9fs_fid_lookup(dentry, FID_OP);
+ if (!newfid) {
+ dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
+ retval = -EINVAL;
+ goto FreeMem;
+ }
+
+ /* build extension */
+ if (S_ISBLK(mode))
+ sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev));
+ else if (S_ISCHR(mode))
+ sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev));
+ else if (S_ISFIFO(mode))
+ ; /* DO NOTHING */
+ else {
+ retval = -EINVAL;
+ goto FreeMem;
+ }
+
+ if (!S_ISFIFO(mode)) {
+ /* issue a twstat */
+ v9fs_blank_mistat(v9ses, mistat);
+ strcpy(mistat->data + 1, symname);
+ mistat->extension = mistat->data + 1;
+ retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
+ if (retval < 0) {
+ dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
+ FCALL_ERROR(fcall));
+ goto FreeMem;
+ }
+ }
+
+ /* need to update dcache so we show up */
+ kfree(fcall);
+
+ if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
+ dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
+ FCALL_ERROR(fcall));
+ goto FreeMem;
+ }
+
+ d_drop(dentry); /* FID - will this also clunk? */
+
+ FreeMem:
+ kfree(mistat);
+ kfree(fcall);
+ putname(symname);
+
+ return retval;
+}
+
+static struct inode_operations v9fs_dir_inode_operations_ext = {
+ .create = v9fs_vfs_create,
+ .lookup = v9fs_vfs_lookup,
+ .symlink = v9fs_vfs_symlink,
+ .link = v9fs_vfs_link,
+ .unlink = v9fs_vfs_unlink,
+ .mkdir = v9fs_vfs_mkdir,
+ .rmdir = v9fs_vfs_rmdir,
+ .mknod = v9fs_vfs_mknod,
+ .rename = v9fs_vfs_rename,
+ .readlink = v9fs_vfs_readlink,
+ .getattr = v9fs_vfs_getattr,
+ .setattr = v9fs_vfs_setattr,
+};
+
+static struct inode_operations v9fs_dir_inode_operations = {
+ .create = v9fs_vfs_create,
+ .lookup = v9fs_vfs_lookup,
+ .unlink = v9fs_vfs_unlink,
+ .mkdir = v9fs_vfs_mkdir,
+ .rmdir = v9fs_vfs_rmdir,
+ .mknod = v9fs_vfs_mknod,
+ .rename = v9fs_vfs_rename,
+ .getattr = v9fs_vfs_getattr,
+ .setattr = v9fs_vfs_setattr,
+};
+
+static struct inode_operations v9fs_file_inode_operations = {
+ .getattr = v9fs_vfs_getattr,
+ .setattr = v9fs_vfs_setattr,
+};
+
+static struct inode_operations v9fs_symlink_inode_operations = {
+ .readlink = v9fs_vfs_readlink,
+ .follow_link = v9fs_vfs_follow_link,
+ .put_link = v9fs_vfs_put_link,
+ .getattr = v9fs_vfs_getattr,
+ .setattr = v9fs_vfs_setattr,
+};
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
new file mode 100644
index 000000000000..868f350b2c5f
--- /dev/null
+++ b/fs/9p/vfs_super.c
@@ -0,0 +1,280 @@
+/*
+ * linux/fs/9p/vfs_super.c
+ *
+ * This file contians superblock ops for 9P2000. It is intended that
+ * you mount this file system on directories.
+ *
+ * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/inet.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/idr.h>
+
+#include "debug.h"
+#include "v9fs.h"
+#include "9p.h"
+#include "v9fs_vfs.h"
+#include "conv.h"
+#include "fid.h"
+
+static void v9fs_clear_inode(struct inode *);
+static struct super_operations v9fs_super_ops;
+
+/**
+ * v9fs_clear_inode - release an inode
+ * @inode: inode to release
+ *
+ */
+
+static void v9fs_clear_inode(struct inode *inode)
+{
+ filemap_fdatawrite(inode->i_mapping);
+}
+
+/**
+ * v9fs_set_super - set the superblock
+ * @s: super block
+ * @data: file system specific data
+ *
+ */
+
+static int v9fs_set_super(struct super_block *s, void *data)
+{
+ s->s_fs_info = data;
+ return set_anon_super(s, data);
+}
+
+/**
+ * v9fs_fill_super - populate superblock with info
+ * @sb: superblock
+ * @v9ses: session information
+ *
+ */
+
+static void
+v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
+ int flags)
+{
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_blocksize_bits = fls(v9ses->maxdata - 1);
+ sb->s_blocksize = 1 << sb->s_blocksize_bits;
+ sb->s_magic = V9FS_MAGIC;
+ sb->s_op = &v9fs_super_ops;
+
+ sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
+ MS_NODIRATIME | MS_NOATIME;
+}
+
+/**
+ * v9fs_get_sb - mount a superblock
+ * @fs_type: file system type
+ * @flags: mount flags
+ * @dev_name: device name that was mounted
+ * @data: mount options
+ *
+ */
+
+static struct super_block *v9fs_get_sb(struct file_system_type
+ *fs_type, int flags,
+ const char *dev_name, void *data)
+{
+ struct super_block *sb = NULL;
+ struct v9fs_fcall *fcall = NULL;
+ struct inode *inode = NULL;
+ struct dentry *root = NULL;
+ struct v9fs_session_info *v9ses = NULL;
+ struct v9fs_fid *root_fid = NULL;
+ int mode = S_IRWXUGO | S_ISVTX;
+ uid_t uid = current->fsuid;
+ gid_t gid = current->fsgid;
+ int stat_result = 0;
+ int newfid = 0;
+ int retval = 0;
+
+ dprintk(DEBUG_VFS, " \n");
+
+ v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL);
+ if (!v9ses)
+ return ERR_PTR(-ENOMEM);
+
+ if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
+ dprintk(DEBUG_ERROR, "problem initiating session\n");
+ retval = newfid;
+ goto free_session;
+ }
+
+ sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
+
+ v9fs_fill_super(sb, v9ses, flags);
+
+ inode = v9fs_get_inode(sb, S_IFDIR | mode);
+ if (IS_ERR(inode)) {
+ retval = PTR_ERR(inode);
+ goto put_back_sb;
+ }
+
+ inode->i_uid = uid;
+ inode->i_gid = gid;
+
+ root = d_alloc_root(inode);
+
+ if (!root) {
+ retval = -ENOMEM;
+ goto release_inode;
+ }
+
+ sb->s_root = root;
+
+ /* Setup the Root Inode */
+ root_fid = v9fs_fid_create(root);
+ if (root_fid == NULL) {
+ retval = -ENOMEM;
+ goto release_dentry;
+ }
+
+ root_fid->fidopen = 0;
+ root_fid->v9ses = v9ses;
+
+ stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
+ if (stat_result < 0) {
+ dprintk(DEBUG_ERROR, "stat error\n");
+ v9fs_t_clunk(v9ses, newfid, NULL);
+ v9fs_put_idpool(newfid, &v9ses->fidpool);
+ } else {
+ root_fid->fid = newfid;
+ root_fid->qid = fcall->params.rstat.stat->qid;
+ root->d_inode->i_ino =
+ v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+ v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb);
+ }
+
+ kfree(fcall);
+
+ if (stat_result < 0) {
+ retval = stat_result;
+ goto release_dentry;
+ }
+
+ return sb;
+
+ release_dentry:
+ dput(sb->s_root);
+
+ release_inode:
+ iput(inode);
+
+ put_back_sb:
+ up_write(&sb->s_umount);
+ deactivate_super(sb);
+ v9fs_session_close(v9ses);
+
+ free_session:
+ kfree(v9ses);
+
+ return ERR_PTR(retval);
+}
+
+/**
+ * v9fs_kill_super - Kill Superblock
+ * @s: superblock
+ *
+ */
+
+static void v9fs_kill_super(struct super_block *s)
+{
+ struct v9fs_session_info *v9ses = s->s_fs_info;
+
+ dprintk(DEBUG_VFS, " %p\n", s);
+
+ v9fs_dentry_release(s->s_root); /* clunk root */
+
+ kill_anon_super(s);
+
+ v9fs_session_close(v9ses);
+ kfree(v9ses);
+ dprintk(DEBUG_VFS, "exiting kill_super\n");
+}
+
+/**
+ * v9fs_show_options - Show mount options in /proc/mounts
+ * @m: seq_file to write to
+ * @mnt: mount descriptor
+ *
+ */
+
+static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+ struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info;
+
+ if (v9ses->debug != 0)
+ seq_printf(m, ",debug=%u", v9ses->debug);
+ if (v9ses->port != V9FS_PORT)
+ seq_printf(m, ",port=%u", v9ses->port);
+ if (v9ses->maxdata != 9000)
+ seq_printf(m, ",msize=%u", v9ses->maxdata);
+ if (v9ses->afid != ~0)
+ seq_printf(m, ",afid=%u", v9ses->afid);
+ if (v9ses->proto == PROTO_UNIX)
+ seq_puts(m, ",proto=unix");
+ if (v9ses->extended == 0)
+ seq_puts(m, ",noextend");
+ if (v9ses->nodev == 1)
+ seq_puts(m, ",nodevmap");
+ seq_printf(m, ",name=%s", v9ses->name);
+ seq_printf(m, ",aname=%s", v9ses->remotename);
+ seq_printf(m, ",uid=%u", v9ses->uid);
+ seq_printf(m, ",gid=%u", v9ses->gid);
+ return 0;
+}
+
+static void
+v9fs_umount_begin(struct super_block *sb)
+{
+ struct v9fs_session_info *v9ses = sb->s_fs_info;
+
+ v9fs_session_cancel(v9ses);
+}
+
+static struct super_operations v9fs_super_ops = {
+ .statfs = simple_statfs,
+ .clear_inode = v9fs_clear_inode,
+ .show_options = v9fs_show_options,
+ .umount_begin = v9fs_umount_begin,
+};
+
+struct file_system_type v9fs_fs_type = {
+ .name = "9P",
+ .get_sb = v9fs_get_sb,
+ .kill_sb = v9fs_kill_super,
+ .owner = THIS_MODULE,
+};
diff --git a/fs/Kconfig b/fs/Kconfig
index 5e817902cb3b..068ccea2f184 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -462,6 +462,19 @@ config AUTOFS4_FS
local network, you probably do not need an automounter, and can say
N here.
+config FUSE_FS
+ tristate "Filesystem in Userspace support"
+ help
+ With FUSE it is possible to implement a fully functional filesystem
+ in a userspace program.
+
+ There's also companion library: libfuse. This library along with
+ utilities is available from the FUSE homepage:
+ <http://fuse.sourceforge.net/>
+
+ If you want to develop a userspace FS, or if you want to use
+ a filesystem based on FUSE, answer Y or M.
+
menu "CD-ROM/DVD Filesystems"
config ISO9660_FS
@@ -1703,6 +1716,17 @@ config AFS_FS
config RXRPC
tristate
+config 9P_FS
+ tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
+ depends on INET && EXPERIMENTAL
+ help
+ If you say Y here, you will get experimental support for
+ Plan 9 resource sharing via the 9P2000 protocol.
+
+ See <http://v9fs.sf.net> for more information.
+
+ If unsure, say N.
+
endmenu
menu "Partition Types"
diff --git a/fs/Makefile b/fs/Makefile
index 15158309dee4..1972da186272 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -89,11 +89,13 @@ obj-$(CONFIG_QNX4FS_FS) += qnx4/
obj-$(CONFIG_AUTOFS_FS) += autofs/
obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
+obj-$(CONFIG_FUSE_FS) += fuse/
obj-$(CONFIG_UDF_FS) += udf/
obj-$(CONFIG_RELAYFS_FS) += relayfs/
obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
obj-$(CONFIG_JFS_FS) += jfs/
obj-$(CONFIG_XFS_FS) += xfs/
+obj-$(CONFIG_9P_FS) += 9p/
obj-$(CONFIG_AFS_FS) += afs/
obj-$(CONFIG_BEFS_FS) += befs/
obj-$(CONFIG_HOSTFS) += hostfs/
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 7aa6f2004536..9ebe881c6786 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -255,6 +255,7 @@ void
affs_delete_inode(struct inode *inode)
{
pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+ truncate_inode_pages(&inode->i_data, 0);
inode->i_size = 0;
if (S_ISREG(inode->i_mode))
affs_truncate(inode);
diff --git a/fs/aio.c b/fs/aio.c
index 4f641abac3c0..38f62680fd63 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,6 +29,7 @@
#include <linux/highmem.h>
#include <linux/workqueue.h>
#include <linux/security.h>
+#include <linux/rcuref.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -499,7 +500,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
/* Must be done under the lock to serialise against cancellation.
* Call this aio_fput as it duplicates fput via the fput_work.
*/
- if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
+ if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
@@ -546,6 +547,24 @@ struct kioctx *lookup_ioctx(unsigned long ctx_id)
return ioctx;
}
+static int lock_kiocb_action(void *param)
+{
+ schedule();
+ return 0;
+}
+
+static inline void lock_kiocb(struct kiocb *iocb)
+{
+ wait_on_bit_lock(&iocb->ki_flags, KIF_LOCKED, lock_kiocb_action,
+ TASK_UNINTERRUPTIBLE);
+}
+
+static inline void unlock_kiocb(struct kiocb *iocb)
+{
+ kiocbClearLocked(iocb);
+ wake_up_bit(&iocb->ki_flags, KIF_LOCKED);
+}
+
/*
* use_mm
* Makes the calling kernel thread take on the specified
@@ -786,7 +805,9 @@ static int __aio_run_iocbs(struct kioctx *ctx)
* Hold an extra reference while retrying i/o.
*/
iocb->ki_users++; /* grab extra reference */
+ lock_kiocb(iocb);
aio_run_iocb(iocb);
+ unlock_kiocb(iocb);
if (__aio_put_req(ctx, iocb)) /* drop extra ref */
put_ioctx(ctx);
}
@@ -1527,10 +1548,9 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
goto out_put_req;
spin_lock_irq(&ctx->ctx_lock);
- if (likely(list_empty(&ctx->run_list))) {
- aio_run_iocb(req);
- } else {
- list_add_tail(&req->ki_run_list, &ctx->run_list);
+ aio_run_iocb(req);
+ unlock_kiocb(req);
+ if (!list_empty(&ctx->run_list)) {
/* drain the run list */
while (__aio_run_iocbs(ctx))
;
@@ -1661,6 +1681,7 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
if (NULL != cancel) {
struct io_event tmp;
pr_debug("calling cancel\n");
+ lock_kiocb(kiocb);
memset(&tmp, 0, sizeof(tmp));
tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user;
tmp.data = kiocb->ki_user_data;
@@ -1672,8 +1693,9 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb,
if (copy_to_user(result, &tmp, sizeof(tmp)))
ret = -EFAULT;
}
+ unlock_kiocb(kiocb);
} else
- printk(KERN_DEBUG "iocb has no cancel operation\n");
+ ret = -EINVAL;
put_ioctx(ctx);
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 6171431272dc..990c28da5aec 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -105,6 +105,7 @@ struct autofs_sb_info {
struct file *pipe;
pid_t oz_pgrp;
int catatonic;
+ struct super_block *sb;
unsigned long exp_timeout;
ino_t next_dir_ino;
struct autofs_wait_queue *queues; /* Wait queue pointer */
@@ -134,7 +135,7 @@ void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
void autofs_hash_delete(struct autofs_dir_ent *);
struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
void autofs_hash_dputall(struct autofs_dirhash *);
-void autofs_hash_nuke(struct autofs_dirhash *);
+void autofs_hash_nuke(struct autofs_sb_info *);
/* Expiration-handling functions */
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 448143fd0796..5ccfcf26310d 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -232,13 +232,13 @@ void autofs_hash_dputall(struct autofs_dirhash *dh)
/* Delete everything. This is used on filesystem destruction, so we
make no attempt to keep the pointers valid */
-void autofs_hash_nuke(struct autofs_dirhash *dh)
+void autofs_hash_nuke(struct autofs_sb_info *sbi)
{
int i;
struct autofs_dir_ent *ent, *nent;
for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
- for ( ent = dh->h[i] ; ent ; ent = nent ) {
+ for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
nent = ent->next;
if ( ent->dentry )
dput(ent->dentry);
@@ -246,4 +246,5 @@ void autofs_hash_nuke(struct autofs_dirhash *dh)
kfree(ent);
}
}
+ shrink_dcache_sb(sbi->sb);
}
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 4888c1fabbf7..65e5ed42190e 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -27,7 +27,7 @@ static void autofs_put_super(struct super_block *sb)
if ( !sbi->catatonic )
autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
- autofs_hash_nuke(&sbi->dirhash);
+ autofs_hash_nuke(sbi);
for ( n = 0 ; n < AUTOFS_MAX_SYMLINKS ; n++ ) {
if ( test_bit(n, sbi->symlink_bitmap) )
kfree(sbi->symlink[n].data);
@@ -148,6 +148,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
s->s_magic = AUTOFS_SUPER_MAGIC;
s->s_op = &autofs_sops;
s->s_time_gran = 1;
+ sbi->sb = s;
root_inode = iget(s, AUTOFS_ROOT_INO);
root = d_alloc_root(root_inode);
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 1020dbc88bec..1fbc53f14aba 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -20,7 +20,6 @@ struct bfs_sb_info {
unsigned long si_lasti;
unsigned long * si_imap;
struct buffer_head * si_sbh; /* buffer header w/superblock */
- struct bfs_super_block * si_bfs_sb; /* superblock in si_sbh->b_data */
};
/*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 5a1e5ce057ff..e240c335eb23 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -2,6 +2,7 @@
* fs/bfs/dir.c
* BFS directory operations.
* Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005
*/
#include <linux/time.h>
@@ -20,9 +21,9 @@
#define dprintf(x...)
#endif
-static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino);
+static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino);
static struct buffer_head * bfs_find_entry(struct inode * dir,
- const char * name, int namelen, struct bfs_dirent ** res_dir);
+ const unsigned char * name, int namelen, struct bfs_dirent ** res_dir);
static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
{
@@ -53,7 +54,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir)
de = (struct bfs_dirent *)(bh->b_data + offset);
if (de->ino) {
int size = strnlen(de->name, BFS_NAMELEN);
- if (filldir(dirent, de->name, size, f->f_pos, de->ino, DT_UNKNOWN) < 0) {
+ if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) {
brelse(bh);
unlock_kernel();
return 0;
@@ -107,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode,
inode->i_mapping->a_ops = &bfs_aops;
inode->i_mode = mode;
inode->i_ino = ino;
- BFS_I(inode)->i_dsk_ino = ino;
+ BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino);
BFS_I(inode)->i_sblock = 0;
BFS_I(inode)->i_eblock = 0;
insert_inode_hash(inode);
@@ -139,7 +140,7 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st
lock_kernel();
bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
if (bh) {
- unsigned long ino = le32_to_cpu(de->ino);
+ unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
brelse(bh);
inode = iget(dir->i_sb, ino);
if (!inode) {
@@ -183,7 +184,7 @@ static int bfs_unlink(struct inode * dir, struct dentry * dentry)
inode = dentry->d_inode;
lock_kernel();
bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
- if (!bh || de->ino != inode->i_ino)
+ if (!bh || le16_to_cpu(de->ino) != inode->i_ino)
goto out_brelse;
if (!inode->i_nlink) {
@@ -224,7 +225,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry,
old_dentry->d_name.name,
old_dentry->d_name.len, &old_de);
- if (!old_bh || old_de->ino != old_inode->i_ino)
+ if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino)
goto end_rename;
error = -EPERM;
@@ -270,7 +271,7 @@ struct inode_operations bfs_dir_inops = {
.rename = bfs_rename,
};
-static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino)
+static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino)
{
struct buffer_head * bh;
struct bfs_dirent * de;
@@ -304,7 +305,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
}
dir->i_mtime = CURRENT_TIME_SEC;
mark_inode_dirty(dir);
- de->ino = ino;
+ de->ino = cpu_to_le16((u16)ino);
for (i=0; i<BFS_NAMELEN; i++)
de->name[i] = (i < namelen) ? name[i] : 0;
mark_buffer_dirty(bh);
@@ -317,7 +318,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int
return -ENOSPC;
}
-static inline int bfs_namecmp(int len, const char * name, const char * buffer)
+static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer)
{
if (len < BFS_NAMELEN && buffer[len])
return 0;
@@ -325,7 +326,7 @@ static inline int bfs_namecmp(int len, const char * name, const char * buffer)
}
static struct buffer_head * bfs_find_entry(struct inode * dir,
- const char * name, int namelen, struct bfs_dirent ** res_dir)
+ const unsigned char * name, int namelen, struct bfs_dirent ** res_dir)
{
unsigned long block, offset;
struct buffer_head * bh;
@@ -346,7 +347,7 @@ static struct buffer_head * bfs_find_entry(struct inode * dir,
}
de = (struct bfs_dirent *)(bh->b_data + offset);
offset += BFS_DIRENT_SIZE;
- if (de->ino && bfs_namecmp(namelen, name, de->name)) {
+ if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) {
*res_dir = de;
return bh;
}
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 747fd1ea55e0..807723b65daf 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -40,8 +40,8 @@ static int bfs_move_block(unsigned long from, unsigned long to, struct super_blo
return 0;
}
-static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned long end,
- unsigned long where)
+static int bfs_move_blocks(struct super_block *sb, unsigned long start,
+ unsigned long end, unsigned long where)
{
unsigned long i;
@@ -57,20 +57,21 @@ static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned
static int bfs_get_block(struct inode * inode, sector_t block,
struct buffer_head * bh_result, int create)
{
- long phys;
+ unsigned long phys;
int err;
struct super_block *sb = inode->i_sb;
struct bfs_sb_info *info = BFS_SB(sb);
struct bfs_inode_info *bi = BFS_I(inode);
struct buffer_head *sbh = info->si_sbh;
- if (block < 0 || block > info->si_blocks)
+ if (block > info->si_blocks)
return -EIO;
phys = bi->i_sblock + block;
if (!create) {
if (phys <= bi->i_eblock) {
- dprintf("c=%d, b=%08lx, phys=%08lx (granted)\n", create, block, phys);
+ dprintf("c=%d, b=%08lx, phys=%09lx (granted)\n",
+ create, (unsigned long)block, phys);
map_bh(bh_result, sb, phys);
}
return 0;
@@ -80,7 +81,7 @@ static int bfs_get_block(struct inode * inode, sector_t block,
of blocks allocated for this file, we can grant it */
if (inode->i_size && phys <= bi->i_eblock) {
dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n",
- create, block, phys);
+ create, (unsigned long)block, phys);
map_bh(bh_result, sb, phys);
return 0;
}
@@ -88,11 +89,12 @@ static int bfs_get_block(struct inode * inode, sector_t block,
/* the rest has to be protected against itself */
lock_kernel();
- /* if the last data block for this file is the last allocated block, we can
- extend the file trivially, without moving it anywhere */
+ /* if the last data block for this file is the last allocated
+ block, we can extend the file trivially, without moving it
+ anywhere */
if (bi->i_eblock == info->si_lf_eblk) {
dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n",
- create, block, phys);
+ create, (unsigned long)block, phys);
map_bh(bh_result, sb, phys);
info->si_freeb -= phys - bi->i_eblock;
info->si_lf_eblk = bi->i_eblock = phys;
@@ -114,7 +116,8 @@ static int bfs_get_block(struct inode * inode, sector_t block,
} else
err = 0;
- dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", create, block, phys);
+ dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n",
+ create, (unsigned long)block, phys);
bi->i_sblock = phys;
phys += block;
info->si_lf_eblk = bi->i_eblock = phys;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 64e0fb33fc0c..c7b39aa279d7 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -3,6 +3,8 @@
* BFS superblock and inode operations.
* Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com>
* From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
+ *
+ * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
*/
#include <linux/module.h>
@@ -54,46 +56,50 @@ static void bfs_read_inode(struct inode * inode)
off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
di = (struct bfs_inode *)bh->b_data + off;
- inode->i_mode = 0x0000FFFF & di->i_mode;
- if (di->i_vtype == BFS_VDIR) {
+ inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode);
+ if (le32_to_cpu(di->i_vtype) == BFS_VDIR) {
inode->i_mode |= S_IFDIR;
inode->i_op = &bfs_dir_inops;
inode->i_fop = &bfs_dir_operations;
- } else if (di->i_vtype == BFS_VREG) {
+ } else if (le32_to_cpu(di->i_vtype) == BFS_VREG) {
inode->i_mode |= S_IFREG;
inode->i_op = &bfs_file_inops;
inode->i_fop = &bfs_file_operations;
inode->i_mapping->a_ops = &bfs_aops;
}
- inode->i_uid = di->i_uid;
- inode->i_gid = di->i_gid;
- inode->i_nlink = di->i_nlink;
+ BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock);
+ BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock);
+ inode->i_uid = le32_to_cpu(di->i_uid);
+ inode->i_gid = le32_to_cpu(di->i_gid);
+ inode->i_nlink = le32_to_cpu(di->i_nlink);
inode->i_size = BFS_FILESIZE(di);
inode->i_blocks = BFS_FILEBLOCKS(di);
+ if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks);
inode->i_blksize = PAGE_SIZE;
- inode->i_atime.tv_sec = di->i_atime;
- inode->i_mtime.tv_sec = di->i_mtime;
- inode->i_ctime.tv_sec = di->i_ctime;
+ inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
+ inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime);
+ inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime);
inode->i_atime.tv_nsec = 0;
inode->i_mtime.tv_nsec = 0;
inode->i_ctime.tv_nsec = 0;
- BFS_I(inode)->i_dsk_ino = di->i_ino; /* can be 0 so we store a copy */
- BFS_I(inode)->i_sblock = di->i_sblock;
- BFS_I(inode)->i_eblock = di->i_eblock;
+ BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */
brelse(bh);
}
static int bfs_write_inode(struct inode * inode, int unused)
{
- unsigned long ino = inode->i_ino;
+ unsigned int ino = (u16)inode->i_ino;
+ unsigned long i_sblock;
struct bfs_inode * di;
struct buffer_head * bh;
int block, off;
+ dprintf("ino=%08x\n", ino);
+
if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) {
- printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino);
+ printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino);
return -EIO;
}
@@ -101,7 +107,7 @@ static int bfs_write_inode(struct inode * inode, int unused)
block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1;
bh = sb_bread(inode->i_sb, block);
if (!bh) {
- printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino);
+ printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino);
unlock_kernel();
return -EIO;
}
@@ -109,24 +115,26 @@ static int bfs_write_inode(struct inode * inode, int unused)
off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
di = (struct bfs_inode *)bh->b_data + off;
- if (inode->i_ino == BFS_ROOT_INO)
- di->i_vtype = BFS_VDIR;
+ if (ino == BFS_ROOT_INO)
+ di->i_vtype = cpu_to_le32(BFS_VDIR);
else
- di->i_vtype = BFS_VREG;
-
- di->i_ino = inode->i_ino;
- di->i_mode = inode->i_mode;
- di->i_uid = inode->i_uid;
- di->i_gid = inode->i_gid;
- di->i_nlink = inode->i_nlink;
- di->i_atime = inode->i_atime.tv_sec;
- di->i_mtime = inode->i_mtime.tv_sec;
- di->i_ctime = inode->i_ctime.tv_sec;
- di->i_sblock = BFS_I(inode)->i_sblock;
- di->i_eblock = BFS_I(inode)->i_eblock;
- di->i_eoffset = di->i_sblock * BFS_BSIZE + inode->i_size - 1;
+ di->i_vtype = cpu_to_le32(BFS_VREG);
+
+ di->i_ino = cpu_to_le16(ino);
+ di->i_mode = cpu_to_le32(inode->i_mode);
+ di->i_uid = cpu_to_le32(inode->i_uid);
+ di->i_gid = cpu_to_le32(inode->i_gid);
+ di->i_nlink = cpu_to_le32(inode->i_nlink);
+ di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
+ di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
+ di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+ i_sblock = BFS_I(inode)->i_sblock;
+ di->i_sblock = cpu_to_le32(i_sblock);
+ di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
+ di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
mark_buffer_dirty(bh);
+ dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off);
brelse(bh);
unlock_kernel();
return 0;
@@ -140,11 +148,14 @@ static void bfs_delete_inode(struct inode * inode)
int block, off;
struct super_block * s = inode->i_sb;
struct bfs_sb_info * info = BFS_SB(s);
+ struct bfs_inode_info * bi = BFS_I(inode);
- dprintf("ino=%08lx\n", inode->i_ino);
+ dprintf("ino=%08lx\n", ino);
- if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > info->si_lasti) {
- printf("invalid ino=%08lx\n", inode->i_ino);
+ truncate_inode_pages(&inode->i_data, 0);
+
+ if (ino < BFS_ROOT_INO || ino > info->si_lasti) {
+ printf("invalid ino=%08lx\n", ino);
return;
}
@@ -160,13 +171,13 @@ static void bfs_delete_inode(struct inode * inode)
return;
}
off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK;
- di = (struct bfs_inode *)bh->b_data + off;
- if (di->i_ino) {
- info->si_freeb += BFS_FILEBLOCKS(di);
+ di = (struct bfs_inode *) bh->b_data + off;
+ if (bi->i_dsk_ino) {
+ info->si_freeb += 1 + bi->i_eblock - bi->i_sblock;
info->si_freei++;
- clear_bit(di->i_ino, info->si_imap);
+ clear_bit(ino, info->si_imap);
dump_imap("delete_inode", s);
- }
+ }
di->i_ino = 0;
di->i_sblock = 0;
mark_buffer_dirty(bh);
@@ -272,14 +283,14 @@ static struct super_operations bfs_sops = {
void dump_imap(const char *prefix, struct super_block * s)
{
-#if 0
+#ifdef DEBUG
int i;
char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL);
if (!tmpbuf)
return;
for (i=BFS_SB(s)->si_lasti; i>=0; i--) {
- if (i>PAGE_SIZE-100) break;
+ if (i > PAGE_SIZE-100) break;
if (test_bit(i, BFS_SB(s)->si_imap))
strcat(tmpbuf, "1");
else
@@ -295,7 +306,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
struct buffer_head * bh;
struct bfs_super_block * bfs_sb;
struct inode * inode;
- int i, imap_len;
+ unsigned i, imap_len;
struct bfs_sb_info * info;
info = kmalloc(sizeof(*info), GFP_KERNEL);
@@ -310,19 +321,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
if(!bh)
goto out;
bfs_sb = (struct bfs_super_block *)bh->b_data;
- if (bfs_sb->s_magic != BFS_MAGIC) {
+ if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
if (!silent)
printf("No BFS filesystem on %s (magic=%08x)\n",
- s->s_id, bfs_sb->s_magic);
+ s->s_id, le32_to_cpu(bfs_sb->s_magic));
goto out;
}
if (BFS_UNCLEAN(bfs_sb, s) && !silent)
printf("%s is unclean, continuing\n", s->s_id);
s->s_magic = BFS_MAGIC;
- info->si_bfs_sb = bfs_sb;
info->si_sbh = bh;
- info->si_lasti = (bfs_sb->s_start - BFS_BSIZE)/sizeof(struct bfs_inode)
+ info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode)
+ BFS_ROOT_INO - 1;
imap_len = info->si_lasti/8 + 1;
@@ -346,8 +356,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
goto out;
}
- info->si_blocks = (bfs_sb->s_end + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */
- info->si_freeb = (bfs_sb->s_end + 1 - bfs_sb->s_start)>>BFS_BSIZE_BITS;
+ info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */
+ info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS;
info->si_freei = 0;
info->si_lf_eblk = 0;
info->si_lf_sblk = 0;
diff --git a/fs/bio.c b/fs/bio.c
index a7d4fd3a3299..83a349574567 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -683,7 +683,7 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
{
struct sg_iovec iov;
- iov.iov_base = (__user void *)uaddr;
+ iov.iov_base = (void __user *)uaddr;
iov.iov_len = len;
return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
diff --git a/fs/buffer.c b/fs/buffer.c
index 1c62203a4906..6cbfceabd95d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -40,6 +40,7 @@
#include <linux/cpu.h>
#include <linux/bitops.h>
#include <linux/mpage.h>
+#include <linux/bit_spinlock.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static void invalidate_bh_lrus(void);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3217ac5f6bd7..2335f14a1583 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3215,10 +3215,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
}
cifs_sb->tcon = NULL;
- if (ses) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(HZ / 2);
- }
+ if (ses)
+ schedule_timeout_interruptible(msecs_to_jiffies(500));
if (ses)
sesInfoFree(ses);
diff --git a/fs/compat.c b/fs/compat.c
index 8c665705c6a0..ac3fb9ed8eea 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1619,6 +1619,7 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
char *bits;
long timeout;
int size, max_fdset, ret = -EINVAL;
+ struct fdtable *fdt;
timeout = MAX_SCHEDULE_TIMEOUT;
if (tvp) {
@@ -1644,7 +1645,10 @@ compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp
goto out_nofds;
/* max_fdset can increase, so grab it once to avoid race */
- max_fdset = current->files->max_fdset;
+ rcu_read_lock();
+ fdt = files_fdtable(current->files);
+ max_fdset = fdt->max_fdset;
+ rcu_read_unlock();
if (n > max_fdset)
n = max_fdset;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 155e612635f1..e28a74203f3b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -798,13 +798,16 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
r = (void *) &r4;
}
- if (ret)
- return -EFAULT;
+ if (ret) {
+ ret = -EFAULT;
+ goto out;
+ }
set_fs (KERNEL_DS);
ret = sys_ioctl (fd, cmd, (unsigned long) r);
set_fs (old_fs);
+out:
if (mysock)
sockfd_put(mysock);
diff --git a/fs/cramfs/uncompress.c b/fs/cramfs/uncompress.c
index 5034365b06a8..8def89f2c438 100644
--- a/fs/cramfs/uncompress.c
+++ b/fs/cramfs/uncompress.c
@@ -19,6 +19,7 @@
#include <linux/errno.h>
#include <linux/vmalloc.h>
#include <linux/zlib.h>
+#include <linux/cramfs_fs.h>
static z_stream stream;
static int initialized;
diff --git a/fs/dcache.c b/fs/dcache.c
index a15a2e1f5520..7376b61269fb 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -337,12 +337,10 @@ struct dentry * d_find_alias(struct inode *inode)
*/
void d_prune_aliases(struct inode *inode)
{
- struct list_head *tmp, *head = &inode->i_dentry;
+ struct dentry *dentry;
restart:
spin_lock(&dcache_lock);
- tmp = head;
- while ((tmp = tmp->next) != head) {
- struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
+ list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
spin_lock(&dentry->d_lock);
if (!atomic_read(&dentry->d_count)) {
__dget_locked(dentry);
@@ -463,10 +461,7 @@ void shrink_dcache_sb(struct super_block * sb)
* superblock to the most recent end of the unused list.
*/
spin_lock(&dcache_lock);
- next = dentry_unused.next;
- while (next != &dentry_unused) {
- tmp = next;
- next = tmp->next;
+ list_for_each_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
@@ -478,10 +473,7 @@ void shrink_dcache_sb(struct super_block * sb)
* Pass two ... free the dentries for this superblock.
*/
repeat:
- next = dentry_unused.next;
- while (next != &dentry_unused) {
- tmp = next;
- next = tmp->next;
+ list_for_each_safe(tmp, next, &dentry_unused) {
dentry = list_entry(tmp, struct dentry, d_lru);
if (dentry->d_sb != sb)
continue;
diff --git a/fs/exec.c b/fs/exec.c
index 222ab1c572d8..14dd03907ccb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -798,6 +798,7 @@ no_thread_group:
static inline void flush_old_files(struct files_struct * files)
{
long j = -1;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
for (;;) {
@@ -805,12 +806,13 @@ static inline void flush_old_files(struct files_struct * files)
j++;
i = j * __NFDBITS;
- if (i >= files->max_fds || i >= files->max_fdset)
+ fdt = files_fdtable(files);
+ if (i >= fdt->max_fds || i >= fdt->max_fdset)
break;
- set = files->close_on_exec->fds_bits[j];
+ set = fdt->close_on_exec->fds_bits[j];
if (!set)
continue;
- files->close_on_exec->fds_bits[j] = 0;
+ fdt->close_on_exec->fds_bits[j] = 0;
spin_unlock(&files->file_lock);
for ( ; set ; i++,set >>= 1) {
if (set & 1) {
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 161f156d98c8..c8d07030c897 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -615,6 +615,11 @@ got:
DQUOT_DROP(inode);
goto fail2;
}
+ err = ext2_init_security(inode,dir);
+ if (err) {
+ DQUOT_FREE_INODE(inode);
+ goto fail2;
+ }
mark_inode_dirty(inode);
ext2_debug("allocating inode %lu\n", inode->i_ino);
ext2_preread_inode(inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 53dceb0c6593..fdba4d1d3c60 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -71,6 +71,8 @@ void ext2_put_inode(struct inode *inode)
*/
void ext2_delete_inode (struct inode * inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
+
if (is_bad_inode(inode))
goto no_delete;
EXT2_I(inode)->i_dtime = get_seconds();
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 5f3bfde3b810..67cfeb66e897 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,3 +116,11 @@ exit_ext2_xattr(void)
# endif /* CONFIG_EXT2_FS_XATTR */
+#ifdef CONFIG_EXT2_FS_SECURITY
+extern int ext2_init_security(struct inode *inode, struct inode *dir);
+#else
+static inline int ext2_init_security(struct inode *inode, struct inode *dir)
+{
+ return 0;
+}
+#endif
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 6a6c59fbe599..a26612798471 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/smp_lock.h>
#include <linux/ext2_fs.h>
+#include <linux/security.h>
#include "xattr.h"
static size_t
@@ -45,6 +46,27 @@ ext2_xattr_security_set(struct inode *inode, const char *name,
value, size, flags);
}
+int
+ext2_init_security(struct inode *inode, struct inode *dir)
+{
+ int err;
+ size_t len;
+ void *value;
+ char *name;
+
+ err = security_inode_init_security(inode, dir, &name, &value, &len);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY,
+ name, value, len, 0);
+ kfree(name);
+ kfree(value);
+ return err;
+}
+
struct xattr_handler ext2_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.list = ext2_xattr_security_list,
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 6981bd014ede..96552769d039 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -607,6 +607,11 @@ got:
DQUOT_DROP(inode);
goto fail2;
}
+ err = ext3_init_security(handle,inode, dir);
+ if (err) {
+ DQUOT_FREE_INODE(inode);
+ goto fail2;
+ }
err = ext3_mark_inode_dirty(handle, inode);
if (err) {
ext3_std_error(sb, err);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9989fdcf4d5a..b5177c90d6f1 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -187,6 +187,8 @@ void ext3_delete_inode (struct inode * inode)
{
handle_t *handle;
+ truncate_inode_pages(&inode->i_data, 0);
+
if (is_bad_inode(inode))
goto no_delete;
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index eb31a69e82dc..2ceae38f3d49 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -133,3 +133,14 @@ exit_ext3_xattr(void)
#define ext3_xattr_handlers NULL
# endif /* CONFIG_EXT3_FS_XATTR */
+
+#ifdef CONFIG_EXT3_FS_SECURITY
+extern int ext3_init_security(handle_t *handle, struct inode *inode,
+ struct inode *dir);
+#else
+static inline int ext3_init_security(handle_t *handle, struct inode *inode,
+ struct inode *dir)
+{
+ return 0;
+}
+#endif
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index ddc1c41750e1..b9c40c15647b 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -9,6 +9,7 @@
#include <linux/smp_lock.h>
#include <linux/ext3_jbd.h>
#include <linux/ext3_fs.h>
+#include <linux/security.h>
#include "xattr.h"
static size_t
@@ -47,6 +48,27 @@ ext3_xattr_security_set(struct inode *inode, const char *name,
value, size, flags);
}
+int
+ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir)
+{
+ int err;
+ size_t len;
+ void *value;
+ char *name;
+
+ err = security_inode_init_security(inode, dir, &name, &value, &len);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+ err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
+ name, value, len, 0);
+ kfree(name);
+ kfree(value);
+ return err;
+}
+
struct xattr_handler ext3_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.list = ext3_xattr_security_list,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 96ae85b67eba..a7cbe68e2259 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -335,6 +335,8 @@ EXPORT_SYMBOL(fat_build_inode);
static void fat_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
+
if (!is_bad_inode(inode)) {
inode->i_size = 0;
fat_truncate(inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6fbc9d8fcc36..863b46e0d78a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -16,6 +16,7 @@
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
+#include <linux/rcupdate.h>
#include <asm/poll.h>
#include <asm/siginfo.h>
@@ -24,21 +25,25 @@
void fastcall set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
if (flag)
- FD_SET(fd, files->close_on_exec);
+ FD_SET(fd, fdt->close_on_exec);
else
- FD_CLR(fd, files->close_on_exec);
+ FD_CLR(fd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
}
static inline int get_close_on_exec(unsigned int fd)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
int res;
- spin_lock(&files->file_lock);
- res = FD_ISSET(fd, files->close_on_exec);
- spin_unlock(&files->file_lock);
+ rcu_read_lock();
+ fdt = files_fdtable(files);
+ res = FD_ISSET(fd, fdt->close_on_exec);
+ rcu_read_unlock();
return res;
}
@@ -54,24 +59,26 @@ static int locate_fd(struct files_struct *files,
unsigned int newfd;
unsigned int start;
int error;
+ struct fdtable *fdt;
error = -EINVAL;
if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
goto out;
repeat:
+ fdt = files_fdtable(files);
/*
* Someone might have closed fd's in the range
- * orig_start..files->next_fd
+ * orig_start..fdt->next_fd
*/
start = orig_start;
- if (start < files->next_fd)
- start = files->next_fd;
+ if (start < fdt->next_fd)
+ start = fdt->next_fd;
newfd = start;
- if (start < files->max_fdset) {
- newfd = find_next_zero_bit(files->open_fds->fds_bits,
- files->max_fdset, start);
+ if (start < fdt->max_fdset) {
+ newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
+ fdt->max_fdset, start);
}
error = -EMFILE;
@@ -89,9 +96,15 @@ repeat:
if (error)
goto repeat;
- if (start <= files->next_fd)
- files->next_fd = newfd + 1;
-
+ /*
+ * We reacquired files_lock, so we are safe as long as
+ * we reacquire the fdtable pointer and use it while holding
+ * the lock, no one can free it during that time.
+ */
+ fdt = files_fdtable(files);
+ if (start <= fdt->next_fd)
+ fdt->next_fd = newfd + 1;
+
error = newfd;
out:
@@ -101,13 +114,16 @@ out:
static int dupfd(struct file *file, unsigned int start)
{
struct files_struct * files = current->files;
+ struct fdtable *fdt;
int fd;
spin_lock(&files->file_lock);
fd = locate_fd(files, file, start);
if (fd >= 0) {
- FD_SET(fd, files->open_fds);
- FD_CLR(fd, files->close_on_exec);
+ /* locate_fd() may have expanded fdtable, load the ptr */
+ fdt = files_fdtable(files);
+ FD_SET(fd, fdt->open_fds);
+ FD_CLR(fd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
fd_install(fd, file);
} else {
@@ -123,6 +139,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
int err = -EBADF;
struct file * file, *tofree;
struct files_struct * files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
if (!(file = fcheck(oldfd)))
@@ -148,13 +165,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
/* Yes. It's a race. In user space. Nothing sane to do */
err = -EBUSY;
- tofree = files->fd[newfd];
- if (!tofree && FD_ISSET(newfd, files->open_fds))
+ fdt = files_fdtable(files);
+ tofree = fdt->fd[newfd];
+ if (!tofree && FD_ISSET(newfd, fdt->open_fds))
goto out_fput;
- files->fd[newfd] = file;
- FD_SET(newfd, files->open_fds);
- FD_CLR(newfd, files->close_on_exec);
+ rcu_assign_pointer(fdt->fd[newfd], file);
+ FD_SET(newfd, fdt->open_fds);
+ FD_CLR(newfd, fdt->close_on_exec);
spin_unlock(&files->file_lock);
if (tofree)
diff --git a/fs/file.c b/fs/file.c
index 92b5f25985d2..2127a7b9dc3a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -13,6 +13,25 @@
#include <linux/vmalloc.h>
#include <linux/file.h>
#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/workqueue.h>
+
+struct fdtable_defer {
+ spinlock_t lock;
+ struct work_struct wq;
+ struct timer_list timer;
+ struct fdtable *next;
+};
+
+/*
+ * We use this list to defer free fdtables that have vmalloced
+ * sets/arrays. By keeping a per-cpu list, we avoid having to embed
+ * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
+ * this per-task structure.
+ */
+static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
/*
@@ -48,82 +67,143 @@ void free_fd_array(struct file **array, int num)
vfree(array);
}
-/*
- * Expand the fd array in the files_struct. Called with the files
- * spinlock held for write.
- */
+static void __free_fdtable(struct fdtable *fdt)
+{
+ int fdset_size, fdarray_size;
-static int expand_fd_array(struct files_struct *files, int nr)
- __releases(files->file_lock)
- __acquires(files->file_lock)
+ fdset_size = fdt->max_fdset / 8;
+ fdarray_size = fdt->max_fds * sizeof(struct file *);
+ free_fdset(fdt->open_fds, fdset_size);
+ free_fdset(fdt->close_on_exec, fdset_size);
+ free_fd_array(fdt->fd, fdarray_size);
+ kfree(fdt);
+}
+
+static void fdtable_timer(unsigned long data)
{
- struct file **new_fds;
- int error, nfds;
+ struct fdtable_defer *fddef = (struct fdtable_defer *)data;
-
- error = -EMFILE;
- if (files->max_fds >= NR_OPEN || nr >= NR_OPEN)
+ spin_lock(&fddef->lock);
+ /*
+ * If someone already emptied the queue return.
+ */
+ if (!fddef->next)
goto out;
+ if (!schedule_work(&fddef->wq))
+ mod_timer(&fddef->timer, 5);
+out:
+ spin_unlock(&fddef->lock);
+}
- nfds = files->max_fds;
- spin_unlock(&files->file_lock);
+static void free_fdtable_work(struct fdtable_defer *f)
+{
+ struct fdtable *fdt;
- /*
- * Expand to the max in easy steps, and keep expanding it until
- * we have enough for the requested fd array size.
- */
+ spin_lock_bh(&f->lock);
+ fdt = f->next;
+ f->next = NULL;
+ spin_unlock_bh(&f->lock);
+ while(fdt) {
+ struct fdtable *next = fdt->next;
+ __free_fdtable(fdt);
+ fdt = next;
+ }
+}
- do {
-#if NR_OPEN_DEFAULT < 256
- if (nfds < 256)
- nfds = 256;
- else
-#endif
- if (nfds < (PAGE_SIZE / sizeof(struct file *)))
- nfds = PAGE_SIZE / sizeof(struct file *);
- else {
- nfds = nfds * 2;
- if (nfds > NR_OPEN)
- nfds = NR_OPEN;
- }
- } while (nfds <= nr);
+static void free_fdtable_rcu(struct rcu_head *rcu)
+{
+ struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
+ int fdset_size, fdarray_size;
+ struct fdtable_defer *fddef;
- error = -ENOMEM;
- new_fds = alloc_fd_array(nfds);
- spin_lock(&files->file_lock);
- if (!new_fds)
- goto out;
+ BUG_ON(!fdt);
+ fdset_size = fdt->max_fdset / 8;
+ fdarray_size = fdt->max_fds * sizeof(struct file *);
- /* Copy the existing array and install the new pointer */
-
- if (nfds > files->max_fds) {
- struct file **old_fds;
- int i;
-
- old_fds = xchg(&files->fd, new_fds);
- i = xchg(&files->max_fds, nfds);
-
- /* Don't copy/clear the array if we are creating a new
- fd array for fork() */
- if (i) {
- memcpy(new_fds, old_fds, i * sizeof(struct file *));
- /* clear the remainder of the array */
- memset(&new_fds[i], 0,
- (nfds-i) * sizeof(struct file *));
-
- spin_unlock(&files->file_lock);
- free_fd_array(old_fds, i);
- spin_lock(&files->file_lock);
- }
+ if (fdt->free_files) {
+ /*
+ * The this fdtable was embedded in the files structure
+ * and the files structure itself was getting destroyed.
+ * It is now safe to free the files structure.
+ */
+ kmem_cache_free(files_cachep, fdt->free_files);
+ return;
+ }
+ if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
+ /*
+ * The fdtable was embedded
+ */
+ return;
+ }
+ if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
+ kfree(fdt->open_fds);
+ kfree(fdt->close_on_exec);
+ kfree(fdt->fd);
+ kfree(fdt);
} else {
- /* Somebody expanded the array while we slept ... */
- spin_unlock(&files->file_lock);
- free_fd_array(new_fds, nfds);
- spin_lock(&files->file_lock);
+ fddef = &get_cpu_var(fdtable_defer_list);
+ spin_lock(&fddef->lock);
+ fdt->next = fddef->next;
+ fddef->next = fdt;
+ /*
+ * vmallocs are handled from the workqueue context.
+ * If the per-cpu workqueue is running, then we
+ * defer work scheduling through a timer.
+ */
+ if (!schedule_work(&fddef->wq))
+ mod_timer(&fddef->timer, 5);
+ spin_unlock(&fddef->lock);
+ put_cpu_var(fdtable_defer_list);
}
- error = 0;
-out:
- return error;
+}
+
+void free_fdtable(struct fdtable *fdt)
+{
+ if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
+ fdt->max_fds > NR_OPEN_DEFAULT)
+ call_rcu(&fdt->rcu, free_fdtable_rcu);
+}
+
+/*
+ * Expand the fdset in the files_struct. Called with the files spinlock
+ * held for write.
+ */
+static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
+{
+ int i;
+ int count;
+
+ BUG_ON(nfdt->max_fdset < fdt->max_fdset);
+ BUG_ON(nfdt->max_fds < fdt->max_fds);
+ /* Copy the existing tables and install the new pointers */
+
+ i = fdt->max_fdset / (sizeof(unsigned long) * 8);
+ count = (nfdt->max_fdset - fdt->max_fdset) / 8;
+
+ /*
+ * Don't copy the entire array if the current fdset is
+ * not yet initialised.
+ */
+ if (i) {
+ memcpy (nfdt->open_fds, fdt->open_fds,
+ fdt->max_fdset/8);
+ memcpy (nfdt->close_on_exec, fdt->close_on_exec,
+ fdt->max_fdset/8);
+ memset (&nfdt->open_fds->fds_bits[i], 0, count);
+ memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
+ }
+
+ /* Don't copy/clear the array if we are creating a new
+ fd array for fork() */
+ if (fdt->max_fds) {
+ memcpy(nfdt->fd, fdt->fd,
+ fdt->max_fds * sizeof(struct file *));
+ /* clear the remainder of the array */
+ memset(&nfdt->fd[fdt->max_fds], 0,
+ (nfdt->max_fds - fdt->max_fds) *
+ sizeof(struct file *));
+ }
+ nfdt->next_fd = fdt->next_fd;
}
/*
@@ -154,26 +234,21 @@ void free_fdset(fd_set *array, int num)
vfree(array);
}
-/*
- * Expand the fdset in the files_struct. Called with the files spinlock
- * held for write.
- */
-static int expand_fdset(struct files_struct *files, int nr)
- __releases(file->file_lock)
- __acquires(file->file_lock)
+static struct fdtable *alloc_fdtable(int nr)
{
- fd_set *new_openset = NULL, *new_execset = NULL;
- int error, nfds = 0;
-
- error = -EMFILE;
- if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN)
- goto out;
+ struct fdtable *fdt = NULL;
+ int nfds = 0;
+ fd_set *new_openset = NULL, *new_execset = NULL;
+ struct file **new_fds;
- nfds = files->max_fdset;
- spin_unlock(&files->file_lock);
+ fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
+ if (!fdt)
+ goto out;
+ memset(fdt, 0, sizeof(*fdt));
- /* Expand to the max in easy steps */
- do {
+ nfds = __FD_SETSIZE;
+ /* Expand to the max in easy steps */
+ do {
if (nfds < (PAGE_SIZE * 8))
nfds = PAGE_SIZE * 8;
else {
@@ -183,49 +258,88 @@ static int expand_fdset(struct files_struct *files, int nr)
}
} while (nfds <= nr);
- error = -ENOMEM;
- new_openset = alloc_fdset(nfds);
- new_execset = alloc_fdset(nfds);
- spin_lock(&files->file_lock);
- if (!new_openset || !new_execset)
+ new_openset = alloc_fdset(nfds);
+ new_execset = alloc_fdset(nfds);
+ if (!new_openset || !new_execset)
+ goto out;
+ fdt->open_fds = new_openset;
+ fdt->close_on_exec = new_execset;
+ fdt->max_fdset = nfds;
+
+ nfds = NR_OPEN_DEFAULT;
+ /*
+ * Expand to the max in easy steps, and keep expanding it until
+ * we have enough for the requested fd array size.
+ */
+ do {
+#if NR_OPEN_DEFAULT < 256
+ if (nfds < 256)
+ nfds = 256;
+ else
+#endif
+ if (nfds < (PAGE_SIZE / sizeof(struct file *)))
+ nfds = PAGE_SIZE / sizeof(struct file *);
+ else {
+ nfds = nfds * 2;
+ if (nfds > NR_OPEN)
+ nfds = NR_OPEN;
+ }
+ } while (nfds <= nr);
+ new_fds = alloc_fd_array(nfds);
+ if (!new_fds)
+ goto out;
+ fdt->fd = new_fds;
+ fdt->max_fds = nfds;
+ fdt->free_files = NULL;
+ return fdt;
+out:
+ if (new_openset)
+ free_fdset(new_openset, nfds);
+ if (new_execset)
+ free_fdset(new_execset, nfds);
+ kfree(fdt);
+ return NULL;
+}
+
+/*
+ * Expands the file descriptor table - it will allocate a new fdtable and
+ * both fd array and fdset. It is expected to be called with the
+ * files_lock held.
+ */
+static int expand_fdtable(struct files_struct *files, int nr)
+ __releases(files->file_lock)
+ __acquires(files->file_lock)
+{
+ int error = 0;
+ struct fdtable *fdt;
+ struct fdtable *nfdt = NULL;
+
+ spin_unlock(&files->file_lock);
+ nfdt = alloc_fdtable(nr);
+ if (!nfdt) {
+ error = -ENOMEM;
+ spin_lock(&files->file_lock);
goto out;
+ }
- error = 0;
-
- /* Copy the existing tables and install the new pointers */
- if (nfds > files->max_fdset) {
- int i = files->max_fdset / (sizeof(unsigned long) * 8);
- int count = (nfds - files->max_fdset) / 8;
-
- /*
- * Don't copy the entire array if the current fdset is
- * not yet initialised.
- */
- if (i) {
- memcpy (new_openset, files->open_fds, files->max_fdset/8);
- memcpy (new_execset, files->close_on_exec, files->max_fdset/8);
- memset (&new_openset->fds_bits[i], 0, count);
- memset (&new_execset->fds_bits[i], 0, count);
- }
-
- nfds = xchg(&files->max_fdset, nfds);
- new_openset = xchg(&files->open_fds, new_openset);
- new_execset = xchg(&files->close_on_exec, new_execset);
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+ /*
+ * Check again since another task may have expanded the
+ * fd table while we dropped the lock
+ */
+ if (nr >= fdt->max_fds || nr >= fdt->max_fdset) {
+ copy_fdtable(nfdt, fdt);
+ } else {
+ /* Somebody expanded while we dropped file_lock */
spin_unlock(&files->file_lock);
- free_fdset (new_openset, nfds);
- free_fdset (new_execset, nfds);
+ __free_fdtable(nfdt);
spin_lock(&files->file_lock);
- return 0;
- }
- /* Somebody expanded the array while we slept ... */
-
+ goto out;
+ }
+ rcu_assign_pointer(files->fdt, nfdt);
+ free_fdtable(fdt);
out:
- spin_unlock(&files->file_lock);
- if (new_openset)
- free_fdset(new_openset, nfds);
- if (new_execset)
- free_fdset(new_execset, nfds);
- spin_lock(&files->file_lock);
return error;
}
@@ -237,18 +351,39 @@ out:
int expand_files(struct files_struct *files, int nr)
{
int err, expand = 0;
+ struct fdtable *fdt;
- if (nr >= files->max_fdset) {
- expand = 1;
- if ((err = expand_fdset(files, nr)))
+ fdt = files_fdtable(files);
+ if (nr >= fdt->max_fdset || nr >= fdt->max_fds) {
+ if (fdt->max_fdset >= NR_OPEN ||
+ fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) {
+ err = -EMFILE;
goto out;
- }
- if (nr >= files->max_fds) {
+ }
expand = 1;
- if ((err = expand_fd_array(files, nr)))
+ if ((err = expand_fdtable(files, nr)))
goto out;
}
err = expand;
out:
return err;
}
+
+static void __devinit fdtable_defer_list_init(int cpu)
+{
+ struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
+ spin_lock_init(&fddef->lock);
+ INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef);
+ init_timer(&fddef->timer);
+ fddef->timer.data = (unsigned long)fddef;
+ fddef->timer.function = fdtable_timer;
+ fddef->next = NULL;
+}
+
+void __init files_defer_init(void)
+{
+ int i;
+ /* Really early - can't use for_each_cpu */
+ for (i = 0; i < NR_CPUS; i++)
+ fdtable_defer_list_init(i);
+}
diff --git a/fs/file_table.c b/fs/file_table.c
index 43e9e1737de2..86ec8ae985b4 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/security.h>
#include <linux/eventpoll.h>
+#include <linux/rcupdate.h>
#include <linux/mount.h>
#include <linux/cdev.h>
#include <linux/fsnotify.h>
@@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags)
spin_unlock_irqrestore(&filp_count_lock, flags);
}
-static inline void file_free(struct file *f)
+static inline void file_free_rcu(struct rcu_head *head)
{
+ struct file *f = container_of(head, struct file, f_rcuhead);
kmem_cache_free(filp_cachep, f);
}
+static inline void file_free(struct file *f)
+{
+ call_rcu(&f->f_rcuhead, file_free_rcu);
+}
+
/* Find an unused file structure and return a pointer to it.
* Returns NULL, if there are no more free file structures or
* we run out of memory.
@@ -110,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp);
void fastcall fput(struct file *file)
{
- if (atomic_dec_and_test(&file->f_count))
+ if (rcuref_dec_and_test(&file->f_count))
__fput(file);
}
@@ -156,11 +163,17 @@ struct file fastcall *fget(unsigned int fd)
struct file *file;
struct files_struct *files = current->files;
- spin_lock(&files->file_lock);
+ rcu_read_lock();
file = fcheck_files(files, fd);
- if (file)
- get_file(file);
- spin_unlock(&files->file_lock);
+ if (file) {
+ if (!rcuref_inc_lf(&file->f_count)) {
+ /* File object ref couldn't be taken */
+ rcu_read_unlock();
+ return NULL;
+ }
+ }
+ rcu_read_unlock();
+
return file;
}
@@ -182,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
if (likely((atomic_read(&files->count) == 1))) {
file = fcheck_files(files, fd);
} else {
- spin_lock(&files->file_lock);
+ rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
- get_file(file);
- *fput_needed = 1;
+ if (rcuref_inc_lf(&file->f_count))
+ *fput_needed = 1;
+ else
+ /* Didn't get the reference, someone's freed */
+ file = NULL;
}
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
}
+
return file;
}
void put_filp(struct file *file)
{
- if (atomic_dec_and_test(&file->f_count)) {
+ if (rcuref_dec_and_test(&file->f_count)) {
security_file_free(file);
file_kill(file);
file_free(file);
@@ -257,4 +274,5 @@ void __init files_init(unsigned long mempages)
files_stat.max_files = n;
if (files_stat.max_files < NR_FILE)
files_stat.max_files = NR_FILE;
+ files_defer_init();
}
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
new file mode 100644
index 000000000000..c3e1f760cac9
--- /dev/null
+++ b/fs/fuse/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the FUSE filesystem.
+#
+
+obj-$(CONFIG_FUSE_FS) += fuse.o
+
+fuse-objs := dev.o dir.o file.o inode.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
new file mode 100644
index 000000000000..d4c869c6d01b
--- /dev/null
+++ b/fs/fuse/dev.c
@@ -0,0 +1,877 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/uio.h>
+#include <linux/miscdevice.h>
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+
+MODULE_ALIAS_MISCDEV(FUSE_MINOR);
+
+static kmem_cache_t *fuse_req_cachep;
+
+static inline struct fuse_conn *fuse_get_conn(struct file *file)
+{
+ struct fuse_conn *fc;
+ spin_lock(&fuse_lock);
+ fc = file->private_data;
+ if (fc && !fc->mounted)
+ fc = NULL;
+ spin_unlock(&fuse_lock);
+ return fc;
+}
+
+static inline void fuse_request_init(struct fuse_req *req)
+{
+ memset(req, 0, sizeof(*req));
+ INIT_LIST_HEAD(&req->list);
+ init_waitqueue_head(&req->waitq);
+ atomic_set(&req->count, 1);
+}
+
+struct fuse_req *fuse_request_alloc(void)
+{
+ struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
+ if (req)
+ fuse_request_init(req);
+ return req;
+}
+
+void fuse_request_free(struct fuse_req *req)
+{
+ kmem_cache_free(fuse_req_cachep, req);
+}
+
+static inline void block_sigs(sigset_t *oldset)
+{
+ sigset_t mask;
+
+ siginitsetinv(&mask, sigmask(SIGKILL));
+ sigprocmask(SIG_BLOCK, &mask, oldset);
+}
+
+static inline void restore_sigs(sigset_t *oldset)
+{
+ sigprocmask(SIG_SETMASK, oldset, NULL);
+}
+
+void fuse_reset_request(struct fuse_req *req)
+{
+ int preallocated = req->preallocated;
+ BUG_ON(atomic_read(&req->count) != 1);
+ fuse_request_init(req);
+ req->preallocated = preallocated;
+}
+
+static void __fuse_get_request(struct fuse_req *req)
+{
+ atomic_inc(&req->count);
+}
+
+/* Must be called with > 1 refcount */
+static void __fuse_put_request(struct fuse_req *req)
+{
+ BUG_ON(atomic_read(&req->count) < 2);
+ atomic_dec(&req->count);
+}
+
+static struct fuse_req *do_get_request(struct fuse_conn *fc)
+{
+ struct fuse_req *req;
+
+ spin_lock(&fuse_lock);
+ BUG_ON(list_empty(&fc->unused_list));
+ req = list_entry(fc->unused_list.next, struct fuse_req, list);
+ list_del_init(&req->list);
+ spin_unlock(&fuse_lock);
+ fuse_request_init(req);
+ req->preallocated = 1;
+ req->in.h.uid = current->fsuid;
+ req->in.h.gid = current->fsgid;
+ req->in.h.pid = current->pid;
+ return req;
+}
+
+/* This can return NULL, but only in case it's interrupted by a SIGKILL */
+struct fuse_req *fuse_get_request(struct fuse_conn *fc)
+{
+ int intr;
+ sigset_t oldset;
+
+ block_sigs(&oldset);
+ intr = down_interruptible(&fc->outstanding_sem);
+ restore_sigs(&oldset);
+ return intr ? NULL : do_get_request(fc);
+}
+
+static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+ spin_lock(&fuse_lock);
+ if (req->preallocated)
+ list_add(&req->list, &fc->unused_list);
+ else
+ fuse_request_free(req);
+
+ /* If we are in debt decrease that first */
+ if (fc->outstanding_debt)
+ fc->outstanding_debt--;
+ else
+ up(&fc->outstanding_sem);
+ spin_unlock(&fuse_lock);
+}
+
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+ if (atomic_dec_and_test(&req->count))
+ fuse_putback_request(fc, req);
+}
+
+void fuse_release_background(struct fuse_req *req)
+{
+ iput(req->inode);
+ iput(req->inode2);
+ if (req->file)
+ fput(req->file);
+ spin_lock(&fuse_lock);
+ list_del(&req->bg_entry);
+ spin_unlock(&fuse_lock);
+}
+
+/*
+ * This function is called when a request is finished. Either a reply
+ * has arrived or it was interrupted (and not yet sent) or some error
+ * occured during communication with userspace, or the device file was
+ * closed. It decreases the referece count for the request. In case
+ * of a background request the referece to the stored objects are
+ * released. The requester thread is woken up (if still waiting), and
+ * finally the request is either freed or put on the unused_list
+ *
+ * Called with fuse_lock, unlocks it
+ */
+static void request_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ int putback;
+ req->finished = 1;
+ putback = atomic_dec_and_test(&req->count);
+ spin_unlock(&fuse_lock);
+ if (req->background) {
+ down_read(&fc->sbput_sem);
+ if (fc->mounted)
+ fuse_release_background(req);
+ up_read(&fc->sbput_sem);
+ }
+ wake_up(&req->waitq);
+ if (req->in.h.opcode == FUSE_INIT) {
+ int i;
+
+ if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
+ fc->conn_error = 1;
+
+ /* After INIT reply is received other requests can go
+ out. So do (FUSE_MAX_OUTSTANDING - 1) number of
+ up()s on outstanding_sem. The last up() is done in
+ fuse_putback_request() */
+ for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
+ up(&fc->outstanding_sem);
+ }
+ if (putback)
+ fuse_putback_request(fc, req);
+}
+
+/*
+ * Unfortunately request interruption not just solves the deadlock
+ * problem, it causes problems too. These stem from the fact, that an
+ * interrupted request is continued to be processed in userspace,
+ * while all the locks and object references (inode and file) held
+ * during the operation are released.
+ *
+ * To release the locks is exactly why there's a need to interrupt the
+ * request, so there's not a lot that can be done about this, except
+ * introduce additional locking in userspace.
+ *
+ * More important is to keep inode and file references until userspace
+ * has replied, otherwise FORGET and RELEASE could be sent while the
+ * inode/file is still used by the filesystem.
+ *
+ * For this reason the concept of "background" request is introduced.
+ * An interrupted request is backgrounded if it has been already sent
+ * to userspace. Backgrounding involves getting an extra reference to
+ * inode(s) or file used in the request, and adding the request to
+ * fc->background list. When a reply is received for a background
+ * request, the object references are released, and the request is
+ * removed from the list. If the filesystem is unmounted while there
+ * are still background requests, the list is walked and references
+ * are released as if a reply was received.
+ *
+ * There's one more use for a background request. The RELEASE message is
+ * always sent as background, since it doesn't return an error or
+ * data.
+ */
+static void background_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->background = 1;
+ list_add(&req->bg_entry, &fc->background);
+ if (req->inode)
+ req->inode = igrab(req->inode);
+ if (req->inode2)
+ req->inode2 = igrab(req->inode2);
+ if (req->file)
+ get_file(req->file);
+}
+
+/* Called with fuse_lock held. Releases, and then reacquires it. */
+static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+{
+ sigset_t oldset;
+
+ spin_unlock(&fuse_lock);
+ block_sigs(&oldset);
+ wait_event_interruptible(req->waitq, req->finished);
+ restore_sigs(&oldset);
+ spin_lock(&fuse_lock);
+ if (req->finished)
+ return;
+
+ req->out.h.error = -EINTR;
+ req->interrupted = 1;
+ if (req->locked) {
+ /* This is uninterruptible sleep, because data is
+ being copied to/from the buffers of req. During
+ locked state, there mustn't be any filesystem
+ operation (e.g. page fault), since that could lead
+ to deadlock */
+ spin_unlock(&fuse_lock);
+ wait_event(req->waitq, !req->locked);
+ spin_lock(&fuse_lock);
+ }
+ if (!req->sent && !list_empty(&req->list)) {
+ list_del(&req->list);
+ __fuse_put_request(req);
+ } else if (!req->finished && req->sent)
+ background_request(fc, req);
+}
+
+static unsigned len_args(unsigned numargs, struct fuse_arg *args)
+{
+ unsigned nbytes = 0;
+ unsigned i;
+
+ for (i = 0; i < numargs; i++)
+ nbytes += args[i].size;
+
+ return nbytes;
+}
+
+static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
+{
+ fc->reqctr++;
+ /* zero is special */
+ if (fc->reqctr == 0)
+ fc->reqctr = 1;
+ req->in.h.unique = fc->reqctr;
+ req->in.h.len = sizeof(struct fuse_in_header) +
+ len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
+ if (!req->preallocated) {
+ /* If request is not preallocated (either FORGET or
+ RELEASE), then still decrease outstanding_sem, so
+ user can't open infinite number of files while not
+ processing the RELEASE requests. However for
+ efficiency do it without blocking, so if down()
+ would block, just increase the debt instead */
+ if (down_trylock(&fc->outstanding_sem))
+ fc->outstanding_debt++;
+ }
+ list_add_tail(&req->list, &fc->pending);
+ wake_up(&fc->waitq);
+}
+
+/*
+ * This can only be interrupted by a SIGKILL
+ */
+void request_send(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->isreply = 1;
+ spin_lock(&fuse_lock);
+ if (!fc->connected)
+ req->out.h.error = -ENOTCONN;
+ else if (fc->conn_error)
+ req->out.h.error = -ECONNREFUSED;
+ else {
+ queue_request(fc, req);
+ /* acquire extra reference, since request is still needed
+ after request_end() */
+ __fuse_get_request(req);
+
+ request_wait_answer(fc, req);
+ }
+ spin_unlock(&fuse_lock);
+}
+
+static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
+{
+ spin_lock(&fuse_lock);
+ if (fc->connected) {
+ queue_request(fc, req);
+ spin_unlock(&fuse_lock);
+ } else {
+ req->out.h.error = -ENOTCONN;
+ request_end(fc, req);
+ }
+}
+
+void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->isreply = 0;
+ request_send_nowait(fc, req);
+}
+
+void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->isreply = 1;
+ spin_lock(&fuse_lock);
+ background_request(fc, req);
+ spin_unlock(&fuse_lock);
+ request_send_nowait(fc, req);
+}
+
+void fuse_send_init(struct fuse_conn *fc)
+{
+ /* This is called from fuse_read_super() so there's guaranteed
+ to be a request available */
+ struct fuse_req *req = do_get_request(fc);
+ struct fuse_init_in_out *arg = &req->misc.init_in_out;
+ arg->major = FUSE_KERNEL_VERSION;
+ arg->minor = FUSE_KERNEL_MINOR_VERSION;
+ req->in.h.opcode = FUSE_INIT;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(*arg);
+ req->in.args[0].value = arg;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(*arg);
+ req->out.args[0].value = arg;
+ request_send_background(fc, req);
+}
+
+/*
+ * Lock the request. Up to the next unlock_request() there mustn't be
+ * anything that could cause a page-fault. If the request was already
+ * interrupted bail out.
+ */
+static inline int lock_request(struct fuse_req *req)
+{
+ int err = 0;
+ if (req) {
+ spin_lock(&fuse_lock);
+ if (req->interrupted)
+ err = -ENOENT;
+ else
+ req->locked = 1;
+ spin_unlock(&fuse_lock);
+ }
+ return err;
+}
+
+/*
+ * Unlock request. If it was interrupted during being locked, the
+ * requester thread is currently waiting for it to be unlocked, so
+ * wake it up.
+ */
+static inline void unlock_request(struct fuse_req *req)
+{
+ if (req) {
+ spin_lock(&fuse_lock);
+ req->locked = 0;
+ if (req->interrupted)
+ wake_up(&req->waitq);
+ spin_unlock(&fuse_lock);
+ }
+}
+
+struct fuse_copy_state {
+ int write;
+ struct fuse_req *req;
+ const struct iovec *iov;
+ unsigned long nr_segs;
+ unsigned long seglen;
+ unsigned long addr;
+ struct page *pg;
+ void *mapaddr;
+ void *buf;
+ unsigned len;
+};
+
+static void fuse_copy_init(struct fuse_copy_state *cs, int write,
+ struct fuse_req *req, const struct iovec *iov,
+ unsigned long nr_segs)
+{
+ memset(cs, 0, sizeof(*cs));
+ cs->write = write;
+ cs->req = req;
+ cs->iov = iov;
+ cs->nr_segs = nr_segs;
+}
+
+/* Unmap and put previous page of userspace buffer */
+static inline void fuse_copy_finish(struct fuse_copy_state *cs)
+{
+ if (cs->mapaddr) {
+ kunmap_atomic(cs->mapaddr, KM_USER0);
+ if (cs->write) {
+ flush_dcache_page(cs->pg);
+ set_page_dirty_lock(cs->pg);
+ }
+ put_page(cs->pg);
+ cs->mapaddr = NULL;
+ }
+}
+
+/*
+ * Get another pagefull of userspace buffer, and map it to kernel
+ * address space, and lock request
+ */
+static int fuse_copy_fill(struct fuse_copy_state *cs)
+{
+ unsigned long offset;
+ int err;
+
+ unlock_request(cs->req);
+ fuse_copy_finish(cs);
+ if (!cs->seglen) {
+ BUG_ON(!cs->nr_segs);
+ cs->seglen = cs->iov[0].iov_len;
+ cs->addr = (unsigned long) cs->iov[0].iov_base;
+ cs->iov ++;
+ cs->nr_segs --;
+ }
+ down_read(&current->mm->mmap_sem);
+ err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
+ &cs->pg, NULL);
+ up_read(&current->mm->mmap_sem);
+ if (err < 0)
+ return err;
+ BUG_ON(err != 1);
+ offset = cs->addr % PAGE_SIZE;
+ cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
+ cs->buf = cs->mapaddr + offset;
+ cs->len = min(PAGE_SIZE - offset, cs->seglen);
+ cs->seglen -= cs->len;
+ cs->addr += cs->len;
+
+ return lock_request(cs->req);
+}
+
+/* Do as much copy to/from userspace buffer as we can */
+static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val,
+ unsigned *size)
+{
+ unsigned ncpy = min(*size, cs->len);
+ if (val) {
+ if (cs->write)
+ memcpy(cs->buf, *val, ncpy);
+ else
+ memcpy(*val, cs->buf, ncpy);
+ *val += ncpy;
+ }
+ *size -= ncpy;
+ cs->len -= ncpy;
+ cs->buf += ncpy;
+ return ncpy;
+}
+
+/*
+ * Copy a page in the request to/from the userspace buffer. Must be
+ * done atomically
+ */
+static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
+ unsigned offset, unsigned count, int zeroing)
+{
+ if (page && zeroing && count < PAGE_SIZE) {
+ void *mapaddr = kmap_atomic(page, KM_USER1);
+ memset(mapaddr, 0, PAGE_SIZE);
+ kunmap_atomic(mapaddr, KM_USER1);
+ }
+ while (count) {
+ int err;
+ if (!cs->len && (err = fuse_copy_fill(cs)))
+ return err;
+ if (page) {
+ void *mapaddr = kmap_atomic(page, KM_USER1);
+ void *buf = mapaddr + offset;
+ offset += fuse_copy_do(cs, &buf, &count);
+ kunmap_atomic(mapaddr, KM_USER1);
+ } else
+ offset += fuse_copy_do(cs, NULL, &count);
+ }
+ if (page && !cs->write)
+ flush_dcache_page(page);
+ return 0;
+}
+
+/* Copy pages in the request to/from userspace buffer */
+static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
+ int zeroing)
+{
+ unsigned i;
+ struct fuse_req *req = cs->req;
+ unsigned offset = req->page_offset;
+ unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
+
+ for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
+ struct page *page = req->pages[i];
+ int err = fuse_copy_page(cs, page, offset, count, zeroing);
+ if (err)
+ return err;
+
+ nbytes -= count;
+ count = min(nbytes, (unsigned) PAGE_SIZE);
+ offset = 0;
+ }
+ return 0;
+}
+
+/* Copy a single argument in the request to/from userspace buffer */
+static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
+{
+ while (size) {
+ int err;
+ if (!cs->len && (err = fuse_copy_fill(cs)))
+ return err;
+ fuse_copy_do(cs, &val, &size);
+ }
+ return 0;
+}
+
+/* Copy request arguments to/from userspace buffer */
+static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
+ unsigned argpages, struct fuse_arg *args,
+ int zeroing)
+{
+ int err = 0;
+ unsigned i;
+
+ for (i = 0; !err && i < numargs; i++) {
+ struct fuse_arg *arg = &args[i];
+ if (i == numargs - 1 && argpages)
+ err = fuse_copy_pages(cs, arg->size, zeroing);
+ else
+ err = fuse_copy_one(cs, arg->value, arg->size);
+ }
+ return err;
+}
+
+/* Wait until a request is available on the pending list */
+static void request_wait(struct fuse_conn *fc)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(&fc->waitq, &wait);
+ while (fc->mounted && list_empty(&fc->pending)) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (signal_pending(current))
+ break;
+
+ spin_unlock(&fuse_lock);
+ schedule();
+ spin_lock(&fuse_lock);
+ }
+ set_current_state(TASK_RUNNING);
+ remove_wait_queue(&fc->waitq, &wait);
+}
+
+/*
+ * Read a single request into the userspace filesystem's buffer. This
+ * function waits until a request is available, then removes it from
+ * the pending list and copies request data to userspace buffer. If
+ * no reply is needed (FORGET) or request has been interrupted or
+ * there was an error during the copying then it's finished by calling
+ * request_end(). Otherwise add it to the processing list, and set
+ * the 'sent' flag.
+ */
+static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *off)
+{
+ int err;
+ struct fuse_conn *fc;
+ struct fuse_req *req;
+ struct fuse_in *in;
+ struct fuse_copy_state cs;
+ unsigned reqsize;
+
+ spin_lock(&fuse_lock);
+ fc = file->private_data;
+ err = -EPERM;
+ if (!fc)
+ goto err_unlock;
+ request_wait(fc);
+ err = -ENODEV;
+ if (!fc->mounted)
+ goto err_unlock;
+ err = -ERESTARTSYS;
+ if (list_empty(&fc->pending))
+ goto err_unlock;
+
+ req = list_entry(fc->pending.next, struct fuse_req, list);
+ list_del_init(&req->list);
+ spin_unlock(&fuse_lock);
+
+ in = &req->in;
+ reqsize = req->in.h.len;
+ fuse_copy_init(&cs, 1, req, iov, nr_segs);
+ err = -EINVAL;
+ if (iov_length(iov, nr_segs) >= reqsize) {
+ err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
+ if (!err)
+ err = fuse_copy_args(&cs, in->numargs, in->argpages,
+ (struct fuse_arg *) in->args, 0);
+ }
+ fuse_copy_finish(&cs);
+
+ spin_lock(&fuse_lock);
+ req->locked = 0;
+ if (!err && req->interrupted)
+ err = -ENOENT;
+ if (err) {
+ if (!req->interrupted)
+ req->out.h.error = -EIO;
+ request_end(fc, req);
+ return err;
+ }
+ if (!req->isreply)
+ request_end(fc, req);
+ else {
+ req->sent = 1;
+ list_add_tail(&req->list, &fc->processing);
+ spin_unlock(&fuse_lock);
+ }
+ return reqsize;
+
+ err_unlock:
+ spin_unlock(&fuse_lock);
+ return err;
+}
+
+static ssize_t fuse_dev_read(struct file *file, char __user *buf,
+ size_t nbytes, loff_t *off)
+{
+ struct iovec iov;
+ iov.iov_len = nbytes;
+ iov.iov_base = buf;
+ return fuse_dev_readv(file, &iov, 1, off);
+}
+
+/* Look up request on processing list by unique ID */
+static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
+{
+ struct list_head *entry;
+
+ list_for_each(entry, &fc->processing) {
+ struct fuse_req *req;
+ req = list_entry(entry, struct fuse_req, list);
+ if (req->in.h.unique == unique)
+ return req;
+ }
+ return NULL;
+}
+
+static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
+ unsigned nbytes)
+{
+ unsigned reqsize = sizeof(struct fuse_out_header);
+
+ if (out->h.error)
+ return nbytes != reqsize ? -EINVAL : 0;
+
+ reqsize += len_args(out->numargs, out->args);
+
+ if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
+ return -EINVAL;
+ else if (reqsize > nbytes) {
+ struct fuse_arg *lastarg = &out->args[out->numargs-1];
+ unsigned diffsize = reqsize - nbytes;
+ if (diffsize > lastarg->size)
+ return -EINVAL;
+ lastarg->size -= diffsize;
+ }
+ return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
+ out->page_zeroing);
+}
+
+/*
+ * Write a single reply to a request. First the header is copied from
+ * the write buffer. The request is then searched on the processing
+ * list by the unique ID found in the header. If found, then remove
+ * it from the list and copy the rest of the buffer to the request.
+ * The request is finished by calling request_end()
+ */
+static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *off)
+{
+ int err;
+ unsigned nbytes = iov_length(iov, nr_segs);
+ struct fuse_req *req;
+ struct fuse_out_header oh;
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc = fuse_get_conn(file);
+ if (!fc)
+ return -ENODEV;
+
+ fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
+ if (nbytes < sizeof(struct fuse_out_header))
+ return -EINVAL;
+
+ err = fuse_copy_one(&cs, &oh, sizeof(oh));
+ if (err)
+ goto err_finish;
+ err = -EINVAL;
+ if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
+ oh.len != nbytes)
+ goto err_finish;
+
+ spin_lock(&fuse_lock);
+ req = request_find(fc, oh.unique);
+ err = -EINVAL;
+ if (!req)
+ goto err_unlock;
+
+ list_del_init(&req->list);
+ if (req->interrupted) {
+ request_end(fc, req);
+ fuse_copy_finish(&cs);
+ return -ENOENT;
+ }
+ req->out.h = oh;
+ req->locked = 1;
+ cs.req = req;
+ spin_unlock(&fuse_lock);
+
+ err = copy_out_args(&cs, &req->out, nbytes);
+ fuse_copy_finish(&cs);
+
+ spin_lock(&fuse_lock);
+ req->locked = 0;
+ if (!err) {
+ if (req->interrupted)
+ err = -ENOENT;
+ } else if (!req->interrupted)
+ req->out.h.error = -EIO;
+ request_end(fc, req);
+
+ return err ? err : nbytes;
+
+ err_unlock:
+ spin_unlock(&fuse_lock);
+ err_finish:
+ fuse_copy_finish(&cs);
+ return err;
+}
+
+static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
+ size_t nbytes, loff_t *off)
+{
+ struct iovec iov;
+ iov.iov_len = nbytes;
+ iov.iov_base = (char __user *) buf;
+ return fuse_dev_writev(file, &iov, 1, off);
+}
+
+static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
+{
+ struct fuse_conn *fc = fuse_get_conn(file);
+ unsigned mask = POLLOUT | POLLWRNORM;
+
+ if (!fc)
+ return -ENODEV;
+
+ poll_wait(file, &fc->waitq, wait);
+
+ spin_lock(&fuse_lock);
+ if (!list_empty(&fc->pending))
+ mask |= POLLIN | POLLRDNORM;
+ spin_unlock(&fuse_lock);
+
+ return mask;
+}
+
+/* Abort all requests on the given list (pending or processing) */
+static void end_requests(struct fuse_conn *fc, struct list_head *head)
+{
+ while (!list_empty(head)) {
+ struct fuse_req *req;
+ req = list_entry(head->next, struct fuse_req, list);
+ list_del_init(&req->list);
+ req->out.h.error = -ECONNABORTED;
+ request_end(fc, req);
+ spin_lock(&fuse_lock);
+ }
+}
+
+static int fuse_dev_release(struct inode *inode, struct file *file)
+{
+ struct fuse_conn *fc;
+
+ spin_lock(&fuse_lock);
+ fc = file->private_data;
+ if (fc) {
+ fc->connected = 0;
+ end_requests(fc, &fc->pending);
+ end_requests(fc, &fc->processing);
+ fuse_release_conn(fc);
+ }
+ spin_unlock(&fuse_lock);
+ return 0;
+}
+
+struct file_operations fuse_dev_operations = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = fuse_dev_read,
+ .readv = fuse_dev_readv,
+ .write = fuse_dev_write,
+ .writev = fuse_dev_writev,
+ .poll = fuse_dev_poll,
+ .release = fuse_dev_release,
+};
+
+static struct miscdevice fuse_miscdevice = {
+ .minor = FUSE_MINOR,
+ .name = "fuse",
+ .fops = &fuse_dev_operations,
+};
+
+int __init fuse_dev_init(void)
+{
+ int err = -ENOMEM;
+ fuse_req_cachep = kmem_cache_create("fuse_request",
+ sizeof(struct fuse_req),
+ 0, 0, NULL, NULL);
+ if (!fuse_req_cachep)
+ goto out;
+
+ err = misc_register(&fuse_miscdevice);
+ if (err)
+ goto out_cache_clean;
+
+ return 0;
+
+ out_cache_clean:
+ kmem_cache_destroy(fuse_req_cachep);
+ out:
+ return err;
+}
+
+void fuse_dev_cleanup(void)
+{
+ misc_deregister(&fuse_miscdevice);
+ kmem_cache_destroy(fuse_req_cachep);
+}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
new file mode 100644
index 000000000000..e79e49b3eec7
--- /dev/null
+++ b/fs/fuse/dir.c
@@ -0,0 +1,982 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/file.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+
+static inline unsigned long time_to_jiffies(unsigned long sec,
+ unsigned long nsec)
+{
+ struct timespec ts = {sec, nsec};
+ return jiffies + timespec_to_jiffies(&ts);
+}
+
+static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
+ struct dentry *entry,
+ struct fuse_entry_out *outarg)
+{
+ req->in.h.opcode = FUSE_LOOKUP;
+ req->in.h.nodeid = get_node_id(dir);
+ req->inode = dir;
+ req->in.numargs = 1;
+ req->in.args[0].size = entry->d_name.len + 1;
+ req->in.args[0].value = entry->d_name.name;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(struct fuse_entry_out);
+ req->out.args[0].value = outarg;
+}
+
+static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
+{
+ if (!entry->d_inode || is_bad_inode(entry->d_inode))
+ return 0;
+ else if (time_after(jiffies, entry->d_time)) {
+ int err;
+ struct fuse_entry_out outarg;
+ struct inode *inode = entry->d_inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return 0;
+
+ fuse_lookup_init(req, entry->d_parent->d_inode, entry, &outarg);
+ request_send(fc, req);
+ err = req->out.h.error;
+ if (!err) {
+ if (outarg.nodeid != get_node_id(inode)) {
+ fuse_send_forget(fc, req, outarg.nodeid, 1);
+ return 0;
+ }
+ fi->nlookup ++;
+ }
+ fuse_put_request(fc, req);
+ if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ return 0;
+
+ fuse_change_attributes(inode, &outarg.attr);
+ entry->d_time = time_to_jiffies(outarg.entry_valid,
+ outarg.entry_valid_nsec);
+ fi->i_time = time_to_jiffies(outarg.attr_valid,
+ outarg.attr_valid_nsec);
+ }
+ return 1;
+}
+
+static struct dentry_operations fuse_dentry_operations = {
+ .d_revalidate = fuse_dentry_revalidate,
+};
+
+static int fuse_lookup_iget(struct inode *dir, struct dentry *entry,
+ struct inode **inodep)
+{
+ int err;
+ struct fuse_entry_out outarg;
+ struct inode *inode = NULL;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req;
+
+ if (entry->d_name.len > FUSE_NAME_MAX)
+ return -ENAMETOOLONG;
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ fuse_lookup_init(req, dir, entry, &outarg);
+ request_send(fc, req);
+ err = req->out.h.error;
+ if (!err) {
+ inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+ &outarg.attr);
+ if (!inode) {
+ fuse_send_forget(fc, req, outarg.nodeid, 1);
+ return -ENOMEM;
+ }
+ }
+ fuse_put_request(fc, req);
+ if (err && err != -ENOENT)
+ return err;
+
+ if (inode) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ entry->d_time = time_to_jiffies(outarg.entry_valid,
+ outarg.entry_valid_nsec);
+ fi->i_time = time_to_jiffies(outarg.attr_valid,
+ outarg.attr_valid_nsec);
+ }
+
+ entry->d_op = &fuse_dentry_operations;
+ *inodep = inode;
+ return 0;
+}
+
+void fuse_invalidate_attr(struct inode *inode)
+{
+ get_fuse_inode(inode)->i_time = jiffies - 1;
+}
+
+static void fuse_invalidate_entry(struct dentry *entry)
+{
+ d_invalidate(entry);
+ entry->d_time = jiffies - 1;
+}
+
+static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
+ struct inode *dir, struct dentry *entry,
+ int mode)
+{
+ struct fuse_entry_out outarg;
+ struct inode *inode;
+ struct fuse_inode *fi;
+ int err;
+
+ req->in.h.nodeid = get_node_id(dir);
+ req->inode = dir;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ request_send(fc, req);
+ err = req->out.h.error;
+ if (err) {
+ fuse_put_request(fc, req);
+ return err;
+ }
+ inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
+ &outarg.attr);
+ if (!inode) {
+ fuse_send_forget(fc, req, outarg.nodeid, 1);
+ return -ENOMEM;
+ }
+ fuse_put_request(fc, req);
+
+ /* Don't allow userspace to do really stupid things... */
+ if ((inode->i_mode ^ mode) & S_IFMT) {
+ iput(inode);
+ return -EIO;
+ }
+
+ entry->d_time = time_to_jiffies(outarg.entry_valid,
+ outarg.entry_valid_nsec);
+
+ fi = get_fuse_inode(inode);
+ fi->i_time = time_to_jiffies(outarg.attr_valid,
+ outarg.attr_valid_nsec);
+
+ d_instantiate(entry, inode);
+ fuse_invalidate_attr(dir);
+ return 0;
+}
+
+static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
+ dev_t rdev)
+{
+ struct fuse_mknod_in inarg;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.mode = mode;
+ inarg.rdev = new_encode_dev(rdev);
+ req->in.h.opcode = FUSE_MKNOD;
+ req->in.numargs = 2;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = entry->d_name.len + 1;
+ req->in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, req, dir, entry, mode);
+}
+
+static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
+ struct nameidata *nd)
+{
+ return fuse_mknod(dir, entry, mode, 0);
+}
+
+static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode)
+{
+ struct fuse_mkdir_in inarg;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.mode = mode;
+ req->in.h.opcode = FUSE_MKDIR;
+ req->in.numargs = 2;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = entry->d_name.len + 1;
+ req->in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, req, dir, entry, S_IFDIR);
+}
+
+static int fuse_symlink(struct inode *dir, struct dentry *entry,
+ const char *link)
+{
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ unsigned len = strlen(link) + 1;
+ struct fuse_req *req;
+
+ if (len > FUSE_SYMLINK_MAX)
+ return -ENAMETOOLONG;
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ req->in.h.opcode = FUSE_SYMLINK;
+ req->in.numargs = 2;
+ req->in.args[0].size = entry->d_name.len + 1;
+ req->in.args[0].value = entry->d_name.name;
+ req->in.args[1].size = len;
+ req->in.args[1].value = link;
+ return create_new_entry(fc, req, dir, entry, S_IFLNK);
+}
+
+static int fuse_unlink(struct inode *dir, struct dentry *entry)
+{
+ int err;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ req->in.h.opcode = FUSE_UNLINK;
+ req->in.h.nodeid = get_node_id(dir);
+ req->inode = dir;
+ req->in.numargs = 1;
+ req->in.args[0].size = entry->d_name.len + 1;
+ req->in.args[0].value = entry->d_name.name;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ struct inode *inode = entry->d_inode;
+
+ /* Set nlink to zero so the inode can be cleared, if
+ the inode does have more links this will be
+ discovered at the next lookup/getattr */
+ inode->i_nlink = 0;
+ fuse_invalidate_attr(inode);
+ fuse_invalidate_attr(dir);
+ } else if (err == -EINTR)
+ fuse_invalidate_entry(entry);
+ return err;
+}
+
+static int fuse_rmdir(struct inode *dir, struct dentry *entry)
+{
+ int err;
+ struct fuse_conn *fc = get_fuse_conn(dir);
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ req->in.h.opcode = FUSE_RMDIR;
+ req->in.h.nodeid = get_node_id(dir);
+ req->inode = dir;
+ req->in.numargs = 1;
+ req->in.args[0].size = entry->d_name.len + 1;
+ req->in.args[0].value = entry->d_name.name;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ entry->d_inode->i_nlink = 0;
+ fuse_invalidate_attr(dir);
+ } else if (err == -EINTR)
+ fuse_invalidate_entry(entry);
+ return err;
+}
+
+static int fuse_rename(struct inode *olddir, struct dentry *oldent,
+ struct inode *newdir, struct dentry *newent)
+{
+ int err;
+ struct fuse_rename_in inarg;
+ struct fuse_conn *fc = get_fuse_conn(olddir);
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.newdir = get_node_id(newdir);
+ req->in.h.opcode = FUSE_RENAME;
+ req->in.h.nodeid = get_node_id(olddir);
+ req->inode = olddir;
+ req->inode2 = newdir;
+ req->in.numargs = 3;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = oldent->d_name.len + 1;
+ req->in.args[1].value = oldent->d_name.name;
+ req->in.args[2].size = newent->d_name.len + 1;
+ req->in.args[2].value = newent->d_name.name;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ fuse_invalidate_attr(olddir);
+ if (olddir != newdir)
+ fuse_invalidate_attr(newdir);
+ } else if (err == -EINTR) {
+ /* If request was interrupted, DEITY only knows if the
+ rename actually took place. If the invalidation
+ fails (e.g. some process has CWD under the renamed
+ directory), then there can be inconsistency between
+ the dcache and the real filesystem. Tough luck. */
+ fuse_invalidate_entry(oldent);
+ if (newent->d_inode)
+ fuse_invalidate_entry(newent);
+ }
+
+ return err;
+}
+
+static int fuse_link(struct dentry *entry, struct inode *newdir,
+ struct dentry *newent)
+{
+ int err;
+ struct fuse_link_in inarg;
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.oldnodeid = get_node_id(inode);
+ req->in.h.opcode = FUSE_LINK;
+ req->inode2 = inode;
+ req->in.numargs = 2;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = newent->d_name.len + 1;
+ req->in.args[1].value = newent->d_name.name;
+ err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
+ /* Contrary to "normal" filesystems it can happen that link
+ makes two "logical" inodes point to the same "physical"
+ inode. We invalidate the attributes of the old one, so it
+ will reflect changes in the backing inode (link count,
+ etc.)
+ */
+ if (!err || err == -EINTR)
+ fuse_invalidate_attr(inode);
+ return err;
+}
+
+int fuse_do_getattr(struct inode *inode)
+{
+ int err;
+ struct fuse_attr_out arg;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ req->in.h.opcode = FUSE_GETATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(arg);
+ req->out.args[0].value = &arg;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ if ((inode->i_mode ^ arg.attr.mode) & S_IFMT) {
+ make_bad_inode(inode);
+ err = -EIO;
+ } else {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ fuse_change_attributes(inode, &arg.attr);
+ fi->i_time = time_to_jiffies(arg.attr_valid,
+ arg.attr_valid_nsec);
+ }
+ }
+ return err;
+}
+
+/*
+ * Calling into a user-controlled filesystem gives the filesystem
+ * daemon ptrace-like capabilities over the requester process. This
+ * means, that the filesystem daemon is able to record the exact
+ * filesystem operations performed, and can also control the behavior
+ * of the requester process in otherwise impossible ways. For example
+ * it can delay the operation for arbitrary length of time allowing
+ * DoS against the requester.
+ *
+ * For this reason only those processes can call into the filesystem,
+ * for which the owner of the mount has ptrace privilege. This
+ * excludes processes started by other users, suid or sgid processes.
+ */
+static int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task)
+{
+ if (fc->flags & FUSE_ALLOW_OTHER)
+ return 1;
+
+ if (task->euid == fc->user_id &&
+ task->suid == fc->user_id &&
+ task->uid == fc->user_id &&
+ task->egid == fc->group_id &&
+ task->sgid == fc->group_id &&
+ task->gid == fc->group_id)
+ return 1;
+
+ return 0;
+}
+
+static int fuse_revalidate(struct dentry *entry)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+ if (get_node_id(inode) != FUSE_ROOT_ID &&
+ time_before_eq(jiffies, fi->i_time))
+ return 0;
+
+ return fuse_do_getattr(inode);
+}
+
+static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+ else if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
+ int err = generic_permission(inode, mask, NULL);
+
+ /* If permission is denied, try to refresh file
+ attributes. This is also needed, because the root
+ node will at first have no permissions */
+ if (err == -EACCES) {
+ err = fuse_do_getattr(inode);
+ if (!err)
+ err = generic_permission(inode, mask, NULL);
+ }
+
+ /* FIXME: Need some mechanism to revoke permissions:
+ currently if the filesystem suddenly changes the
+ file mode, we will not be informed about it, and
+ continue to allow access to the file/directory.
+
+ This is actually not so grave, since the user can
+ simply keep access to the file/directory anyway by
+ keeping it open... */
+
+ return err;
+ } else {
+ int mode = inode->i_mode;
+ if ((mask & MAY_WRITE) && IS_RDONLY(inode) &&
+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+ return -EROFS;
+ if ((mask & MAY_EXEC) && !S_ISDIR(mode) && !(mode & S_IXUGO))
+ return -EACCES;
+ return 0;
+ }
+}
+
+static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
+ void *dstbuf, filldir_t filldir)
+{
+ while (nbytes >= FUSE_NAME_OFFSET) {
+ struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
+ size_t reclen = FUSE_DIRENT_SIZE(dirent);
+ int over;
+ if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
+ return -EIO;
+ if (reclen > nbytes)
+ break;
+
+ over = filldir(dstbuf, dirent->name, dirent->namelen,
+ file->f_pos, dirent->ino, dirent->type);
+ if (over)
+ break;
+
+ buf += reclen;
+ nbytes -= reclen;
+ file->f_pos = dirent->off;
+ }
+
+ return 0;
+}
+
+static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file,
+ struct inode *inode, loff_t pos,
+ size_t count)
+{
+ return fuse_send_read_common(req, file, inode, pos, count, 1);
+}
+
+static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
+{
+ int err;
+ size_t nbytes;
+ struct page *page;
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ fuse_put_request(fc, req);
+ return -ENOMEM;
+ }
+ req->num_pages = 1;
+ req->pages[0] = page;
+ nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err)
+ err = parse_dirfile(page_address(page), nbytes, file, dstbuf,
+ filldir);
+
+ __free_page(page);
+ fuse_invalidate_attr(inode); /* atime changed */
+ return err;
+}
+
+static char *read_link(struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req = fuse_get_request(fc);
+ char *link;
+
+ if (!req)
+ return ERR_PTR(-EINTR);
+
+ link = (char *) __get_free_page(GFP_KERNEL);
+ if (!link) {
+ link = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ req->in.h.opcode = FUSE_READLINK;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->out.argvar = 1;
+ req->out.numargs = 1;
+ req->out.args[0].size = PAGE_SIZE - 1;
+ req->out.args[0].value = link;
+ request_send(fc, req);
+ if (req->out.h.error) {
+ free_page((unsigned long) link);
+ link = ERR_PTR(req->out.h.error);
+ } else
+ link[req->out.args[0].size] = '\0';
+ out:
+ fuse_put_request(fc, req);
+ fuse_invalidate_attr(inode); /* atime changed */
+ return link;
+}
+
+static void free_link(char *link)
+{
+ if (!IS_ERR(link))
+ free_page((unsigned long) link);
+}
+
+static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ nd_set_link(nd, read_link(dentry));
+ return NULL;
+}
+
+static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
+{
+ free_link(nd_get_link(nd));
+}
+
+static int fuse_dir_open(struct inode *inode, struct file *file)
+{
+ return fuse_open_common(inode, file, 1);
+}
+
+static int fuse_dir_release(struct inode *inode, struct file *file)
+{
+ return fuse_release_common(inode, file, 1);
+}
+
+static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
+{
+ /* nfsd can call this with no file */
+ return file ? fuse_fsync_common(file, de, datasync, 1) : 0;
+}
+
+static unsigned iattr_to_fattr(struct iattr *iattr, struct fuse_attr *fattr)
+{
+ unsigned ivalid = iattr->ia_valid;
+ unsigned fvalid = 0;
+
+ memset(fattr, 0, sizeof(*fattr));
+
+ if (ivalid & ATTR_MODE)
+ fvalid |= FATTR_MODE, fattr->mode = iattr->ia_mode;
+ if (ivalid & ATTR_UID)
+ fvalid |= FATTR_UID, fattr->uid = iattr->ia_uid;
+ if (ivalid & ATTR_GID)
+ fvalid |= FATTR_GID, fattr->gid = iattr->ia_gid;
+ if (ivalid & ATTR_SIZE)
+ fvalid |= FATTR_SIZE, fattr->size = iattr->ia_size;
+ /* You can only _set_ these together (they may change by themselves) */
+ if ((ivalid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) {
+ fvalid |= FATTR_ATIME | FATTR_MTIME;
+ fattr->atime = iattr->ia_atime.tv_sec;
+ fattr->mtime = iattr->ia_mtime.tv_sec;
+ }
+
+ return fvalid;
+}
+
+static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_req *req;
+ struct fuse_setattr_in inarg;
+ struct fuse_attr_out outarg;
+ int err;
+ int is_truncate = 0;
+
+ if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
+ err = inode_change_ok(inode, attr);
+ if (err)
+ return err;
+ }
+
+ if (attr->ia_valid & ATTR_SIZE) {
+ unsigned long limit;
+ is_truncate = 1;
+ limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+ if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
+ send_sig(SIGXFSZ, current, 0);
+ return -EFBIG;
+ }
+ }
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.valid = iattr_to_fattr(attr, &inarg.attr);
+ req->in.h.opcode = FUSE_SETATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err) {
+ if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ make_bad_inode(inode);
+ err = -EIO;
+ } else {
+ if (is_truncate) {
+ loff_t origsize = i_size_read(inode);
+ i_size_write(inode, outarg.attr.size);
+ if (origsize > outarg.attr.size)
+ vmtruncate(inode, outarg.attr.size);
+ }
+ fuse_change_attributes(inode, &outarg.attr);
+ fi->i_time = time_to_jiffies(outarg.attr_valid,
+ outarg.attr_valid_nsec);
+ }
+ } else if (err == -EINTR)
+ fuse_invalidate_attr(inode);
+
+ return err;
+}
+
+static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
+ struct kstat *stat)
+{
+ struct inode *inode = entry->d_inode;
+ int err = fuse_revalidate(entry);
+ if (!err)
+ generic_fillattr(inode, stat);
+
+ return err;
+}
+
+static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
+ struct nameidata *nd)
+{
+ struct inode *inode;
+ int err = fuse_lookup_iget(dir, entry, &inode);
+ if (err)
+ return ERR_PTR(err);
+ if (inode && S_ISDIR(inode->i_mode)) {
+ /* Don't allow creating an alias to a directory */
+ struct dentry *alias = d_find_alias(inode);
+ if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
+ dput(alias);
+ iput(inode);
+ return ERR_PTR(-EIO);
+ }
+ }
+ return d_splice_alias(inode, entry);
+}
+
+static int fuse_setxattr(struct dentry *entry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_setxattr_in inarg;
+ int err;
+
+ if (size > FUSE_XATTR_SIZE_MAX)
+ return -E2BIG;
+
+ if (fc->no_setxattr)
+ return -EOPNOTSUPP;
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ inarg.flags = flags;
+ req->in.h.opcode = FUSE_SETXATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->in.numargs = 3;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = strlen(name) + 1;
+ req->in.args[1].value = name;
+ req->in.args[2].size = size;
+ req->in.args[2].value = value;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS) {
+ fc->no_setxattr = 1;
+ err = -EOPNOTSUPP;
+ }
+ return err;
+}
+
+static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
+ void *value, size_t size)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_getxattr_in inarg;
+ struct fuse_getxattr_out outarg;
+ ssize_t ret;
+
+ if (fc->no_getxattr)
+ return -EOPNOTSUPP;
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ req->in.h.opcode = FUSE_GETXATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->in.numargs = 2;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = strlen(name) + 1;
+ req->in.args[1].value = name;
+ /* This is really two different operations rolled into one */
+ req->out.numargs = 1;
+ if (size) {
+ req->out.argvar = 1;
+ req->out.args[0].size = size;
+ req->out.args[0].value = value;
+ } else {
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ }
+ request_send(fc, req);
+ ret = req->out.h.error;
+ if (!ret)
+ ret = size ? req->out.args[0].size : outarg.size;
+ else {
+ if (ret == -ENOSYS) {
+ fc->no_getxattr = 1;
+ ret = -EOPNOTSUPP;
+ }
+ }
+ fuse_put_request(fc, req);
+ return ret;
+}
+
+static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_getxattr_in inarg;
+ struct fuse_getxattr_out outarg;
+ ssize_t ret;
+
+ if (fc->no_listxattr)
+ return -EOPNOTSUPP;
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.size = size;
+ req->in.h.opcode = FUSE_LISTXATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ /* This is really two different operations rolled into one */
+ req->out.numargs = 1;
+ if (size) {
+ req->out.argvar = 1;
+ req->out.args[0].size = size;
+ req->out.args[0].value = list;
+ } else {
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ }
+ request_send(fc, req);
+ ret = req->out.h.error;
+ if (!ret)
+ ret = size ? req->out.args[0].size : outarg.size;
+ else {
+ if (ret == -ENOSYS) {
+ fc->no_listxattr = 1;
+ ret = -EOPNOTSUPP;
+ }
+ }
+ fuse_put_request(fc, req);
+ return ret;
+}
+
+static int fuse_removexattr(struct dentry *entry, const char *name)
+{
+ struct inode *inode = entry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ int err;
+
+ if (fc->no_removexattr)
+ return -EOPNOTSUPP;
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ req->in.h.opcode = FUSE_REMOVEXATTR;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->in.numargs = 1;
+ req->in.args[0].size = strlen(name) + 1;
+ req->in.args[0].value = name;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS) {
+ fc->no_removexattr = 1;
+ err = -EOPNOTSUPP;
+ }
+ return err;
+}
+
+static struct inode_operations fuse_dir_inode_operations = {
+ .lookup = fuse_lookup,
+ .mkdir = fuse_mkdir,
+ .symlink = fuse_symlink,
+ .unlink = fuse_unlink,
+ .rmdir = fuse_rmdir,
+ .rename = fuse_rename,
+ .link = fuse_link,
+ .setattr = fuse_setattr,
+ .create = fuse_create,
+ .mknod = fuse_mknod,
+ .permission = fuse_permission,
+ .getattr = fuse_getattr,
+ .setxattr = fuse_setxattr,
+ .getxattr = fuse_getxattr,
+ .listxattr = fuse_listxattr,
+ .removexattr = fuse_removexattr,
+};
+
+static struct file_operations fuse_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .readdir = fuse_readdir,
+ .open = fuse_dir_open,
+ .release = fuse_dir_release,
+ .fsync = fuse_dir_fsync,
+};
+
+static struct inode_operations fuse_common_inode_operations = {
+ .setattr = fuse_setattr,
+ .permission = fuse_permission,
+ .getattr = fuse_getattr,
+ .setxattr = fuse_setxattr,
+ .getxattr = fuse_getxattr,
+ .listxattr = fuse_listxattr,
+ .removexattr = fuse_removexattr,
+};
+
+static struct inode_operations fuse_symlink_inode_operations = {
+ .setattr = fuse_setattr,
+ .follow_link = fuse_follow_link,
+ .put_link = fuse_put_link,
+ .readlink = generic_readlink,
+ .getattr = fuse_getattr,
+ .setxattr = fuse_setxattr,
+ .getxattr = fuse_getxattr,
+ .listxattr = fuse_listxattr,
+ .removexattr = fuse_removexattr,
+};
+
+void fuse_init_common(struct inode *inode)
+{
+ inode->i_op = &fuse_common_inode_operations;
+}
+
+void fuse_init_dir(struct inode *inode)
+{
+ inode->i_op = &fuse_dir_inode_operations;
+ inode->i_fop = &fuse_dir_operations;
+}
+
+void fuse_init_symlink(struct inode *inode)
+{
+ inode->i_op = &fuse_symlink_inode_operations;
+}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
new file mode 100644
index 000000000000..6454022b0536
--- /dev/null
+++ b/fs/fuse/file.c
@@ -0,0 +1,555 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+
+static struct file_operations fuse_direct_io_file_operations;
+
+int fuse_open_common(struct inode *inode, struct file *file, int isdir)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_open_in inarg;
+ struct fuse_open_out outarg;
+ struct fuse_file *ff;
+ int err;
+
+ err = generic_file_open(inode, file);
+ if (err)
+ return err;
+
+ /* If opening the root node, no lookup has been performed on
+ it, so the attributes must be refreshed */
+ if (get_node_id(inode) == FUSE_ROOT_ID) {
+ int err = fuse_do_getattr(inode);
+ if (err)
+ return err;
+ }
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ err = -ENOMEM;
+ ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
+ if (!ff)
+ goto out_put_request;
+
+ ff->release_req = fuse_request_alloc();
+ if (!ff->release_req) {
+ kfree(ff);
+ goto out_put_request;
+ }
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+ req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ request_send(fc, req);
+ err = req->out.h.error;
+ if (err) {
+ fuse_request_free(ff->release_req);
+ kfree(ff);
+ } else {
+ if (!isdir && (outarg.open_flags & FOPEN_DIRECT_IO))
+ file->f_op = &fuse_direct_io_file_operations;
+ if (!(outarg.open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages(inode->i_mapping);
+ ff->fh = outarg.fh;
+ file->private_data = ff;
+ }
+
+ out_put_request:
+ fuse_put_request(fc, req);
+ return err;
+}
+
+int fuse_release_common(struct inode *inode, struct file *file, int isdir)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_req *req = ff->release_req;
+ struct fuse_release_in *inarg = &req->misc.release_in;
+
+ inarg->fh = ff->fh;
+ inarg->flags = file->f_flags & ~O_EXCL;
+ req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(struct fuse_release_in);
+ req->in.args[0].value = inarg;
+ request_send_background(fc, req);
+ kfree(ff);
+
+ /* Return value is ignored by VFS */
+ return 0;
+}
+
+static int fuse_open(struct inode *inode, struct file *file)
+{
+ return fuse_open_common(inode, file, 0);
+}
+
+static int fuse_release(struct inode *inode, struct file *file)
+{
+ return fuse_release_common(inode, file, 0);
+}
+
+static int fuse_flush(struct file *file)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_req *req;
+ struct fuse_flush_in inarg;
+ int err;
+
+ if (fc->no_flush)
+ return 0;
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+ req->in.h.opcode = FUSE_FLUSH;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->file = file;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS) {
+ fc->no_flush = 1;
+ err = 0;
+ }
+ return err;
+}
+
+int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
+ int isdir)
+{
+ struct inode *inode = de->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_req *req;
+ struct fuse_fsync_in inarg;
+ int err;
+
+ if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
+ return 0;
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+ inarg.fsync_flags = datasync ? 1 : 0;
+ req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->file = file;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS) {
+ if (isdir)
+ fc->no_fsyncdir = 1;
+ else
+ fc->no_fsync = 1;
+ err = 0;
+ }
+ return err;
+}
+
+static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
+{
+ return fuse_fsync_common(file, de, datasync, 0);
+}
+
+size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
+ struct inode *inode, loff_t pos, size_t count,
+ int isdir)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_read_in inarg;
+
+ memset(&inarg, 0, sizeof(struct fuse_read_in));
+ inarg.fh = ff->fh;
+ inarg.offset = pos;
+ inarg.size = count;
+ req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->file = file;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(struct fuse_read_in);
+ req->in.args[0].value = &inarg;
+ req->out.argpages = 1;
+ req->out.argvar = 1;
+ req->out.numargs = 1;
+ req->out.args[0].size = count;
+ request_send(fc, req);
+ return req->out.args[0].size;
+}
+
+static inline size_t fuse_send_read(struct fuse_req *req, struct file *file,
+ struct inode *inode, loff_t pos,
+ size_t count)
+{
+ return fuse_send_read_common(req, file, inode, pos, count, 0);
+}
+
+static int fuse_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ loff_t pos = (loff_t) page->index << PAGE_CACHE_SHIFT;
+ struct fuse_req *req = fuse_get_request(fc);
+ int err = -EINTR;
+ if (!req)
+ goto out;
+
+ req->out.page_zeroing = 1;
+ req->num_pages = 1;
+ req->pages[0] = page;
+ fuse_send_read(req, file, inode, pos, PAGE_CACHE_SIZE);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err)
+ SetPageUptodate(page);
+ fuse_invalidate_attr(inode); /* atime changed */
+ out:
+ unlock_page(page);
+ return err;
+}
+
+static int fuse_send_readpages(struct fuse_req *req, struct file *file,
+ struct inode *inode)
+{
+ loff_t pos = (loff_t) req->pages[0]->index << PAGE_CACHE_SHIFT;
+ size_t count = req->num_pages << PAGE_CACHE_SHIFT;
+ unsigned i;
+ req->out.page_zeroing = 1;
+ fuse_send_read(req, file, inode, pos, count);
+ for (i = 0; i < req->num_pages; i++) {
+ struct page *page = req->pages[i];
+ if (!req->out.h.error)
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+ return req->out.h.error;
+}
+
+struct fuse_readpages_data {
+ struct fuse_req *req;
+ struct file *file;
+ struct inode *inode;
+};
+
+static int fuse_readpages_fill(void *_data, struct page *page)
+{
+ struct fuse_readpages_data *data = _data;
+ struct fuse_req *req = data->req;
+ struct inode *inode = data->inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (req->num_pages &&
+ (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
+ (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
+ req->pages[req->num_pages - 1]->index + 1 != page->index)) {
+ int err = fuse_send_readpages(req, data->file, inode);
+ if (err) {
+ unlock_page(page);
+ return err;
+ }
+ fuse_reset_request(req);
+ }
+ req->pages[req->num_pages] = page;
+ req->num_pages ++;
+ return 0;
+}
+
+static int fuse_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages)
+{
+ struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_readpages_data data;
+ int err;
+ data.file = file;
+ data.inode = inode;
+ data.req = fuse_get_request(fc);
+ if (!data.req)
+ return -EINTR;
+
+ err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
+ if (!err && data.req->num_pages)
+ err = fuse_send_readpages(data.req, file, inode);
+ fuse_put_request(fc, data.req);
+ fuse_invalidate_attr(inode); /* atime changed */
+ return err;
+}
+
+static size_t fuse_send_write(struct fuse_req *req, struct file *file,
+ struct inode *inode, loff_t pos, size_t count)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_write_in inarg;
+ struct fuse_write_out outarg;
+
+ memset(&inarg, 0, sizeof(struct fuse_write_in));
+ inarg.fh = ff->fh;
+ inarg.offset = pos;
+ inarg.size = count;
+ req->in.h.opcode = FUSE_WRITE;
+ req->in.h.nodeid = get_node_id(inode);
+ req->inode = inode;
+ req->file = file;
+ req->in.argpages = 1;
+ req->in.numargs = 2;
+ req->in.args[0].size = sizeof(struct fuse_write_in);
+ req->in.args[0].value = &inarg;
+ req->in.args[1].size = count;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(struct fuse_write_out);
+ req->out.args[0].value = &outarg;
+ request_send(fc, req);
+ return outarg.size;
+}
+
+static int fuse_prepare_write(struct file *file, struct page *page,
+ unsigned offset, unsigned to)
+{
+ /* No op */
+ return 0;
+}
+
+static int fuse_commit_write(struct file *file, struct page *page,
+ unsigned offset, unsigned to)
+{
+ int err;
+ size_t nres;
+ unsigned count = to - offset;
+ struct inode *inode = page->mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + offset;
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ req->num_pages = 1;
+ req->pages[0] = page;
+ req->page_offset = offset;
+ nres = fuse_send_write(req, file, inode, pos, count);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (!err && nres != count)
+ err = -EIO;
+ if (!err) {
+ pos += count;
+ if (pos > i_size_read(inode))
+ i_size_write(inode, pos);
+
+ if (offset == 0 && to == PAGE_CACHE_SIZE) {
+ clear_page_dirty(page);
+ SetPageUptodate(page);
+ }
+ }
+ fuse_invalidate_attr(inode);
+ return err;
+}
+
+static void fuse_release_user_pages(struct fuse_req *req, int write)
+{
+ unsigned i;
+
+ for (i = 0; i < req->num_pages; i++) {
+ struct page *page = req->pages[i];
+ if (write)
+ set_page_dirty_lock(page);
+ put_page(page);
+ }
+}
+
+static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
+ unsigned nbytes, int write)
+{
+ unsigned long user_addr = (unsigned long) buf;
+ unsigned offset = user_addr & ~PAGE_MASK;
+ int npages;
+
+ /* This doesn't work with nfsd */
+ if (!current->mm)
+ return -EPERM;
+
+ nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+ npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ npages = min(npages, FUSE_MAX_PAGES_PER_REQ);
+ down_read(&current->mm->mmap_sem);
+ npages = get_user_pages(current, current->mm, user_addr, npages, write,
+ 0, req->pages, NULL);
+ up_read(&current->mm->mmap_sem);
+ if (npages < 0)
+ return npages;
+
+ req->num_pages = npages;
+ req->page_offset = offset;
+ return 0;
+}
+
+static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos, int write)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ size_t nmax = write ? fc->max_write : fc->max_read;
+ loff_t pos = *ppos;
+ ssize_t res = 0;
+ struct fuse_req *req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ while (count) {
+ size_t tmp;
+ size_t nres;
+ size_t nbytes = min(count, nmax);
+ int err = fuse_get_user_pages(req, buf, nbytes, !write);
+ if (err) {
+ res = err;
+ break;
+ }
+ tmp = (req->num_pages << PAGE_SHIFT) - req->page_offset;
+ nbytes = min(nbytes, tmp);
+ if (write)
+ nres = fuse_send_write(req, file, inode, pos, nbytes);
+ else
+ nres = fuse_send_read(req, file, inode, pos, nbytes);
+ fuse_release_user_pages(req, !write);
+ if (req->out.h.error) {
+ if (!res)
+ res = req->out.h.error;
+ break;
+ } else if (nres > nbytes) {
+ res = -EIO;
+ break;
+ }
+ count -= nres;
+ res += nres;
+ pos += nres;
+ buf += nres;
+ if (nres != nbytes)
+ break;
+ if (count)
+ fuse_reset_request(req);
+ }
+ fuse_put_request(fc, req);
+ if (res > 0) {
+ if (write && pos > i_size_read(inode))
+ i_size_write(inode, pos);
+ *ppos = pos;
+ }
+ fuse_invalidate_attr(inode);
+
+ return res;
+}
+
+static ssize_t fuse_direct_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return fuse_direct_io(file, buf, count, ppos, 0);
+}
+
+static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ ssize_t res;
+ /* Don't allow parallel writes to the same file */
+ down(&inode->i_sem);
+ res = fuse_direct_io(file, buf, count, ppos, 1);
+ up(&inode->i_sem);
+ return res;
+}
+
+static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if ((vma->vm_flags & VM_SHARED)) {
+ if ((vma->vm_flags & VM_WRITE))
+ return -ENODEV;
+ else
+ vma->vm_flags &= ~VM_MAYWRITE;
+ }
+ return generic_file_mmap(file, vma);
+}
+
+static int fuse_set_page_dirty(struct page *page)
+{
+ printk("fuse_set_page_dirty: should not happen\n");
+ dump_stack();
+ return 0;
+}
+
+static struct file_operations fuse_file_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_file_read,
+ .write = generic_file_write,
+ .mmap = fuse_file_mmap,
+ .open = fuse_open,
+ .flush = fuse_flush,
+ .release = fuse_release,
+ .fsync = fuse_fsync,
+ .sendfile = generic_file_sendfile,
+};
+
+static struct file_operations fuse_direct_io_file_operations = {
+ .llseek = generic_file_llseek,
+ .read = fuse_direct_read,
+ .write = fuse_direct_write,
+ .open = fuse_open,
+ .flush = fuse_flush,
+ .release = fuse_release,
+ .fsync = fuse_fsync,
+ /* no mmap and sendfile */
+};
+
+static struct address_space_operations fuse_file_aops = {
+ .readpage = fuse_readpage,
+ .prepare_write = fuse_prepare_write,
+ .commit_write = fuse_commit_write,
+ .readpages = fuse_readpages,
+ .set_page_dirty = fuse_set_page_dirty,
+};
+
+void fuse_init_file_inode(struct inode *inode)
+{
+ inode->i_fop = &fuse_file_operations;
+ inode->i_data.a_ops = &fuse_file_aops;
+}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
new file mode 100644
index 000000000000..24d761518d86
--- /dev/null
+++ b/fs/fuse/fuse_i.h
@@ -0,0 +1,451 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include <linux/fuse.h>
+#include <linux/fs.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/backing-dev.h>
+#include <asm/semaphore.h>
+
+/** Max number of pages that can be used in a single read request */
+#define FUSE_MAX_PAGES_PER_REQ 32
+
+/** If more requests are outstanding, then the operation will block */
+#define FUSE_MAX_OUTSTANDING 10
+
+/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
+ module will check permissions based on the file mode. Otherwise no
+ permission checking is done in the kernel */
+#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
+
+/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
+ doing the mount will be allowed to access the filesystem */
+#define FUSE_ALLOW_OTHER (1 << 1)
+
+
+/** FUSE inode */
+struct fuse_inode {
+ /** Inode data */
+ struct inode inode;
+
+ /** Unique ID, which identifies the inode between userspace
+ * and kernel */
+ u64 nodeid;
+
+ /** Number of lookups on this inode */
+ u64 nlookup;
+
+ /** The request used for sending the FORGET message */
+ struct fuse_req *forget_req;
+
+ /** Time in jiffies until the file attributes are valid */
+ unsigned long i_time;
+};
+
+/** FUSE specific file data */
+struct fuse_file {
+ /** Request reserved for flush and release */
+ struct fuse_req *release_req;
+
+ /** File handle used by userspace */
+ u64 fh;
+};
+
+/** One input argument of a request */
+struct fuse_in_arg {
+ unsigned size;
+ const void *value;
+};
+
+/** The request input */
+struct fuse_in {
+ /** The request header */
+ struct fuse_in_header h;
+
+ /** True if the data for the last argument is in req->pages */
+ unsigned argpages:1;
+
+ /** Number of arguments */
+ unsigned numargs;
+
+ /** Array of arguments */
+ struct fuse_in_arg args[3];
+};
+
+/** One output argument of a request */
+struct fuse_arg {
+ unsigned size;
+ void *value;
+};
+
+/** The request output */
+struct fuse_out {
+ /** Header returned from userspace */
+ struct fuse_out_header h;
+
+ /** Last argument is variable length (can be shorter than
+ arg->size) */
+ unsigned argvar:1;
+
+ /** Last argument is a list of pages to copy data to */
+ unsigned argpages:1;
+
+ /** Zero partially or not copied pages */
+ unsigned page_zeroing:1;
+
+ /** Number or arguments */
+ unsigned numargs;
+
+ /** Array of arguments */
+ struct fuse_arg args[3];
+};
+
+struct fuse_req;
+struct fuse_conn;
+
+/**
+ * A request to the client
+ */
+struct fuse_req {
+ /** This can be on either unused_list, pending or processing
+ lists in fuse_conn */
+ struct list_head list;
+
+ /** Entry on the background list */
+ struct list_head bg_entry;
+
+ /** refcount */
+ atomic_t count;
+
+ /** True if the request has reply */
+ unsigned isreply:1;
+
+ /** The request is preallocated */
+ unsigned preallocated:1;
+
+ /** The request was interrupted */
+ unsigned interrupted:1;
+
+ /** Request is sent in the background */
+ unsigned background:1;
+
+ /** Data is being copied to/from the request */
+ unsigned locked:1;
+
+ /** Request has been sent to userspace */
+ unsigned sent:1;
+
+ /** The request is finished */
+ unsigned finished:1;
+
+ /** The request input */
+ struct fuse_in in;
+
+ /** The request output */
+ struct fuse_out out;
+
+ /** Used to wake up the task waiting for completion of request*/
+ wait_queue_head_t waitq;
+
+ /** Data for asynchronous requests */
+ union {
+ struct fuse_forget_in forget_in;
+ struct fuse_release_in release_in;
+ struct fuse_init_in_out init_in_out;
+ } misc;
+
+ /** page vector */
+ struct page *pages[FUSE_MAX_PAGES_PER_REQ];
+
+ /** number of pages in vector */
+ unsigned num_pages;
+
+ /** offset of data on first page */
+ unsigned page_offset;
+
+ /** Inode used in the request */
+ struct inode *inode;
+
+ /** Second inode used in the request (or NULL) */
+ struct inode *inode2;
+
+ /** File used in the request (or NULL) */
+ struct file *file;
+};
+
+/**
+ * A Fuse connection.
+ *
+ * This structure is created, when the filesystem is mounted, and is
+ * destroyed, when the client device is closed and the filesystem is
+ * unmounted.
+ */
+struct fuse_conn {
+ /** Reference count */
+ int count;
+
+ /** The user id for this mount */
+ uid_t user_id;
+
+ /** The group id for this mount */
+ gid_t group_id;
+
+ /** The fuse mount flags for this mount */
+ unsigned flags;
+
+ /** Maximum read size */
+ unsigned max_read;
+
+ /** Maximum write size */
+ unsigned max_write;
+
+ /** Readers of the connection are waiting on this */
+ wait_queue_head_t waitq;
+
+ /** The list of pending requests */
+ struct list_head pending;
+
+ /** The list of requests being processed */
+ struct list_head processing;
+
+ /** Requests put in the background (RELEASE or any other
+ interrupted request) */
+ struct list_head background;
+
+ /** Controls the maximum number of outstanding requests */
+ struct semaphore outstanding_sem;
+
+ /** This counts the number of outstanding requests if
+ outstanding_sem would go negative */
+ unsigned outstanding_debt;
+
+ /** RW semaphore for exclusion with fuse_put_super() */
+ struct rw_semaphore sbput_sem;
+
+ /** The list of unused requests */
+ struct list_head unused_list;
+
+ /** The next unique request id */
+ u64 reqctr;
+
+ /** Mount is active */
+ unsigned mounted : 1;
+
+ /** Connection established */
+ unsigned connected : 1;
+
+ /** Connection failed (version mismatch) */
+ unsigned conn_error : 1;
+
+ /** Is fsync not implemented by fs? */
+ unsigned no_fsync : 1;
+
+ /** Is fsyncdir not implemented by fs? */
+ unsigned no_fsyncdir : 1;
+
+ /** Is flush not implemented by fs? */
+ unsigned no_flush : 1;
+
+ /** Is setxattr not implemented by fs? */
+ unsigned no_setxattr : 1;
+
+ /** Is getxattr not implemented by fs? */
+ unsigned no_getxattr : 1;
+
+ /** Is listxattr not implemented by fs? */
+ unsigned no_listxattr : 1;
+
+ /** Is removexattr not implemented by fs? */
+ unsigned no_removexattr : 1;
+
+ /** Backing dev info */
+ struct backing_dev_info bdi;
+};
+
+static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb)
+{
+ return (struct fuse_conn **) &sb->s_fs_info;
+}
+
+static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
+{
+ return *get_fuse_conn_super_p(sb);
+}
+
+static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
+{
+ return get_fuse_conn_super(inode->i_sb);
+}
+
+static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
+{
+ return container_of(inode, struct fuse_inode, inode);
+}
+
+static inline u64 get_node_id(struct inode *inode)
+{
+ return get_fuse_inode(inode)->nodeid;
+}
+
+/** Device operations */
+extern struct file_operations fuse_dev_operations;
+
+/**
+ * This is the single global spinlock which protects FUSE's structures
+ *
+ * The following data is protected by this lock:
+ *
+ * - the private_data field of the device file
+ * - the s_fs_info field of the super block
+ * - unused_list, pending, processing lists in fuse_conn
+ * - background list in fuse_conn
+ * - the unique request ID counter reqctr in fuse_conn
+ * - the sb (super_block) field in fuse_conn
+ * - the file (device file) field in fuse_conn
+ */
+extern spinlock_t fuse_lock;
+
+/**
+ * Get a filled in inode
+ */
+struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
+ int generation, struct fuse_attr *attr);
+
+/**
+ * Send FORGET command
+ */
+void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
+ unsigned long nodeid, u64 nlookup);
+
+/**
+ * Send READ or READDIR request
+ */
+size_t fuse_send_read_common(struct fuse_req *req, struct file *file,
+ struct inode *inode, loff_t pos, size_t count,
+ int isdir);
+
+/**
+ * Send OPEN or OPENDIR request
+ */
+int fuse_open_common(struct inode *inode, struct file *file, int isdir);
+
+/**
+ * Send RELEASE or RELEASEDIR request
+ */
+int fuse_release_common(struct inode *inode, struct file *file, int isdir);
+
+/**
+ * Send FSYNC or FSYNCDIR request
+ */
+int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
+ int isdir);
+
+/**
+ * Initialise file operations on a regular file
+ */
+void fuse_init_file_inode(struct inode *inode);
+
+/**
+ * Initialise inode operations on regular files and special files
+ */
+void fuse_init_common(struct inode *inode);
+
+/**
+ * Initialise inode and file operations on a directory
+ */
+void fuse_init_dir(struct inode *inode);
+
+/**
+ * Initialise inode operations on a symlink
+ */
+void fuse_init_symlink(struct inode *inode);
+
+/**
+ * Change attributes of an inode
+ */
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr);
+
+/**
+ * Check if the connection can be released, and if yes, then free the
+ * connection structure
+ */
+void fuse_release_conn(struct fuse_conn *fc);
+
+/**
+ * Initialize the client device
+ */
+int fuse_dev_init(void);
+
+/**
+ * Cleanup the client device
+ */
+void fuse_dev_cleanup(void);
+
+/**
+ * Allocate a request
+ */
+struct fuse_req *fuse_request_alloc(void);
+
+/**
+ * Free a request
+ */
+void fuse_request_free(struct fuse_req *req);
+
+/**
+ * Reinitialize a request, the preallocated flag is left unmodified
+ */
+void fuse_reset_request(struct fuse_req *req);
+
+/**
+ * Reserve a preallocated request
+ */
+struct fuse_req *fuse_get_request(struct fuse_conn *fc);
+
+/**
+ * Decrement reference count of a request. If count goes to zero put
+ * on unused list (preallocated) or free reqest (not preallocated).
+ */
+void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request (synchronous)
+ */
+void request_send(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request with no reply
+ */
+void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Send a request in the background
+ */
+void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
+
+/**
+ * Release inodes and file assiciated with background request
+ */
+void fuse_release_background(struct fuse_req *req);
+
+/**
+ * Get the attributes of a file
+ */
+int fuse_do_getattr(struct inode *inode);
+
+/**
+ * Invalidate inode attributes
+ */
+void fuse_invalidate_attr(struct inode *inode);
+
+/**
+ * Send the INIT message
+ */
+void fuse_send_init(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
new file mode 100644
index 000000000000..e69a546844d0
--- /dev/null
+++ b/fs/fuse/inode.c
@@ -0,0 +1,591 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+#include "fuse_i.h"
+
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/parser.h>
+#include <linux/statfs.h>
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Filesystem in Userspace");
+MODULE_LICENSE("GPL");
+
+spinlock_t fuse_lock;
+static kmem_cache_t *fuse_inode_cachep;
+
+#define FUSE_SUPER_MAGIC 0x65735546
+
+struct fuse_mount_data {
+ int fd;
+ unsigned rootmode;
+ unsigned user_id;
+ unsigned group_id;
+ unsigned fd_present : 1;
+ unsigned rootmode_present : 1;
+ unsigned user_id_present : 1;
+ unsigned group_id_present : 1;
+ unsigned flags;
+ unsigned max_read;
+};
+
+static struct inode *fuse_alloc_inode(struct super_block *sb)
+{
+ struct inode *inode;
+ struct fuse_inode *fi;
+
+ inode = kmem_cache_alloc(fuse_inode_cachep, SLAB_KERNEL);
+ if (!inode)
+ return NULL;
+
+ fi = get_fuse_inode(inode);
+ fi->i_time = jiffies - 1;
+ fi->nodeid = 0;
+ fi->nlookup = 0;
+ fi->forget_req = fuse_request_alloc();
+ if (!fi->forget_req) {
+ kmem_cache_free(fuse_inode_cachep, inode);
+ return NULL;
+ }
+
+ return inode;
+}
+
+static void fuse_destroy_inode(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ if (fi->forget_req)
+ fuse_request_free(fi->forget_req);
+ kmem_cache_free(fuse_inode_cachep, inode);
+}
+
+static void fuse_read_inode(struct inode *inode)
+{
+ /* No op */
+}
+
+void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
+ unsigned long nodeid, u64 nlookup)
+{
+ struct fuse_forget_in *inarg = &req->misc.forget_in;
+ inarg->nlookup = nlookup;
+ req->in.h.opcode = FUSE_FORGET;
+ req->in.h.nodeid = nodeid;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(struct fuse_forget_in);
+ req->in.args[0].value = inarg;
+ request_send_noreply(fc, req);
+}
+
+static void fuse_clear_inode(struct inode *inode)
+{
+ if (inode->i_sb->s_flags & MS_ACTIVE) {
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
+ fi->forget_req = NULL;
+ }
+}
+
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
+{
+ if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
+ invalidate_inode_pages(inode->i_mapping);
+
+ inode->i_ino = attr->ino;
+ inode->i_mode = (inode->i_mode & S_IFMT) + (attr->mode & 07777);
+ inode->i_nlink = attr->nlink;
+ inode->i_uid = attr->uid;
+ inode->i_gid = attr->gid;
+ i_size_write(inode, attr->size);
+ inode->i_blksize = PAGE_CACHE_SIZE;
+ inode->i_blocks = attr->blocks;
+ inode->i_atime.tv_sec = attr->atime;
+ inode->i_atime.tv_nsec = attr->atimensec;
+ inode->i_mtime.tv_sec = attr->mtime;
+ inode->i_mtime.tv_nsec = attr->mtimensec;
+ inode->i_ctime.tv_sec = attr->ctime;
+ inode->i_ctime.tv_nsec = attr->ctimensec;
+}
+
+static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
+{
+ inode->i_mode = attr->mode & S_IFMT;
+ i_size_write(inode, attr->size);
+ if (S_ISREG(inode->i_mode)) {
+ fuse_init_common(inode);
+ fuse_init_file_inode(inode);
+ } else if (S_ISDIR(inode->i_mode))
+ fuse_init_dir(inode);
+ else if (S_ISLNK(inode->i_mode))
+ fuse_init_symlink(inode);
+ else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+ S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+ fuse_init_common(inode);
+ init_special_inode(inode, inode->i_mode,
+ new_decode_dev(attr->rdev));
+ } else {
+ /* Don't let user create weird files */
+ inode->i_mode = S_IFREG;
+ fuse_init_common(inode);
+ fuse_init_file_inode(inode);
+ }
+}
+
+static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
+{
+ unsigned long nodeid = *(unsigned long *) _nodeidp;
+ if (get_node_id(inode) == nodeid)
+ return 1;
+ else
+ return 0;
+}
+
+static int fuse_inode_set(struct inode *inode, void *_nodeidp)
+{
+ unsigned long nodeid = *(unsigned long *) _nodeidp;
+ get_fuse_inode(inode)->nodeid = nodeid;
+ return 0;
+}
+
+struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
+ int generation, struct fuse_attr *attr)
+{
+ struct inode *inode;
+ struct fuse_inode *fi;
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ int retried = 0;
+
+ retry:
+ inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
+ if (!inode)
+ return NULL;
+
+ if ((inode->i_state & I_NEW)) {
+ inode->i_flags |= S_NOATIME|S_NOCMTIME;
+ inode->i_generation = generation;
+ inode->i_data.backing_dev_info = &fc->bdi;
+ fuse_init_inode(inode, attr);
+ unlock_new_inode(inode);
+ } else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
+ BUG_ON(retried);
+ /* Inode has changed type, any I/O on the old should fail */
+ make_bad_inode(inode);
+ iput(inode);
+ retried = 1;
+ goto retry;
+ }
+
+ fi = get_fuse_inode(inode);
+ fi->nlookup ++;
+ fuse_change_attributes(inode, attr);
+ return inode;
+}
+
+static void fuse_put_super(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ down_write(&fc->sbput_sem);
+ while (!list_empty(&fc->background))
+ fuse_release_background(list_entry(fc->background.next,
+ struct fuse_req, bg_entry));
+
+ spin_lock(&fuse_lock);
+ fc->mounted = 0;
+ fc->user_id = 0;
+ fc->group_id = 0;
+ fc->flags = 0;
+ /* Flush all readers on this fs */
+ wake_up_all(&fc->waitq);
+ up_write(&fc->sbput_sem);
+ fuse_release_conn(fc);
+ spin_unlock(&fuse_lock);
+}
+
+static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
+{
+ stbuf->f_type = FUSE_SUPER_MAGIC;
+ stbuf->f_bsize = attr->bsize;
+ stbuf->f_blocks = attr->blocks;
+ stbuf->f_bfree = attr->bfree;
+ stbuf->f_bavail = attr->bavail;
+ stbuf->f_files = attr->files;
+ stbuf->f_ffree = attr->ffree;
+ stbuf->f_namelen = attr->namelen;
+ /* fsid is left zero */
+}
+
+static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+ struct fuse_req *req;
+ struct fuse_statfs_out outarg;
+ int err;
+
+ req = fuse_get_request(fc);
+ if (!req)
+ return -EINTR;
+
+ req->in.numargs = 0;
+ req->in.h.opcode = FUSE_STATFS;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ request_send(fc, req);
+ err = req->out.h.error;
+ if (!err)
+ convert_fuse_statfs(buf, &outarg.st);
+ fuse_put_request(fc, req);
+ return err;
+}
+
+enum {
+ OPT_FD,
+ OPT_ROOTMODE,
+ OPT_USER_ID,
+ OPT_GROUP_ID,
+ OPT_DEFAULT_PERMISSIONS,
+ OPT_ALLOW_OTHER,
+ OPT_MAX_READ,
+ OPT_ERR
+};
+
+static match_table_t tokens = {
+ {OPT_FD, "fd=%u"},
+ {OPT_ROOTMODE, "rootmode=%o"},
+ {OPT_USER_ID, "user_id=%u"},
+ {OPT_GROUP_ID, "group_id=%u"},
+ {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
+ {OPT_ALLOW_OTHER, "allow_other"},
+ {OPT_MAX_READ, "max_read=%u"},
+ {OPT_ERR, NULL}
+};
+
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
+{
+ char *p;
+ memset(d, 0, sizeof(struct fuse_mount_data));
+ d->max_read = ~0;
+
+ while ((p = strsep(&opt, ",")) != NULL) {
+ int token;
+ int value;
+ substring_t args[MAX_OPT_ARGS];
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case OPT_FD:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->fd = value;
+ d->fd_present = 1;
+ break;
+
+ case OPT_ROOTMODE:
+ if (match_octal(&args[0], &value))
+ return 0;
+ d->rootmode = value;
+ d->rootmode_present = 1;
+ break;
+
+ case OPT_USER_ID:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->user_id = value;
+ d->user_id_present = 1;
+ break;
+
+ case OPT_GROUP_ID:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->group_id = value;
+ d->group_id_present = 1;
+ break;
+
+ case OPT_DEFAULT_PERMISSIONS:
+ d->flags |= FUSE_DEFAULT_PERMISSIONS;
+ break;
+
+ case OPT_ALLOW_OTHER:
+ d->flags |= FUSE_ALLOW_OTHER;
+ break;
+
+ case OPT_MAX_READ:
+ if (match_int(&args[0], &value))
+ return 0;
+ d->max_read = value;
+ break;
+
+ default:
+ return 0;
+ }
+ }
+
+ if (!d->fd_present || !d->rootmode_present ||
+ !d->user_id_present || !d->group_id_present)
+ return 0;
+
+ return 1;
+}
+
+static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);
+
+ seq_printf(m, ",user_id=%u", fc->user_id);
+ seq_printf(m, ",group_id=%u", fc->group_id);
+ if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
+ seq_puts(m, ",default_permissions");
+ if (fc->flags & FUSE_ALLOW_OTHER)
+ seq_puts(m, ",allow_other");
+ if (fc->max_read != ~0)
+ seq_printf(m, ",max_read=%u", fc->max_read);
+ return 0;
+}
+
+static void free_conn(struct fuse_conn *fc)
+{
+ while (!list_empty(&fc->unused_list)) {
+ struct fuse_req *req;
+ req = list_entry(fc->unused_list.next, struct fuse_req, list);
+ list_del(&req->list);
+ fuse_request_free(req);
+ }
+ kfree(fc);
+}
+
+/* Must be called with the fuse lock held */
+void fuse_release_conn(struct fuse_conn *fc)
+{
+ fc->count--;
+ if (!fc->count)
+ free_conn(fc);
+}
+
+static struct fuse_conn *new_conn(void)
+{
+ struct fuse_conn *fc;
+
+ fc = kmalloc(sizeof(*fc), GFP_KERNEL);
+ if (fc != NULL) {
+ int i;
+ memset(fc, 0, sizeof(*fc));
+ init_waitqueue_head(&fc->waitq);
+ INIT_LIST_HEAD(&fc->pending);
+ INIT_LIST_HEAD(&fc->processing);
+ INIT_LIST_HEAD(&fc->unused_list);
+ INIT_LIST_HEAD(&fc->background);
+ sema_init(&fc->outstanding_sem, 0);
+ init_rwsem(&fc->sbput_sem);
+ for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) {
+ struct fuse_req *req = fuse_request_alloc();
+ if (!req) {
+ free_conn(fc);
+ return NULL;
+ }
+ list_add(&req->list, &fc->unused_list);
+ }
+ fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+ fc->bdi.unplug_io_fn = default_unplug_io_fn;
+ fc->reqctr = 0;
+ }
+ return fc;
+}
+
+static struct fuse_conn *get_conn(struct file *file, struct super_block *sb)
+{
+ struct fuse_conn *fc;
+
+ if (file->f_op != &fuse_dev_operations)
+ return ERR_PTR(-EINVAL);
+ fc = new_conn();
+ if (fc == NULL)
+ return ERR_PTR(-ENOMEM);
+ spin_lock(&fuse_lock);
+ if (file->private_data) {
+ free_conn(fc);
+ fc = ERR_PTR(-EINVAL);
+ } else {
+ file->private_data = fc;
+ *get_fuse_conn_super_p(sb) = fc;
+ fc->mounted = 1;
+ fc->connected = 1;
+ fc->count = 2;
+ }
+ spin_unlock(&fuse_lock);
+ return fc;
+}
+
+static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
+{
+ struct fuse_attr attr;
+ memset(&attr, 0, sizeof(attr));
+
+ attr.mode = mode;
+ attr.ino = FUSE_ROOT_ID;
+ return fuse_iget(sb, 1, 0, &attr);
+}
+
+static struct super_operations fuse_super_operations = {
+ .alloc_inode = fuse_alloc_inode,
+ .destroy_inode = fuse_destroy_inode,
+ .read_inode = fuse_read_inode,
+ .clear_inode = fuse_clear_inode,
+ .put_super = fuse_put_super,
+ .statfs = fuse_statfs,
+ .show_options = fuse_show_options,
+};
+
+static int fuse_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct fuse_conn *fc;
+ struct inode *root;
+ struct fuse_mount_data d;
+ struct file *file;
+ int err;
+
+ if (!parse_fuse_opt((char *) data, &d))
+ return -EINVAL;
+
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_magic = FUSE_SUPER_MAGIC;
+ sb->s_op = &fuse_super_operations;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+ file = fget(d.fd);
+ if (!file)
+ return -EINVAL;
+
+ fc = get_conn(file, sb);
+ fput(file);
+ if (IS_ERR(fc))
+ return PTR_ERR(fc);
+
+ fc->flags = d.flags;
+ fc->user_id = d.user_id;
+ fc->group_id = d.group_id;
+ fc->max_read = d.max_read;
+ if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
+ fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+ fc->max_write = FUSE_MAX_IN / 2;
+
+ err = -ENOMEM;
+ root = get_root_inode(sb, d.rootmode);
+ if (root == NULL)
+ goto err;
+
+ sb->s_root = d_alloc_root(root);
+ if (!sb->s_root) {
+ iput(root);
+ goto err;
+ }
+ fuse_send_init(fc);
+ return 0;
+
+ err:
+ spin_lock(&fuse_lock);
+ fuse_release_conn(fc);
+ spin_unlock(&fuse_lock);
+ return err;
+}
+
+static struct super_block *fuse_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *raw_data)
+{
+ return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super);
+}
+
+static struct file_system_type fuse_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "fuse",
+ .get_sb = fuse_get_sb,
+ .kill_sb = kill_anon_super,
+};
+
+static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep,
+ unsigned long flags)
+{
+ struct inode * inode = foo;
+
+ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+ SLAB_CTOR_CONSTRUCTOR)
+ inode_init_once(inode);
+}
+
+static int __init fuse_fs_init(void)
+{
+ int err;
+
+ err = register_filesystem(&fuse_fs_type);
+ if (err)
+ printk("fuse: failed to register filesystem\n");
+ else {
+ fuse_inode_cachep = kmem_cache_create("fuse_inode",
+ sizeof(struct fuse_inode),
+ 0, SLAB_HWCACHE_ALIGN,
+ fuse_inode_init_once, NULL);
+ if (!fuse_inode_cachep) {
+ unregister_filesystem(&fuse_fs_type);
+ err = -ENOMEM;
+ }
+ }
+
+ return err;
+}
+
+static void fuse_fs_cleanup(void)
+{
+ unregister_filesystem(&fuse_fs_type);
+ kmem_cache_destroy(fuse_inode_cachep);
+}
+
+static int __init fuse_init(void)
+{
+ int res;
+
+ printk("fuse init (API version %i.%i)\n",
+ FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);
+
+ spin_lock_init(&fuse_lock);
+ res = fuse_fs_init();
+ if (res)
+ goto err;
+
+ res = fuse_dev_init();
+ if (res)
+ goto err_fs_cleanup;
+
+ return 0;
+
+ err_fs_cleanup:
+ fuse_fs_cleanup();
+ err:
+ return res;
+}
+
+static void __exit fuse_exit(void)
+{
+ printk(KERN_DEBUG "fuse exit\n");
+
+ fuse_fs_cleanup();
+ fuse_dev_cleanup();
+}
+
+module_init(fuse_init);
+module_exit(fuse_exit);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b2d18200a003..59c5062cd63f 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -284,6 +284,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
static void hostfs_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
if(HOSTFS_I(inode)->fd != -1) {
close_file(&HOSTFS_I(inode)->fd);
HOSTFS_I(inode)->fd = -1;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 38b1741fa539..e3d17e9ea6c1 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -284,6 +284,7 @@ void hpfs_write_if_changed(struct inode *inode)
void hpfs_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
lock_kernel();
hpfs_remove_fnode(inode->i_sb, inode->i_ino);
unlock_kernel();
diff --git a/fs/inode.c b/fs/inode.c
index 71df1b1e8f75..f80a79ff156b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1034,19 +1034,21 @@ void generic_delete_inode(struct inode *inode)
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
- if (inode->i_data.nrpages)
- truncate_inode_pages(&inode->i_data, 0);
-
security_inode_delete(inode);
if (op->delete_inode) {
void (*delete)(struct inode *) = op->delete_inode;
if (!is_bad_inode(inode))
DQUOT_INIT(inode);
- /* s_op->delete_inode internally recalls clear_inode() */
+ /* Filesystems implementing their own
+ * s_op->delete_inode are required to call
+ * truncate_inode_pages and clear_inode()
+ * internally */
delete(inode);
- } else
+ } else {
+ truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
+ }
spin_lock(&inode_lock);
hlist_del_init(&inode->i_hash);
spin_unlock(&inode_lock);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index c6ec66fd8766..49bbc2be3d72 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1340,8 +1340,7 @@ int journal_stop(handle_t *handle)
if (handle->h_sync) {
do {
old_handle_count = transaction->t_handle_count;
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(1);
+ schedule_timeout_uninterruptible(1);
} while (old_handle_count != transaction->t_handle_count);
}
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
index 777b90057b89..3dcc6d2162cb 100644
--- a/fs/jffs/inode-v23.c
+++ b/fs/jffs/inode-v23.c
@@ -1744,6 +1744,7 @@ jffs_delete_inode(struct inode *inode)
D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n",
inode->i_ino));
+ truncate_inode_pages(&inode->i_data, 0);
lock_kernel();
inode->i_size = 0;
inode->i_blocks = 0;
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
index 456d7e6e29c2..27f199e94cfc 100644
--- a/fs/jffs/intrep.c
+++ b/fs/jffs/intrep.c
@@ -1701,12 +1701,10 @@ jffs_find_file(struct jffs_control *c, __u32 ino)
{
struct jffs_file *f;
int i = ino % c->hash_len;
- struct list_head *tmp;
D3(printk("jffs_find_file(): ino: %u\n", ino));
- for (tmp = c->hash[i].next; tmp != &c->hash[i]; tmp = tmp->next) {
- f = list_entry(tmp, struct jffs_file, hash);
+ list_for_each_entry(f, &c->hash[i], hash) {
if (ino != f->ino)
continue;
D3(printk("jffs_find_file(): Found file with ino "
@@ -2102,13 +2100,12 @@ jffs_foreach_file(struct jffs_control *c, int (*func)(struct jffs_file *))
int result = 0;
for (pos = 0; pos < c->hash_len; pos++) {
- struct list_head *p, *next;
- for (p = c->hash[pos].next; p != &c->hash[pos]; p = next) {
- /* We need a reference to the next file in the
- list because `func' might remove the current
- file `f'. */
- next = p->next;
- r = func(list_entry(p, struct jffs_file, hash));
+ struct jffs_file *f, *next;
+
+ /* We must do _safe, because 'func' might remove the
+ current file 'f' from the list. */
+ list_for_each_entry_safe(f, next, &c->hash[pos], hash) {
+ r = func(f);
if (r < 0)
return r;
result += r;
@@ -2613,9 +2610,8 @@ jffs_print_hash_table(struct jffs_control *c)
printk("JFFS: Dumping the file system's hash table...\n");
for (i = 0; i < c->hash_len; i++) {
- struct list_head *p;
- for (p = c->hash[i].next; p != &c->hash[i]; p = p->next) {
- struct jffs_file *f=list_entry(p,struct jffs_file,hash);
+ struct jffs_file *f;
+ list_for_each_entry(f, &c->hash[i], hash) {
printk("*** c->hash[%u]: \"%s\" "
"(ino: %u, pino: %u)\n",
i, (f->name ? f->name : ""),
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index e892dab40c26..461e4934ca7c 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -23,6 +23,7 @@
#include <linux/quotaops.h>
#include <linux/posix_acl_xattr.h>
#include "jfs_incore.h"
+#include "jfs_txnmgr.h"
#include "jfs_xattr.h"
#include "jfs_acl.h"
@@ -75,7 +76,8 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
return acl;
}
-static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
+ struct posix_acl *acl)
{
char *ea_name;
struct jfs_inode_info *ji = JFS_IP(inode);
@@ -110,7 +112,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
if (rc < 0)
goto out;
}
- rc = __jfs_setxattr(inode, ea_name, value, size, 0);
+ rc = __jfs_setxattr(tid, inode, ea_name, value, size, 0);
out:
kfree(value);
@@ -143,7 +145,7 @@ int jfs_permission(struct inode *inode, int mask, struct nameidata *nd)
return generic_permission(inode, mask, jfs_check_acl);
}
-int jfs_init_acl(struct inode *inode, struct inode *dir)
+int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
{
struct posix_acl *acl = NULL;
struct posix_acl *clone;
@@ -159,7 +161,7 @@ int jfs_init_acl(struct inode *inode, struct inode *dir)
if (acl) {
if (S_ISDIR(inode->i_mode)) {
- rc = jfs_set_acl(inode, ACL_TYPE_DEFAULT, acl);
+ rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl);
if (rc)
goto cleanup;
}
@@ -173,7 +175,8 @@ int jfs_init_acl(struct inode *inode, struct inode *dir)
if (rc >= 0) {
inode->i_mode = mode;
if (rc > 0)
- rc = jfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+ rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS,
+ clone);
}
posix_acl_release(clone);
cleanup:
@@ -202,8 +205,15 @@ static int jfs_acl_chmod(struct inode *inode)
return -ENOMEM;
rc = posix_acl_chmod_masq(clone, inode->i_mode);
- if (!rc)
- rc = jfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+ if (!rc) {
+ tid_t tid = txBegin(inode->i_sb, 0);
+ down(&JFS_IP(inode)->commit_sem);
+ rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone);
+ if (!rc)
+ rc = txCommit(tid, 1, &inode, 0);
+ txEnd(tid);
+ up(&JFS_IP(inode)->commit_sem);
+ }
posix_acl_release(clone);
return rc;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 767c7ecb429e..0ec62d5310db 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -128,21 +128,23 @@ void jfs_delete_inode(struct inode *inode)
{
jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
- if (is_bad_inode(inode) ||
- (JFS_IP(inode)->fileset != cpu_to_le32(FILESYSTEM_I)))
- return;
+ if (!is_bad_inode(inode) &&
+ (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) {
- if (test_cflag(COMMIT_Freewmap, inode))
- jfs_free_zero_link(inode);
+ truncate_inode_pages(&inode->i_data, 0);
- diFree(inode);
+ if (test_cflag(COMMIT_Freewmap, inode))
+ jfs_free_zero_link(inode);
- /*
- * Free the inode from the quota allocation.
- */
- DQUOT_INIT(inode);
- DQUOT_FREE_INODE(inode);
- DQUOT_DROP(inode);
+ diFree(inode);
+
+ /*
+ * Free the inode from the quota allocation.
+ */
+ DQUOT_INIT(inode);
+ DQUOT_FREE_INODE(inode);
+ DQUOT_DROP(inode);
+ }
clear_inode(inode);
}
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index a3acd3eec059..a76293767c73 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -21,8 +21,16 @@
#ifdef CONFIG_JFS_POSIX_ACL
int jfs_permission(struct inode *, int, struct nameidata *);
-int jfs_init_acl(struct inode *, struct inode *);
+int jfs_init_acl(tid_t, struct inode *, struct inode *);
int jfs_setattr(struct dentry *, struct iattr *);
-#endif /* CONFIG_JFS_POSIX_ACL */
+#else
+
+static inline int jfs_init_acl(tid_t tid, struct inode *inode,
+ struct inode *dir)
+{
+ return 0;
+}
+
+#endif
#endif /* _H_JFS_ACL */
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index a1052f3f0bee..25e9990bccd1 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -52,8 +52,8 @@ struct jfs_ea_list {
#define END_EALIST(ealist) \
((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
-extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t,
- int);
+extern int __jfs_setxattr(tid_t, struct inode *, const char *, const void *,
+ size_t, int);
extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t,
int);
extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t);
@@ -61,4 +61,14 @@ extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
extern int jfs_removexattr(struct dentry *, const char *);
+#ifdef CONFIG_JFS_SECURITY
+extern int jfs_init_security(tid_t, struct inode *, struct inode *);
+#else
+static inline int jfs_init_security(tid_t tid, struct inode *inode,
+ struct inode *dir)
+{
+ return 0;
+}
+#endif
+
#endif /* H_JFS_XATTR */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 49ccde3937f9..1abe7343f920 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -39,6 +39,24 @@ struct dentry_operations jfs_ci_dentry_operations;
static s64 commitZeroLink(tid_t, struct inode *);
/*
+ * NAME: free_ea_wmap(inode)
+ *
+ * FUNCTION: free uncommitted extended attributes from working map
+ *
+ */
+static inline void free_ea_wmap(struct inode *inode)
+{
+ dxd_t *ea = &JFS_IP(inode)->ea;
+
+ if (ea->flag & DXD_EXTENT) {
+ /* free EA pages from cache */
+ invalidate_dxd_metapages(inode, *ea);
+ dbFree(inode, addressDXD(ea), lengthDXD(ea));
+ }
+ ea->flag = 0;
+}
+
+/*
* NAME: jfs_create(dip, dentry, mode)
*
* FUNCTION: create a regular file in the parent directory <dip>
@@ -89,8 +107,19 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
+ rc = jfs_init_acl(tid, ip, dip);
+ if (rc)
+ goto out3;
+
+ rc = jfs_init_security(tid, ip, dip);
+ if (rc) {
+ txAbort(tid, 0);
+ goto out3;
+ }
+
if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
jfs_err("jfs_create: dtSearch returned %d", rc);
+ txAbort(tid, 0);
goto out3;
}
@@ -139,6 +168,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
if (rc) {
+ free_ea_wmap(ip);
ip->i_nlink = 0;
iput(ip);
} else
@@ -147,11 +177,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
out2:
free_UCSname(&dname);
-#ifdef CONFIG_JFS_POSIX_ACL
- if (rc == 0)
- jfs_init_acl(ip, dip);
-#endif
-
out1:
jfs_info("jfs_create: rc:%d", rc);
@@ -216,8 +241,19 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
+ rc = jfs_init_acl(tid, ip, dip);
+ if (rc)
+ goto out3;
+
+ rc = jfs_init_security(tid, ip, dip);
+ if (rc) {
+ txAbort(tid, 0);
+ goto out3;
+ }
+
if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
jfs_err("jfs_mkdir: dtSearch returned %d", rc);
+ txAbort(tid, 0);
goto out3;
}
@@ -267,6 +303,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
if (rc) {
+ free_ea_wmap(ip);
ip->i_nlink = 0;
iput(ip);
} else
@@ -275,10 +312,6 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
out2:
free_UCSname(&dname);
-#ifdef CONFIG_JFS_POSIX_ACL
- if (rc == 0)
- jfs_init_acl(ip, dip);
-#endif
out1:
@@ -885,6 +918,10 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
+ rc = jfs_init_security(tid, ip, dip);
+ if (rc)
+ goto out3;
+
tblk = tid_to_tblock(tid);
tblk->xflag |= COMMIT_CREATE;
tblk->ino = ip->i_ino;
@@ -1000,6 +1037,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
if (rc) {
+ free_ea_wmap(ip);
ip->i_nlink = 0;
iput(ip);
} else
@@ -1008,11 +1046,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
out2:
free_UCSname(&dname);
-#ifdef CONFIG_JFS_POSIX_ACL
- if (rc == 0)
- jfs_init_acl(ip, dip);
-#endif
-
out1:
jfs_info("jfs_symlink: rc:%d", rc);
return rc;
@@ -1328,8 +1361,20 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
down(&JFS_IP(dir)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
- if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE)))
+ rc = jfs_init_acl(tid, ip, dir);
+ if (rc)
+ goto out3;
+
+ rc = jfs_init_security(tid, ip, dir);
+ if (rc) {
+ txAbort(tid, 0);
goto out3;
+ }
+
+ if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) {
+ txAbort(tid, 0);
+ goto out3;
+ }
tblk = tid_to_tblock(tid);
tblk->xflag |= COMMIT_CREATE;
@@ -1337,8 +1382,10 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
tblk->u.ixpxd = JFS_IP(ip)->ixpxd;
ino = ip->i_ino;
- if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack)))
+ if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) {
+ txAbort(tid, 0);
goto out3;
+ }
ip->i_op = &jfs_file_inode_operations;
jfs_ip->dev = new_encode_dev(rdev);
@@ -1360,6 +1407,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
up(&JFS_IP(ip)->commit_sem);
up(&JFS_IP(dir)->commit_sem);
if (rc) {
+ free_ea_wmap(ip);
ip->i_nlink = 0;
iput(ip);
} else
@@ -1368,11 +1416,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
out1:
free_UCSname(&dname);
-#ifdef CONFIG_JFS_POSIX_ACL
- if (rc == 0)
- jfs_init_acl(ip, dir);
-#endif
-
out:
jfs_info("jfs_mknod: returning %d", rc);
return rc;
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 554ec739e49b..23aa5066b5a4 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -21,6 +21,7 @@
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/quotaops.h>
+#include <linux/security.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_dmap.h"
@@ -633,12 +634,12 @@ static void ea_release(struct inode *inode, struct ea_buffer *ea_buf)
}
}
-static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
+static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf,
+ int new_size)
{
struct jfs_inode_info *ji = JFS_IP(inode);
unsigned long old_blocks, new_blocks;
int rc = 0;
- tid_t tid;
if (new_size == 0) {
ea_release(inode, ea_buf);
@@ -664,9 +665,6 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
if (rc)
return rc;
- tid = txBegin(inode->i_sb, 0);
- down(&ji->commit_sem);
-
old_blocks = new_blocks = 0;
if (ji->ea.flag & DXD_EXTENT) {
@@ -695,11 +693,8 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
DQUOT_FREE_BLOCK(inode, old_blocks);
inode->i_ctime = CURRENT_TIME;
- rc = txCommit(tid, 1, &inode, 0);
- txEnd(tid);
- up(&ji->commit_sem);
- return rc;
+ return 0;
}
/*
@@ -810,8 +805,8 @@ static int can_set_xattr(struct inode *inode, const char *name,
return permission(inode, MAY_WRITE, NULL);
}
-int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
- size_t value_len, int flags)
+int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name,
+ const void *value, size_t value_len, int flags)
{
struct jfs_ea_list *ealist;
struct jfs_ea *ea, *old_ea = NULL, *next_ea = NULL;
@@ -825,9 +820,6 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
int rc;
int length;
- if ((rc = can_set_xattr(inode, name, value, value_len)))
- return rc;
-
if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
GFP_KERNEL);
@@ -939,7 +931,7 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
ealist->size = cpu_to_le32(new_size);
- rc = ea_put(inode, &ea_buf, new_size);
+ rc = ea_put(tid, inode, &ea_buf, new_size);
goto out;
release:
@@ -955,12 +947,29 @@ int __jfs_setxattr(struct inode *inode, const char *name, const void *value,
int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t value_len, int flags)
{
+ struct inode *inode = dentry->d_inode;
+ struct jfs_inode_info *ji = JFS_IP(inode);
+ int rc;
+ tid_t tid;
+
+ if ((rc = can_set_xattr(inode, name, value, value_len)))
+ return rc;
+
if (value == NULL) { /* empty EA, do not remove */
value = "";
value_len = 0;
}
- return __jfs_setxattr(dentry->d_inode, name, value, value_len, flags);
+ tid = txBegin(inode->i_sb, 0);
+ down(&ji->commit_sem);
+ rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len,
+ flags);
+ if (!rc)
+ rc = txCommit(tid, 1, &inode, 0);
+ txEnd(tid);
+ up(&ji->commit_sem);
+
+ return rc;
}
static int can_get_xattr(struct inode *inode, const char *name)
@@ -1122,5 +1131,56 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
int jfs_removexattr(struct dentry *dentry, const char *name)
{
- return __jfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+ struct inode *inode = dentry->d_inode;
+ struct jfs_inode_info *ji = JFS_IP(inode);
+ int rc;
+ tid_t tid;
+
+ if ((rc = can_set_xattr(inode, name, NULL, 0)))
+ return rc;
+
+ tid = txBegin(inode->i_sb, 0);
+ down(&ji->commit_sem);
+ rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+ if (!rc)
+ rc = txCommit(tid, 1, &inode, 0);
+ txEnd(tid);
+ up(&ji->commit_sem);
+
+ return rc;
+}
+
+#ifdef CONFIG_JFS_SECURITY
+int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
+{
+ int rc;
+ size_t len;
+ void *value;
+ char *suffix;
+ char *name;
+
+ rc = security_inode_init_security(inode, dir, &suffix, &value, &len);
+ if (rc) {
+ if (rc == -EOPNOTSUPP)
+ return 0;
+ return rc;
+ }
+ name = kmalloc(XATTR_SECURITY_PREFIX_LEN + 1 + strlen(suffix),
+ GFP_NOFS);
+ if (!name) {
+ rc = -ENOMEM;
+ goto kmalloc_failed;
+ }
+ strcpy(name, XATTR_SECURITY_PREFIX);
+ strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
+
+ rc = __jfs_setxattr(tid, inode, name, value, len, 0);
+
+ kfree(name);
+kmalloc_failed:
+ kfree(suffix);
+ kfree(value);
+
+ return rc;
}
+#endif
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 14b3ce87fa29..87332f30141b 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -299,8 +299,7 @@ nlmclnt_alloc_call(void)
return call;
}
printk("nlmclnt_alloc_call: failed, waiting for memory\n");
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(5*HZ);
+ schedule_timeout_interruptible(5*HZ);
}
return NULL;
}
diff --git a/fs/locks.c b/fs/locks.c
index 11956b6179ff..c2c09b4798d6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2198,21 +2198,23 @@ void steal_locks(fl_owner_t from)
{
struct files_struct *files = current->files;
int i, j;
+ struct fdtable *fdt;
if (from == files)
return;
lock_kernel();
j = 0;
+ fdt = files_fdtable(files);
for (;;) {
unsigned long set;
i = j * __NFDBITS;
- if (i >= files->max_fdset || i >= files->max_fds)
+ if (i >= fdt->max_fdset || i >= fdt->max_fds)
break;
- set = files->open_fds->fds_bits[j++];
+ set = fdt->open_fds->fds_bits[j++];
while (set) {
if (set & 1) {
- struct file *file = files->fd[i];
+ struct file *file = fdt->fd[i];
if (file)
__steal_locks(file, from);
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 3f18c21198d7..790cc0d0e970 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -24,6 +24,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data);
static void minix_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
inode->i_size = 0;
minix_truncate(inode);
minix_free_inode(inode);
diff --git a/fs/namei.c b/fs/namei.c
index 145e852c4bd0..21d85f1ac839 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1316,10 +1316,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
return error;
DQUOT_INIT(dir);
error = dir->i_op->create(dir, dentry, mode, nd);
- if (!error) {
+ if (!error)
fsnotify_create(dir, dentry->d_name.name);
- security_inode_post_create(dir, dentry, mode);
- }
return error;
}
@@ -1635,10 +1633,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
DQUOT_INIT(dir);
error = dir->i_op->mknod(dir, dentry, mode, dev);
- if (!error) {
+ if (!error)
fsnotify_create(dir, dentry->d_name.name);
- security_inode_post_mknod(dir, dentry, mode, dev);
- }
return error;
}
@@ -1708,10 +1704,8 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
DQUOT_INIT(dir);
error = dir->i_op->mkdir(dir, dentry, mode);
- if (!error) {
+ if (!error)
fsnotify_mkdir(dir, dentry->d_name.name);
- security_inode_post_mkdir(dir,dentry, mode);
- }
return error;
}
@@ -1947,10 +1941,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
DQUOT_INIT(dir);
error = dir->i_op->symlink(dir, dentry, oldname);
- if (!error) {
+ if (!error)
fsnotify_create(dir, dentry->d_name.name);
- security_inode_post_symlink(dir, dentry, oldname);
- }
return error;
}
@@ -2020,10 +2012,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
DQUOT_INIT(dir);
error = dir->i_op->link(old_dentry, dir, new_dentry);
up(&old_dentry->d_inode->i_sem);
- if (!error) {
+ if (!error)
fsnotify_create(dir, new_dentry->d_name.name);
- security_inode_post_link(old_dentry, dir, new_dentry);
- }
return error;
}
@@ -2142,11 +2132,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
d_rehash(new_dentry);
dput(new_dentry);
}
- if (!error) {
+ if (!error)
d_move(old_dentry,new_dentry);
- security_inode_post_rename(old_dir, old_dentry,
- new_dir, new_dentry);
- }
return error;
}
@@ -2172,7 +2159,6 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
/* The following d_move() should become unconditional */
if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
d_move(old_dentry, new_dentry);
- security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry);
}
if (target)
up(&target->i_sem);
diff --git a/fs/namespace.c b/fs/namespace.c
index 34156260c9b6..2fa9fdf7d6f5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -537,7 +537,6 @@ lives_below_in_same_fs(struct dentry *d, struct dentry *dentry)
static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
{
struct vfsmount *res, *p, *q, *r, *s;
- struct list_head *h;
struct nameidata nd;
res = q = clone_mnt(mnt, dentry);
@@ -546,8 +545,7 @@ static struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry)
q->mnt_mountpoint = mnt->mnt_mountpoint;
p = mnt;
- for (h = mnt->mnt_mounts.next; h != &mnt->mnt_mounts; h = h->next) {
- r = list_entry(h, struct vfsmount, mnt_child);
+ list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
if (!lives_below_in_same_fs(r->mnt_mountpoint, dentry))
continue;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 44795d2f4b30..8c8839203cd5 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -286,6 +286,8 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
static void
ncp_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
+
if (S_ISDIR(inode->i_mode)) {
DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino);
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 541b418327c8..6922469d6fc5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -146,6 +146,8 @@ nfs_delete_inode(struct inode * inode)
{
dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+ truncate_inode_pages(&inode->i_data, 0);
+
nfs_wb_all(inode);
/*
* The following should never happen...
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2681485cf2d0..edc95514046d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -34,8 +34,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
+ schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!signalled());
rpc_clnt_sigunmask(clnt, &oldset);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0c5a308e4963..9701ca8c9428 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2418,14 +2418,11 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
*timeout = NFS4_POLL_RETRY_MAX;
rpc_clnt_sigmask(clnt, &oldset);
if (clnt->cl_intr) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(*timeout);
+ schedule_timeout_interruptible(*timeout);
if (signalled())
res = -ERESTARTSYS;
- } else {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(*timeout);
- }
+ } else
+ schedule_timeout_uninterruptible(*timeout);
rpc_clnt_sigunmask(clnt, &oldset);
*timeout <<= 1;
return res;
@@ -2578,8 +2575,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
static unsigned long
nfs4_set_lock_task_retry(unsigned long timeout)
{
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(timeout);
+ schedule_timeout_interruptible(timeout);
timeout <<= 1;
if (timeout > NFS4_LOCK_MAXTIMEOUT)
return NFS4_LOCK_MAXTIMEOUT;
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 9eecc9939dfe..49eafbdb15c1 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -22,6 +22,77 @@ ToDo/Notes:
- Enable the code for setting the NT4 compatibility flag when we start
making NTFS 1.2 specific modifications.
+2.1.24 - Lots of bug fixes and support more clean journal states.
+
+ - Support journals ($LogFile) which have been modified by chkdsk. This
+ means users can boot into Windows after we marked the volume dirty.
+ The Windows boot will run chkdsk and then reboot. The user can then
+ immediately boot into Linux rather than having to do a full Windows
+ boot first before rebooting into Linux and we will recognize such a
+ journal and empty it as it is clean by definition.
+ - Support journals ($LogFile) with only one restart page as well as
+ journals with two different restart pages. We sanity check both and
+ either use the only sane one or the more recent one of the two in the
+ case that both are valid.
+ - Add fs/ntfs/malloc.h::ntfs_malloc_nofs_nofail() which is analogous to
+ ntfs_malloc_nofs() but it performs allocations with __GFP_NOFAIL and
+ hence cannot fail.
+ - Use ntfs_malloc_nofs_nofail() in the two critical regions in
+ fs/ntfs/runlist.c::ntfs_runlists_merge(). This means we no longer
+ need to panic() if the allocation fails as it now cannot fail.
+ - Fix two nasty runlist merging bugs that had gone unnoticed so far.
+ Thanks to Stefano Picerno for the bug report.
+ - Remove two bogus BUG_ON()s from fs/ntfs/mft.c.
+ - Fix handling of valid but empty mapping pairs array in
+ fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress().
+ - Report unrepresentable inodes during ntfs_readdir() as KERN_WARNING
+ messages and include the inode number. Thanks to Yura Pakhuchiy for
+ pointing this out.
+ - Change ntfs_rl_truncate_nolock() to throw away the runlist if the new
+ length is zero.
+ - Add runlist.[hc]::ntfs_rl_punch_nolock() which punches a caller
+ specified hole into a runlist.
+ - Fix a bug in fs/ntfs/index.c::ntfs_index_lookup(). When the returned
+ index entry is in the index root, we forgot to set the @ir pointer in
+ the index context. Thanks to Yura Pakhuchiy for finding this bug.
+ - Remove bogus setting of PageError in ntfs_read_compressed_block().
+ - Add fs/ntfs/attrib.[hc]::ntfs_resident_attr_value_resize().
+ - Fix a bug in ntfs_map_runlist_nolock() where we forgot to protect
+ access to the allocated size in the ntfs inode with the size lock.
+ - Fix ntfs_attr_vcn_to_lcn_nolock() and ntfs_attr_find_vcn_nolock() to
+ return LCN_ENOENT when there is no runlist and the allocated size is
+ zero.
+ - Fix load_attribute_list() to handle the case of a NULL runlist.
+ - Fix handling of sparse attributes in ntfs_attr_make_non_resident().
+ - Add BUG() checks to ntfs_attr_make_non_resident() and ntfs_attr_set()
+ to ensure that these functions are never called for compressed or
+ encrypted attributes.
+ - Fix cluster (de)allocators to work when the runlist is NULL and more
+ importantly to take a locked runlist rather than them locking it
+ which leads to lock reversal.
+ - Truncate {a,c,m}time to the ntfs supported time granularity when
+ updating the times in the inode in ntfs_setattr().
+ - Fixup handling of sparse, compressed, and encrypted attributes in
+ fs/ntfs/inode.c::ntfs_read_locked_{,attr_,index_}inode(),
+ fs/ntfs/aops.c::ntfs_{read,write}page().
+ - Make ntfs_write_block() not instantiate sparse blocks if they contain
+ only zeroes.
+ - Optimize fs/ntfs/aops.c::ntfs_write_block() by extending the page
+ lock protection over the buffer submission for i/o which allows the
+ removal of the get_bh()/put_bh() pairs for each buffer.
+ - Fix fs/ntfs/aops.c::ntfs_{read,write}_block() to handle the case
+ where a concurrent truncate has truncated the runlist under our feet.
+ - Fix page_has_buffers()/page_buffers() handling in fs/ntfs/aops.c.
+ - In fs/ntfs/aops.c::ntfs_end_buffer_async_read(), use a bit spin lock
+ in the first buffer head instead of a driver global spin lock to
+ improve scalability.
+ - Minor fix to error handling and error message display in
+ fs/ntfs/aops.c::ntfs_prepare_nonresident_write().
+ - Change the mount options {u,f,d}mask to always parse the number as
+ an octal number to conform to how chmod(1) works, too. Thanks to
+ Giuseppe Bilotta and Horst von Brand for pointing out the errors of
+ my ways.
+
2.1.23 - Implement extension of resident files and make writing safe as well as
many bug fixes, cleanups, and enhancements...
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index f083f27d8b69..894b2b876d35 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
unistr.o upcase.o
-EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23\"
+EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.24\"
ifeq ($(CONFIG_NTFS_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 78adad7a988d..b6cc8cf24626 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
+#include <linux/bit_spinlock.h>
#include "aops.h"
#include "attrib.h"
@@ -55,9 +56,8 @@
*/
static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
- static DEFINE_SPINLOCK(page_uptodate_lock);
unsigned long flags;
- struct buffer_head *tmp;
+ struct buffer_head *first, *tmp;
struct page *page;
ntfs_inode *ni;
int page_uptodate = 1;
@@ -89,11 +89,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
}
} else {
clear_buffer_uptodate(bh);
+ SetPageError(page);
ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
(unsigned long long)bh->b_blocknr);
- SetPageError(page);
}
- spin_lock_irqsave(&page_uptodate_lock, flags);
+ first = page_buffers(page);
+ local_irq_save(flags);
+ bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
@@ -108,7 +110,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
}
tmp = tmp->b_this_page;
} while (tmp != bh);
- spin_unlock_irqrestore(&page_uptodate_lock, flags);
+ bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+ local_irq_restore(flags);
/*
* If none of the buffers had errors then we can set the page uptodate,
* but we first have to perform the post read mst fixups, if the
@@ -141,7 +144,8 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
unlock_page(page);
return;
still_busy:
- spin_unlock_irqrestore(&page_uptodate_lock, flags);
+ bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
+ local_irq_restore(flags);
return;
}
@@ -185,13 +189,15 @@ static int ntfs_read_block(struct page *page)
blocksize_bits = VFS_I(ni)->i_blkbits;
blocksize = 1 << blocksize_bits;
- if (!page_has_buffers(page))
+ if (!page_has_buffers(page)) {
create_empty_buffers(page, blocksize, 0);
- bh = head = page_buffers(page);
- if (unlikely(!bh)) {
- unlock_page(page);
- return -ENOMEM;
+ if (unlikely(!page_has_buffers(page))) {
+ unlock_page(page);
+ return -ENOMEM;
+ }
}
+ bh = head = page_buffers(page);
+ BUG_ON(!bh);
iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
read_lock_irqsave(&ni->size_lock, flags);
@@ -204,6 +210,7 @@ static int ntfs_read_block(struct page *page)
nr = i = 0;
do {
u8 *kaddr;
+ int err;
if (unlikely(buffer_uptodate(bh)))
continue;
@@ -211,6 +218,7 @@ static int ntfs_read_block(struct page *page)
arr[nr++] = bh;
continue;
}
+ err = 0;
bh->b_bdev = vol->sb->s_bdev;
/* Is the block within the allowed limits? */
if (iblock < lblock) {
@@ -252,7 +260,6 @@ lock_retry_remap:
goto handle_hole;
/* If first try and runlist unmapped, map and retry. */
if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
- int err;
is_retry = TRUE;
/*
* Attempt to map runlist, dropping lock for
@@ -263,20 +270,30 @@ lock_retry_remap:
if (likely(!err))
goto lock_retry_remap;
rl = NULL;
- lcn = err;
} else if (!rl)
up_read(&ni->runlist.lock);
+ /*
+ * If buffer is outside the runlist, treat it as a
+ * hole. This can happen due to concurrent truncate
+ * for example.
+ */
+ if (err == -ENOENT || lcn == LCN_ENOENT) {
+ err = 0;
+ goto handle_hole;
+ }
/* Hard error, zero out region. */
+ if (!err)
+ err = -EIO;
bh->b_blocknr = -1;
SetPageError(page);
ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
"attribute type 0x%x, vcn 0x%llx, "
"offset 0x%x because its location on "
"disk could not be determined%s "
- "(error code %lli).", ni->mft_no,
+ "(error code %i).", ni->mft_no,
ni->type, (unsigned long long)vcn,
vcn_ofs, is_retry ? " even after "
- "retrying" : "", (long long)lcn);
+ "retrying" : "", err);
}
/*
* Either iblock was outside lblock limits or
@@ -289,9 +306,10 @@ handle_hole:
handle_zblock:
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + i * blocksize, 0, blocksize);
- flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
- set_buffer_uptodate(bh);
+ flush_dcache_page(page);
+ if (likely(!err))
+ set_buffer_uptodate(bh);
} while (i++, iblock++, (bh = bh->b_this_page) != head);
/* Release the lock if we took it. */
@@ -367,31 +385,38 @@ retry_readpage:
return 0;
}
ni = NTFS_I(page->mapping->host);
-
+ /*
+ * Only $DATA attributes can be encrypted and only unnamed $DATA
+ * attributes can be compressed. Index root can have the flags set but
+ * this means to create compressed/encrypted files, not that the
+ * attribute is compressed/encrypted.
+ */
+ if (ni->type != AT_INDEX_ROOT) {
+ /* If attribute is encrypted, deny access, just like NT4. */
+ if (NInoEncrypted(ni)) {
+ BUG_ON(ni->type != AT_DATA);
+ err = -EACCES;
+ goto err_out;
+ }
+ /* Compressed data streams are handled in compress.c. */
+ if (NInoNonResident(ni) && NInoCompressed(ni)) {
+ BUG_ON(ni->type != AT_DATA);
+ BUG_ON(ni->name_len);
+ return ntfs_read_compressed_block(page);
+ }
+ }
/* NInoNonResident() == NInoIndexAllocPresent() */
if (NInoNonResident(ni)) {
- /*
- * Only unnamed $DATA attributes can be compressed or
- * encrypted.
- */
- if (ni->type == AT_DATA && !ni->name_len) {
- /* If file is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- err = -EACCES;
- goto err_out;
- }
- /* Compressed data streams are handled in compress.c. */
- if (NInoCompressed(ni))
- return ntfs_read_compressed_block(page);
- }
- /* Normal data stream. */
+ /* Normal, non-resident data stream. */
return ntfs_read_block(page);
}
/*
* Attribute is resident, implying it is not compressed or encrypted.
* This also means the attribute is smaller than an mft record and
* hence smaller than a page, so can simply zero out any pages with
- * index above 0.
+ * index above 0. Note the attribute can actually be marked compressed
+ * but if it is resident the actual data is not compressed so we are
+ * ok to ignore the compressed flag here.
*/
if (unlikely(page->index > 0)) {
kaddr = kmap_atomic(page, KM_USER0);
@@ -511,19 +536,21 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
BUG_ON(!PageUptodate(page));
create_empty_buffers(page, blocksize,
(1 << BH_Uptodate) | (1 << BH_Dirty));
+ if (unlikely(!page_has_buffers(page))) {
+ ntfs_warning(vol->sb, "Error allocating page "
+ "buffers. Redirtying page so we try "
+ "again later.");
+ /*
+ * Put the page back on mapping->dirty_pages, but leave
+ * its buffers' dirty state as-is.
+ */
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
+ }
}
bh = head = page_buffers(page);
- if (unlikely(!bh)) {
- ntfs_warning(vol->sb, "Error allocating page buffers. "
- "Redirtying page so we try again later.");
- /*
- * Put the page back on mapping->dirty_pages, but leave its
- * buffer's dirty state as-is.
- */
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
+ BUG_ON(!bh);
/* NOTE: Different naming scheme to ntfs_read_block()! */
@@ -670,6 +697,27 @@ lock_retry_remap:
}
/* It is a hole, need to instantiate it. */
if (lcn == LCN_HOLE) {
+ u8 *kaddr;
+ unsigned long *bpos, *bend;
+
+ /* Check if the buffer is zero. */
+ kaddr = kmap_atomic(page, KM_USER0);
+ bpos = (unsigned long *)(kaddr + bh_offset(bh));
+ bend = (unsigned long *)((u8*)bpos + blocksize);
+ do {
+ if (unlikely(*bpos))
+ break;
+ } while (likely(++bpos < bend));
+ kunmap_atomic(kaddr, KM_USER0);
+ if (bpos == bend) {
+ /*
+ * Buffer is zero and sparse, no need to write
+ * it.
+ */
+ bh->b_blocknr = -1;
+ clear_buffer_dirty(bh);
+ continue;
+ }
// TODO: Instantiate the hole.
// clear_buffer_new(bh);
// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
@@ -690,20 +738,37 @@ lock_retry_remap:
if (likely(!err))
goto lock_retry_remap;
rl = NULL;
- lcn = err;
} else if (!rl)
up_read(&ni->runlist.lock);
+ /*
+ * If buffer is outside the runlist, truncate has cut it out
+ * of the runlist. Just clean and clear the buffer and set it
+ * uptodate so it can get discarded by the VM.
+ */
+ if (err == -ENOENT || lcn == LCN_ENOENT) {
+ u8 *kaddr;
+
+ bh->b_blocknr = -1;
+ clear_buffer_dirty(bh);
+ kaddr = kmap_atomic(page, KM_USER0);
+ memset(kaddr + bh_offset(bh), 0, blocksize);
+ kunmap_atomic(kaddr, KM_USER0);
+ flush_dcache_page(page);
+ set_buffer_uptodate(bh);
+ err = 0;
+ continue;
+ }
/* Failed to map the buffer, even after retrying. */
+ if (!err)
+ err = -EIO;
bh->b_blocknr = -1;
ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
"because its location on disk could not be "
- "determined%s (error code %lli).", ni->mft_no,
+ "determined%s (error code %i).", ni->mft_no,
ni->type, (unsigned long long)vcn,
vcn_ofs, is_retry ? " even after "
- "retrying" : "", (long long)lcn);
- if (!err)
- err = -EIO;
+ "retrying" : "", err);
break;
} while (block++, (bh = bh->b_this_page) != head);
@@ -714,7 +779,7 @@ lock_retry_remap:
/* For the error case, need to reset bh to the beginning. */
bh = head;
- /* Just an optimization, so ->readpage() isn't called later. */
+ /* Just an optimization, so ->readpage() is not called later. */
if (unlikely(!PageUptodate(page))) {
int uptodate = 1;
do {
@@ -730,7 +795,6 @@ lock_retry_remap:
/* Setup all mapped, dirty buffers for async write i/o. */
do {
- get_bh(bh);
if (buffer_mapped(bh) && buffer_dirty(bh)) {
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
@@ -768,14 +832,8 @@ lock_retry_remap:
BUG_ON(PageWriteback(page));
set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
- unlock_page(page);
- /*
- * Submit the prepared buffers for i/o. Note the page is unlocked,
- * and the async write i/o completion handler can end_page_writeback()
- * at any time after the *first* submit_bh(). So the buffers can then
- * disappear...
- */
+ /* Submit the prepared buffers for i/o. */
need_end_writeback = TRUE;
do {
struct buffer_head *next = bh->b_this_page;
@@ -783,9 +841,9 @@ lock_retry_remap:
submit_bh(WRITE, bh);
need_end_writeback = FALSE;
}
- put_bh(bh);
bh = next;
} while (bh != head);
+ unlock_page(page);
/* If no i/o was started, need to end_page_writeback(). */
if (unlikely(need_end_writeback))
@@ -860,7 +918,6 @@ static int ntfs_write_mst_block(struct page *page,
sync = (wbc->sync_mode == WB_SYNC_ALL);
/* Make sure we have mapped buffers. */
- BUG_ON(!page_has_buffers(page));
bh = head = page_buffers(page);
BUG_ON(!bh);
@@ -1280,38 +1337,42 @@ retry_writepage:
ntfs_debug("Write outside i_size - truncated?");
return 0;
}
+ /*
+ * Only $DATA attributes can be encrypted and only unnamed $DATA
+ * attributes can be compressed. Index root can have the flags set but
+ * this means to create compressed/encrypted files, not that the
+ * attribute is compressed/encrypted.
+ */
+ if (ni->type != AT_INDEX_ROOT) {
+ /* If file is encrypted, deny access, just like NT4. */
+ if (NInoEncrypted(ni)) {
+ unlock_page(page);
+ BUG_ON(ni->type != AT_DATA);
+ ntfs_debug("Denying write access to encrypted "
+ "file.");
+ return -EACCES;
+ }
+ /* Compressed data streams are handled in compress.c. */
+ if (NInoNonResident(ni) && NInoCompressed(ni)) {
+ BUG_ON(ni->type != AT_DATA);
+ BUG_ON(ni->name_len);
+ // TODO: Implement and replace this with
+ // return ntfs_write_compressed_block(page);
+ unlock_page(page);
+ ntfs_error(vi->i_sb, "Writing to compressed files is "
+ "not supported yet. Sorry.");
+ return -EOPNOTSUPP;
+ }
+ // TODO: Implement and remove this check.
+ if (NInoNonResident(ni) && NInoSparse(ni)) {
+ unlock_page(page);
+ ntfs_error(vi->i_sb, "Writing to sparse files is not "
+ "supported yet. Sorry.");
+ return -EOPNOTSUPP;
+ }
+ }
/* NInoNonResident() == NInoIndexAllocPresent() */
if (NInoNonResident(ni)) {
- /*
- * Only unnamed $DATA attributes can be compressed, encrypted,
- * and/or sparse.
- */
- if (ni->type == AT_DATA && !ni->name_len) {
- /* If file is encrypted, deny access, just like NT4. */
- if (NInoEncrypted(ni)) {
- unlock_page(page);
- ntfs_debug("Denying write access to encrypted "
- "file.");
- return -EACCES;
- }
- /* Compressed data streams are handled in compress.c. */
- if (NInoCompressed(ni)) {
- // TODO: Implement and replace this check with
- // return ntfs_write_compressed_block(page);
- unlock_page(page);
- ntfs_error(vi->i_sb, "Writing to compressed "
- "files is not supported yet. "
- "Sorry.");
- return -EOPNOTSUPP;
- }
- // TODO: Implement and remove this check.
- if (NInoSparse(ni)) {
- unlock_page(page);
- ntfs_error(vi->i_sb, "Writing to sparse files "
- "is not supported yet. Sorry.");
- return -EOPNOTSUPP;
- }
- }
/* We have to zero every time due to mmap-at-end-of-file. */
if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
/* The page straddles i_size. */
@@ -1324,14 +1385,16 @@ retry_writepage:
/* Handle mst protected attributes. */
if (NInoMstProtected(ni))
return ntfs_write_mst_block(page, wbc);
- /* Normal data stream. */
+ /* Normal, non-resident data stream. */
return ntfs_write_block(page, wbc);
}
/*
- * Attribute is resident, implying it is not compressed, encrypted,
- * sparse, or mst protected. This also means the attribute is smaller
- * than an mft record and hence smaller than a page, so can simply
- * return error on any pages with index above 0.
+ * Attribute is resident, implying it is not compressed, encrypted, or
+ * mst protected. This also means the attribute is smaller than an mft
+ * record and hence smaller than a page, so can simply return error on
+ * any pages with index above 0. Note the attribute can actually be
+ * marked compressed but if it is resident the actual data is not
+ * compressed so we are ok to ignore the compressed flag here.
*/
BUG_ON(page_has_buffers(page));
BUG_ON(!PageUptodate(page));
@@ -1380,30 +1443,14 @@ retry_writepage:
BUG_ON(PageWriteback(page));
set_page_writeback(page);
unlock_page(page);
-
/*
- * Here, we don't need to zero the out of bounds area everytime because
- * the below memcpy() already takes care of the mmap-at-end-of-file
- * requirements. If the file is converted to a non-resident one, then
- * the code path use is switched to the non-resident one where the
- * zeroing happens on each ntfs_writepage() invocation.
- *
- * The above also applies nicely when i_size is decreased.
- *
- * When i_size is increased, the memory between the old and new i_size
- * _must_ be zeroed (or overwritten with new data). Otherwise we will
- * expose data to userspace/disk which should never have been exposed.
- *
- * FIXME: Ensure that i_size increases do the zeroing/overwriting and
- * if we cannot guarantee that, then enable the zeroing below. If the
- * zeroing below is enabled, we MUST move the unlock_page() from above
- * to after the kunmap_atomic(), i.e. just before the
- * end_page_writeback().
- * UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
- * increases for resident attributes so those are ok.
- * TODO: ntfs_truncate(), others?
+ * Here, we do not need to zero the out of bounds area everytime
+ * because the below memcpy() already takes care of the
+ * mmap-at-end-of-file requirements. If the file is converted to a
+ * non-resident one, then the code path use is switched to the
+ * non-resident one where the zeroing happens on each ntfs_writepage()
+ * invocation.
*/
-
attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
i_size = i_size_read(vi);
if (unlikely(attr_len > i_size)) {
@@ -1681,27 +1728,25 @@ lock_retry_remap:
if (likely(!err))
goto lock_retry_remap;
rl = NULL;
- lcn = err;
} else if (!rl)
up_read(&ni->runlist.lock);
/*
* Failed to map the buffer, even after
* retrying.
*/
+ if (!err)
+ err = -EIO;
bh->b_blocknr = -1;
ntfs_error(vol->sb, "Failed to write to inode "
"0x%lx, attribute type 0x%x, "
"vcn 0x%llx, offset 0x%x "
"because its location on disk "
"could not be determined%s "
- "(error code %lli).",
+ "(error code %i).",
ni->mft_no, ni->type,
(unsigned long long)vcn,
vcn_ofs, is_retry ? " even "
- "after retrying" : "",
- (long long)lcn);
- if (!err)
- err = -EIO;
+ "after retrying" : "", err);
goto err_out;
}
/* We now have a successful remap, i.e. lcn >= 0. */
@@ -2357,6 +2402,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
buffers_to_free = bh;
}
bh = head = page_buffers(page);
+ BUG_ON(!bh);
do {
bh_ofs = bh_offset(bh);
if (bh_ofs + bh_size <= ofs)
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index cd0f9e740b14..3f9a4ff42ee5 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -43,6 +43,9 @@
* which is not an error as such. This is -ENOENT. It means that @vcn is out
* of bounds of the runlist.
*
+ * Note the runlist can be NULL after this function returns if @vcn is zero and
+ * the attribute has zero allocated size, i.e. there simply is no runlist.
+ *
* Locking: - The runlist must be locked for writing.
* - This function modifies the runlist.
*/
@@ -54,6 +57,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
ATTR_RECORD *a;
ntfs_attr_search_ctx *ctx;
runlist_element *rl;
+ unsigned long flags;
int err = 0;
ntfs_debug("Mapping runlist part containing vcn 0x%llx.",
@@ -85,8 +89,11 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn)
* ntfs_mapping_pairs_decompress() fails.
*/
end_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn) + 1;
- if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1))
+ if (unlikely(!a->data.non_resident.lowest_vcn && end_vcn <= 1)) {
+ read_lock_irqsave(&ni->size_lock, flags);
end_vcn = ni->allocated_size >> ni->vol->cluster_size_bits;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ }
if (unlikely(vcn >= end_vcn)) {
err = -ENOENT;
goto err_out;
@@ -165,6 +172,7 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
const BOOL write_locked)
{
LCN lcn;
+ unsigned long flags;
BOOL is_retry = FALSE;
ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
@@ -173,6 +181,14 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
BUG_ON(!ni);
BUG_ON(!NInoNonResident(ni));
BUG_ON(vcn < 0);
+ if (!ni->runlist.rl) {
+ read_lock_irqsave(&ni->size_lock, flags);
+ if (!ni->allocated_size) {
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ return LCN_ENOENT;
+ }
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ }
retry_remap:
/* Convert vcn to lcn. If that fails map the runlist and retry once. */
lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn);
@@ -255,6 +271,7 @@ retry_remap:
runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
const BOOL write_locked)
{
+ unsigned long flags;
runlist_element *rl;
int err = 0;
BOOL is_retry = FALSE;
@@ -265,6 +282,14 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
BUG_ON(!ni);
BUG_ON(!NInoNonResident(ni));
BUG_ON(vcn < 0);
+ if (!ni->runlist.rl) {
+ read_lock_irqsave(&ni->size_lock, flags);
+ if (!ni->allocated_size) {
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ return ERR_PTR(-ENOENT);
+ }
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ }
retry_remap:
rl = ni->runlist.rl;
if (likely(rl && vcn >= rl[0].vcn)) {
@@ -528,6 +553,11 @@ int load_attribute_list(ntfs_volume *vol, runlist *runlist, u8 *al_start,
block_size_bits = sb->s_blocksize_bits;
down_read(&runlist->lock);
rl = runlist->rl;
+ if (!rl) {
+ ntfs_error(sb, "Cannot read attribute list since runlist is "
+ "missing.");
+ goto err_out;
+ }
/* Read all clusters specified by the runlist one run at a time. */
while (rl->length) {
lcn = ntfs_rl_vcn_to_lcn(rl, rl->vcn);
@@ -1247,6 +1277,46 @@ int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
}
/**
+ * ntfs_resident_attr_value_resize - resize the value of a resident attribute
+ * @m: mft record containing attribute record
+ * @a: attribute record whose value to resize
+ * @new_size: new size in bytes to which to resize the attribute value of @a
+ *
+ * Resize the value of the attribute @a in the mft record @m to @new_size bytes.
+ * If the value is made bigger, the newly allocated space is cleared.
+ *
+ * Return 0 on success and -errno on error. The following error codes are
+ * defined:
+ * -ENOSPC - Not enough space in the mft record @m to perform the resize.
+ *
+ * Note: On error, no modifications have been performed whatsoever.
+ *
+ * Warning: If you make a record smaller without having copied all the data you
+ * are interested in the data may be overwritten.
+ */
+int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
+ const u32 new_size)
+{
+ u32 old_size;
+
+ /* Resize the resident part of the attribute record. */
+ if (ntfs_attr_record_resize(m, a,
+ le16_to_cpu(a->data.resident.value_offset) + new_size))
+ return -ENOSPC;
+ /*
+ * The resize succeeded! If we made the attribute value bigger, clear
+ * the area between the old size and @new_size.
+ */
+ old_size = le32_to_cpu(a->data.resident.value_length);
+ if (new_size > old_size)
+ memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
+ old_size, 0, new_size - old_size);
+ /* Finally update the length of the attribute value. */
+ a->data.resident.value_length = cpu_to_le32(new_size);
+ return 0;
+}
+
+/**
* ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
* @ni: ntfs inode describing the attribute to convert
*
@@ -1302,6 +1372,12 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
return err;
}
/*
+ * FIXME: Compressed and encrypted attributes are not supported when
+ * writing and we should never have gotten here for them.
+ */
+ BUG_ON(NInoCompressed(ni));
+ BUG_ON(NInoEncrypted(ni));
+ /*
* The size needs to be aligned to a cluster boundary for allocation
* purposes.
*/
@@ -1377,10 +1453,15 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
BUG_ON(a->non_resident);
/*
* Calculate new offsets for the name and the mapping pairs array.
- * We assume the attribute is not compressed or sparse.
*/
- name_ofs = (offsetof(ATTR_REC,
- data.non_resident.compressed_size) + 7) & ~7;
+ if (NInoSparse(ni) || NInoCompressed(ni))
+ name_ofs = (offsetof(ATTR_REC,
+ data.non_resident.compressed_size) +
+ sizeof(a->data.non_resident.compressed_size) +
+ 7) & ~7;
+ else
+ name_ofs = (offsetof(ATTR_REC,
+ data.non_resident.compressed_size) + 7) & ~7;
mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
/*
* Determine the size of the resident part of the now non-resident
@@ -1419,24 +1500,23 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset),
a->name_length * sizeof(ntfschar));
a->name_offset = cpu_to_le16(name_ofs);
- /*
- * FIXME: For now just clear all of these as we do not support them
- * when writing.
- */
- a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE |
- ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK));
/* Setup the fields specific to non-resident attributes. */
a->data.non_resident.lowest_vcn = 0;
a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >>
vol->cluster_size_bits);
a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs);
- a->data.non_resident.compression_unit = 0;
memset(&a->data.non_resident.reserved, 0,
sizeof(a->data.non_resident.reserved));
a->data.non_resident.allocated_size = cpu_to_sle64(new_size);
a->data.non_resident.data_size =
a->data.non_resident.initialized_size =
cpu_to_sle64(attr_size);
+ if (NInoSparse(ni) || NInoCompressed(ni)) {
+ a->data.non_resident.compression_unit = 4;
+ a->data.non_resident.compressed_size =
+ a->data.non_resident.allocated_size;
+ } else
+ a->data.non_resident.compression_unit = 0;
/* Generate the mapping pairs array into the attribute record. */
err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs,
arec_size - mp_ofs, rl, 0, -1, NULL);
@@ -1446,16 +1526,19 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni)
goto undo_err_out;
}
/* Setup the in-memory attribute structure to be non-resident. */
- /*
- * FIXME: For now just clear all of these as we do not support them
- * when writing.
- */
- NInoClearSparse(ni);
- NInoClearEncrypted(ni);
- NInoClearCompressed(ni);
ni->runlist.rl = rl;
write_lock_irqsave(&ni->size_lock, flags);
ni->allocated_size = new_size;
+ if (NInoSparse(ni) || NInoCompressed(ni)) {
+ ni->itype.compressed.size = ni->allocated_size;
+ ni->itype.compressed.block_size = 1U <<
+ (a->data.non_resident.compression_unit +
+ vol->cluster_size_bits);
+ ni->itype.compressed.block_size_bits =
+ ffs(ni->itype.compressed.block_size) - 1;
+ ni->itype.compressed.block_clusters = 1U <<
+ a->data.non_resident.compression_unit;
+ }
write_unlock_irqrestore(&ni->size_lock, flags);
/*
* This needs to be last since the address space operations ->readpage
@@ -1603,6 +1686,12 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
BUG_ON(cnt < 0);
if (!cnt)
goto done;
+ /*
+ * FIXME: Compressed and encrypted attributes are not supported when
+ * writing and we should never have gotten here for them.
+ */
+ BUG_ON(NInoCompressed(ni));
+ BUG_ON(NInoEncrypted(ni));
mapping = VFS_I(ni)->i_mapping;
/* Work out the starting index and page offset. */
idx = ofs >> PAGE_CACHE_SHIFT;
diff --git a/fs/ntfs/attrib.h b/fs/ntfs/attrib.h
index 0e4ac6d3c0e7..0618ed6fd7b3 100644
--- a/fs/ntfs/attrib.h
+++ b/fs/ntfs/attrib.h
@@ -99,6 +99,8 @@ extern int ntfs_attr_can_be_resident(const ntfs_volume *vol,
const ATTR_TYPE type);
extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size);
+extern int ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
+ const u32 new_size);
extern int ntfs_attr_make_non_resident(ntfs_inode *ni);
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 6d265cfd49aa..25d24106f893 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -539,7 +539,6 @@ int ntfs_read_compressed_block(struct page *page)
if (unlikely(!pages || !bhs)) {
kfree(bhs);
kfree(pages);
- SetPageError(page);
unlock_page(page);
ntfs_error(vol->sb, "Failed to allocate internal buffers.");
return -ENOMEM;
@@ -871,9 +870,6 @@ lock_retry_remap:
for (; prev_cur_page < cur_page; prev_cur_page++) {
page = pages[prev_cur_page];
if (page) {
- if (prev_cur_page == xpage &&
- !xpage_done)
- SetPageError(page);
flush_dcache_page(page);
kunmap(page);
unlock_page(page);
@@ -904,8 +900,6 @@ lock_retry_remap:
"Terminating them with extreme "
"prejudice. Inode 0x%lx, page index "
"0x%lx.", ni->mft_no, page->index);
- if (cur_page == xpage && !xpage_done)
- SetPageError(page);
flush_dcache_page(page);
kunmap(page);
unlock_page(page);
@@ -953,8 +947,6 @@ err_out:
for (i = cur_page; i < max_page; i++) {
page = pages[i];
if (page) {
- if (i == xpage && !xpage_done)
- SetPageError(page);
flush_dcache_page(page);
kunmap(page);
unlock_page(page);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 46779471c542..795c3d1930f5 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1051,7 +1051,8 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
ie->key.file_name.file_name_length, &name,
NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
if (name_len <= 0) {
- ntfs_debug("Skipping unrepresentable file.");
+ ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
+ (long long)MREF_LE(ie->data.dir.indexed_file));
return 0;
}
if (ie->key.file_name.file_attributes &
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index e0f530ce6b99..be9fd1dd423d 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1,7 +1,7 @@
/*
- * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
+ * file.c - NTFS kernel file operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -94,6 +94,11 @@ static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
if (!datasync || !NInoNonResident(NTFS_I(vi)))
ret = ntfs_write_inode(vi, 1);
write_inode_now(vi, !datasync);
+ /*
+ * NOTE: If we were to use mapping->private_list (see ext2 and
+ * fs/buffer.c) for dirty blocks then we could optimize the below to be
+ * sync_mapping_buffers(vi->i_mapping).
+ */
err = sync_blockdev(vi->i_sb->s_bdev);
if (unlikely(err && !ret))
ret = err;
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 11fd5307d780..8f2d5727546f 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -205,6 +205,7 @@ int ntfs_index_lookup(const void *key, const int key_len,
&ie->key, key_len)) {
ir_done:
ictx->is_in_root = TRUE;
+ ictx->ir = ir;
ictx->actx = actx;
ictx->base_ni = base_ni;
ictx->ia = NULL;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 886214a77f90..dc4bbe3acf5c 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1013,41 +1013,50 @@ skip_large_dir_stuff:
}
a = ctx->attr;
/* Setup the state. */
- if (a->non_resident) {
- NInoSetNonResident(ni);
- if (a->flags & (ATTR_COMPRESSION_MASK |
- ATTR_IS_SPARSE)) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- NInoSetCompressed(ni);
- if (vol->cluster_size > 4096) {
- ntfs_error(vi->i_sb, "Found "
+ if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
+ if (a->flags & ATTR_COMPRESSION_MASK) {
+ NInoSetCompressed(ni);
+ if (vol->cluster_size > 4096) {
+ ntfs_error(vi->i_sb, "Found "
"compressed data but "
"compression is "
"disabled due to "
"cluster size (%i) > "
"4kiB.",
vol->cluster_size);
- goto unm_err_out;
- }
- if ((a->flags & ATTR_COMPRESSION_MASK)
- != ATTR_IS_COMPRESSED) {
- ntfs_error(vi->i_sb, "Found "
- "unknown compression "
- "method or corrupt "
- "file.");
- goto unm_err_out;
- }
+ goto unm_err_out;
+ }
+ if ((a->flags & ATTR_COMPRESSION_MASK)
+ != ATTR_IS_COMPRESSED) {
+ ntfs_error(vi->i_sb, "Found unknown "
+ "compression method "
+ "or corrupt file.");
+ goto unm_err_out;
}
- if (a->flags & ATTR_IS_SPARSE)
- NInoSetSparse(ni);
+ }
+ if (a->flags & ATTR_IS_SPARSE)
+ NInoSetSparse(ni);
+ }
+ if (a->flags & ATTR_IS_ENCRYPTED) {
+ if (NInoCompressed(ni)) {
+ ntfs_error(vi->i_sb, "Found encrypted and "
+ "compressed data.");
+ goto unm_err_out;
+ }
+ NInoSetEncrypted(ni);
+ }
+ if (a->non_resident) {
+ NInoSetNonResident(ni);
+ if (NInoCompressed(ni) || NInoSparse(ni)) {
if (a->data.non_resident.compression_unit !=
4) {
ntfs_error(vi->i_sb, "Found "
- "nonstandard compression unit "
- "(%u instead of 4). Cannot "
- "handle this.",
- a->data.non_resident.
- compression_unit);
+ "nonstandard "
+ "compression unit (%u "
+ "instead of 4). "
+ "Cannot handle this.",
+ a->data.non_resident.
+ compression_unit);
err = -EOPNOTSUPP;
goto unm_err_out;
}
@@ -1065,14 +1074,6 @@ skip_large_dir_stuff:
a->data.non_resident.
compressed_size);
}
- if (a->flags & ATTR_IS_ENCRYPTED) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- ntfs_error(vi->i_sb, "Found encrypted "
- "and compressed data.");
- goto unm_err_out;
- }
- NInoSetEncrypted(ni);
- }
if (a->data.non_resident.lowest_vcn) {
ntfs_error(vi->i_sb, "First extent of $DATA "
"attribute has non zero "
@@ -1212,6 +1213,75 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
if (unlikely(err))
goto unm_err_out;
a = ctx->attr;
+ if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
+ if (a->flags & ATTR_COMPRESSION_MASK) {
+ NInoSetCompressed(ni);
+ if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
+ ni->name_len)) {
+ ntfs_error(vi->i_sb, "Found compressed "
+ "non-data or named data "
+ "attribute. Please report "
+ "you saw this message to "
+ "linux-ntfs-dev@lists."
+ "sourceforge.net");
+ goto unm_err_out;
+ }
+ if (vol->cluster_size > 4096) {
+ ntfs_error(vi->i_sb, "Found compressed "
+ "attribute but compression is "
+ "disabled due to cluster size "
+ "(%i) > 4kiB.",
+ vol->cluster_size);
+ goto unm_err_out;
+ }
+ if ((a->flags & ATTR_COMPRESSION_MASK) !=
+ ATTR_IS_COMPRESSED) {
+ ntfs_error(vi->i_sb, "Found unknown "
+ "compression method.");
+ goto unm_err_out;
+ }
+ }
+ /*
+ * The encryption flag set in an index root just means to
+ * compress all files.
+ */
+ if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
+ ntfs_error(vi->i_sb, "Found mst protected attribute "
+ "but the attribute is %s. Please "
+ "report you saw this message to "
+ "linux-ntfs-dev@lists.sourceforge.net",
+ NInoCompressed(ni) ? "compressed" :
+ "sparse");
+ goto unm_err_out;
+ }
+ if (a->flags & ATTR_IS_SPARSE)
+ NInoSetSparse(ni);
+ }
+ if (a->flags & ATTR_IS_ENCRYPTED) {
+ if (NInoCompressed(ni)) {
+ ntfs_error(vi->i_sb, "Found encrypted and compressed "
+ "data.");
+ goto unm_err_out;
+ }
+ /*
+ * The encryption flag set in an index root just means to
+ * encrypt all files.
+ */
+ if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
+ ntfs_error(vi->i_sb, "Found mst protected attribute "
+ "but the attribute is encrypted. "
+ "Please report you saw this message "
+ "to linux-ntfs-dev@lists.sourceforge."
+ "net");
+ goto unm_err_out;
+ }
+ if (ni->type != AT_DATA) {
+ ntfs_error(vi->i_sb, "Found encrypted non-data "
+ "attribute.");
+ goto unm_err_out;
+ }
+ NInoSetEncrypted(ni);
+ }
if (!a->non_resident) {
/* Ensure the attribute name is placed before the value. */
if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
@@ -1220,11 +1290,10 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
"the attribute value.");
goto unm_err_out;
}
- if (NInoMstProtected(ni) || a->flags) {
+ if (NInoMstProtected(ni)) {
ntfs_error(vi->i_sb, "Found mst protected attribute "
- "or attribute with non-zero flags but "
- "the attribute is resident. Please "
- "report you saw this message to "
+ "but the attribute is resident. "
+ "Please report you saw this message to "
"linux-ntfs-dev@lists.sourceforge.net");
goto unm_err_out;
}
@@ -1250,50 +1319,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
"the mapping pairs array.");
goto unm_err_out;
}
- if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- NInoSetCompressed(ni);
- if ((ni->type != AT_DATA) || (ni->type ==
- AT_DATA && ni->name_len)) {
- ntfs_error(vi->i_sb, "Found compressed "
- "non-data or named "
- "data attribute. "
- "Please report you "
- "saw this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net");
- goto unm_err_out;
- }
- if (vol->cluster_size > 4096) {
- ntfs_error(vi->i_sb, "Found compressed "
- "attribute but "
- "compression is "
- "disabled due to "
- "cluster size (%i) > "
- "4kiB.",
- vol->cluster_size);
- goto unm_err_out;
- }
- if ((a->flags & ATTR_COMPRESSION_MASK) !=
- ATTR_IS_COMPRESSED) {
- ntfs_error(vi->i_sb, "Found unknown "
- "compression method.");
- goto unm_err_out;
- }
- }
- if (NInoMstProtected(ni)) {
- ntfs_error(vi->i_sb, "Found mst protected "
- "attribute but the attribute "
- "is %s. Please report you "
- "saw this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net",
- NInoCompressed(ni) ?
- "compressed" : "sparse");
- goto unm_err_out;
- }
- if (a->flags & ATTR_IS_SPARSE)
- NInoSetSparse(ni);
+ if ((NInoCompressed(ni) || NInoSparse(ni)) &&
+ ni->type != AT_INDEX_ROOT) {
if (a->data.non_resident.compression_unit != 4) {
ntfs_error(vi->i_sb, "Found nonstandard "
"compression unit (%u instead "
@@ -1313,23 +1340,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
ni->itype.compressed.size = sle64_to_cpu(
a->data.non_resident.compressed_size);
}
- if (a->flags & ATTR_IS_ENCRYPTED) {
- if (a->flags & ATTR_COMPRESSION_MASK) {
- ntfs_error(vi->i_sb, "Found encrypted and "
- "compressed data.");
- goto unm_err_out;
- }
- if (NInoMstProtected(ni)) {
- ntfs_error(vi->i_sb, "Found mst protected "
- "attribute but the attribute "
- "is encrypted. Please report "
- "you saw this message to "
- "linux-ntfs-dev@lists."
- "sourceforge.net");
- goto unm_err_out;
- }
- NInoSetEncrypted(ni);
- }
if (a->data.non_resident.lowest_vcn) {
ntfs_error(vi->i_sb, "First extent of attribute has "
"non-zero lowest_vcn.");
@@ -1348,12 +1358,12 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
vi->i_mapping->a_ops = &ntfs_mst_aops;
else
vi->i_mapping->a_ops = &ntfs_aops;
- if (NInoCompressed(ni) || NInoSparse(ni))
+ if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
vi->i_blocks = ni->itype.compressed.size >> 9;
else
vi->i_blocks = ni->allocated_size >> 9;
/*
- * Make sure the base inode doesn't go away and attach it to the
+ * Make sure the base inode does not go away and attach it to the
* attribute inode.
*/
igrab(base_vi);
@@ -1480,7 +1490,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
"after the attribute value.");
goto unm_err_out;
}
- /* Compressed/encrypted/sparse index root is not allowed. */
+ /*
+ * Compressed/encrypted/sparse index root is not allowed, except for
+ * directories of course but those are not dealt with here.
+ */
if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
ATTR_IS_SPARSE)) {
ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
@@ -2430,16 +2443,18 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
* We skipped the truncate but must still update
* timestamps.
*/
- ia_valid |= ATTR_MTIME|ATTR_CTIME;
+ ia_valid |= ATTR_MTIME | ATTR_CTIME;
}
}
-
if (ia_valid & ATTR_ATIME)
- vi->i_atime = attr->ia_atime;
+ vi->i_atime = timespec_trunc(attr->ia_atime,
+ vi->i_sb->s_time_gran);
if (ia_valid & ATTR_MTIME)
- vi->i_mtime = attr->ia_mtime;
+ vi->i_mtime = timespec_trunc(attr->ia_mtime,
+ vi->i_sb->s_time_gran);
if (ia_valid & ATTR_CTIME)
- vi->i_ctime = attr->ia_ctime;
+ vi->i_ctime = timespec_trunc(attr->ia_ctime,
+ vi->i_sb->s_time_gran);
mark_inode_dirty(vi);
out:
return err;
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index a4bc07616e5d..7b5934290685 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -54,6 +54,8 @@ int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
int ret = 0;
ntfs_debug("Entering.");
+ if (!rl)
+ return 0;
for (; rl->length; rl++) {
int err;
@@ -163,17 +165,9 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
BUG_ON(zone < FIRST_ZONE);
BUG_ON(zone > LAST_ZONE);
- /* Return empty runlist if @count == 0 */
- // FIXME: Do we want to just return NULL instead? (AIA)
- if (!count) {
- rl = ntfs_malloc_nofs(PAGE_SIZE);
- if (!rl)
- return ERR_PTR(-ENOMEM);
- rl[0].vcn = start_vcn;
- rl[0].lcn = LCN_RL_NOT_MAPPED;
- rl[0].length = 0;
- return rl;
- }
+ /* Return NULL if @count is zero. */
+ if (!count)
+ return NULL;
/* Take the lcnbmp lock for writing. */
down_write(&vol->lcnbmp_lock);
/*
@@ -788,7 +782,8 @@ out:
* @vi: vfs inode whose runlist describes the clusters to free
* @start_vcn: vcn in the runlist of @vi at which to start freeing clusters
* @count: number of clusters to free or -1 for all clusters
- * @is_rollback: if TRUE this is a rollback operation
+ * @write_locked: true if the runlist is locked for writing
+ * @is_rollback: true if this is a rollback operation
*
* Free @count clusters starting at the cluster @start_vcn in the runlist
* described by the vfs inode @vi.
@@ -806,17 +801,17 @@ out:
* Return the number of deallocated clusters (not counting sparse ones) on
* success and -errno on error.
*
- * Locking: - The runlist described by @vi must be unlocked on entry and is
- * unlocked on return.
- * - This function takes the runlist lock of @vi for reading and
- * sometimes for writing and sometimes modifies the runlist.
+ * Locking: - The runlist described by @vi must be locked on entry and is
+ * locked on return. Note if the runlist is locked for reading the
+ * lock may be dropped and reacquired. Note the runlist may be
+ * modified when needed runlist fragments need to be mapped.
* - The volume lcn bitmap must be unlocked on entry and is unlocked
* on return.
* - This function takes the volume lcn bitmap lock for writing and
* modifies the bitmap contents.
*/
s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
- const BOOL is_rollback)
+ const BOOL write_locked, const BOOL is_rollback)
{
s64 delta, to_free, total_freed, real_freed;
ntfs_inode *ni;
@@ -848,8 +843,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
total_freed = real_freed = 0;
- down_read(&ni->runlist.lock);
- rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE);
+ rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, write_locked);
if (IS_ERR(rl)) {
if (!is_rollback)
ntfs_error(vol->sb, "Failed to find first runlist "
@@ -903,7 +897,7 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
/* Attempt to map runlist. */
vcn = rl->vcn;
- rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE);
+ rl = ntfs_attr_find_vcn_nolock(ni, vcn, write_locked);
if (IS_ERR(rl)) {
err = PTR_ERR(rl);
if (!is_rollback)
@@ -950,7 +944,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
/* Update the total done clusters. */
total_freed += to_free;
}
- up_read(&ni->runlist.lock);
if (likely(!is_rollback))
up_write(&vol->lcnbmp_lock);
@@ -960,7 +953,6 @@ s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn, s64 count,
ntfs_debug("Done.");
return real_freed;
err_out:
- up_read(&ni->runlist.lock);
if (is_rollback)
return err;
/* If no real clusters were freed, no need to rollback. */
@@ -973,7 +965,8 @@ err_out:
* If rollback fails, set the volume errors flag, emit an error
* message, and return the error code.
*/
- delta = __ntfs_cluster_free(vi, start_vcn, total_freed, TRUE);
+ delta = __ntfs_cluster_free(vi, start_vcn, total_freed, write_locked,
+ TRUE);
if (delta < 0) {
ntfs_error(vol->sb, "Failed to rollback (error %i). Leaving "
"inconsistent metadata! Unmount and run "
diff --git a/fs/ntfs/lcnalloc.h b/fs/ntfs/lcnalloc.h
index 4cac1c024af6..e4d7fb98d685 100644
--- a/fs/ntfs/lcnalloc.h
+++ b/fs/ntfs/lcnalloc.h
@@ -43,13 +43,14 @@ extern runlist_element *ntfs_cluster_alloc(ntfs_volume *vol,
const NTFS_CLUSTER_ALLOCATION_ZONES zone);
extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
- s64 count, const BOOL is_rollback);
+ s64 count, const BOOL write_locked, const BOOL is_rollback);
/**
* ntfs_cluster_free - free clusters on an ntfs volume
* @vi: vfs inode whose runlist describes the clusters to free
* @start_vcn: vcn in the runlist of @vi at which to start freeing clusters
* @count: number of clusters to free or -1 for all clusters
+ * @write_locked: true if the runlist is locked for writing
*
* Free @count clusters starting at the cluster @start_vcn in the runlist
* described by the vfs inode @vi.
@@ -64,19 +65,19 @@ extern s64 __ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
* Return the number of deallocated clusters (not counting sparse ones) on
* success and -errno on error.
*
- * Locking: - The runlist described by @vi must be unlocked on entry and is
- * unlocked on return.
- * - This function takes the runlist lock of @vi for reading and
- * sometimes for writing and sometimes modifies the runlist.
+ * Locking: - The runlist described by @vi must be locked on entry and is
+ * locked on return. Note if the runlist is locked for reading the
+ * lock may be dropped and reacquired. Note the runlist may be
+ * modified when needed runlist fragments need to be mapped.
* - The volume lcn bitmap must be unlocked on entry and is unlocked
* on return.
* - This function takes the volume lcn bitmap lock for writing and
* modifies the bitmap contents.
*/
static inline s64 ntfs_cluster_free(struct inode *vi, const VCN start_vcn,
- s64 count)
+ s64 count, const BOOL write_locked)
{
- return __ntfs_cluster_free(vi, start_vcn, count, FALSE);
+ return __ntfs_cluster_free(vi, start_vcn, count, write_locked, FALSE);
}
extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
@@ -93,8 +94,10 @@ extern int ntfs_cluster_free_from_rl_nolock(ntfs_volume *vol,
*
* Return 0 on success and -errno on error.
*
- * Locking: This function takes the volume lcn bitmap lock for writing and
- * modifies the bitmap contents.
+ * Locking: - This function takes the volume lcn bitmap lock for writing and
+ * modifies the bitmap contents.
+ * - The caller must have locked the runlist @rl for reading or
+ * writing.
*/
static inline int ntfs_cluster_free_from_rl(ntfs_volume *vol,
const runlist_element *rl)
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index 8edb8e20fb08..0173e95500d9 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -121,7 +121,7 @@ static BOOL ntfs_check_restart_page_header(struct inode *vi,
*/
if (!ntfs_is_chkd_record(rp->magic) && sle64_to_cpu(rp->chkdsk_lsn)) {
ntfs_error(vi->i_sb, "$LogFile restart page is not modified "
- "chkdsk but a chkdsk LSN is specified.");
+ "by chkdsk but a chkdsk LSN is specified.");
return FALSE;
}
ntfs_debug("Done.");
@@ -312,10 +312,12 @@ err_out:
* @vi: $LogFile inode to which the restart page belongs
* @rp: restart page to check
* @pos: position in @vi at which the restart page resides
- * @wrp: copy of the multi sector transfer deprotected restart page
+ * @wrp: [OUT] copy of the multi sector transfer deprotected restart page
+ * @lsn: [OUT] set to the current logfile lsn on success
*
- * Check the restart page @rp for consistency and return TRUE if it is
- * consistent and FALSE otherwise.
+ * Check the restart page @rp for consistency and return 0 if it is consistent
+ * and -errno otherwise. The restart page may have been modified by chkdsk in
+ * which case its magic is CHKD instead of RSTR.
*
* This function only needs NTFS_BLOCK_SIZE bytes in @rp, i.e. it does not
* require the full restart page.
@@ -323,25 +325,33 @@ err_out:
* If @wrp is not NULL, on success, *@wrp will point to a buffer containing a
* copy of the complete multi sector transfer deprotected page. On failure,
* *@wrp is undefined.
+ *
+ * Simillarly, if @lsn is not NULL, on succes *@lsn will be set to the current
+ * logfile lsn according to this restart page. On failure, *@lsn is undefined.
+ *
+ * The following error codes are defined:
+ * -EINVAL - The restart page is inconsistent.
+ * -ENOMEM - Not enough memory to load the restart page.
+ * -EIO - Failed to reading from $LogFile.
*/
-static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
- RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp)
+static int ntfs_check_and_load_restart_page(struct inode *vi,
+ RESTART_PAGE_HEADER *rp, s64 pos, RESTART_PAGE_HEADER **wrp,
+ LSN *lsn)
{
RESTART_AREA *ra;
RESTART_PAGE_HEADER *trp;
- int size;
- BOOL ret;
+ int size, err;
ntfs_debug("Entering.");
/* Check the restart page header for consistency. */
if (!ntfs_check_restart_page_header(vi, rp, pos)) {
/* Error output already done inside the function. */
- return FALSE;
+ return -EINVAL;
}
/* Check the restart area for consistency. */
if (!ntfs_check_restart_area(vi, rp)) {
/* Error output already done inside the function. */
- return FALSE;
+ return -EINVAL;
}
ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
/*
@@ -352,7 +362,7 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
if (!trp) {
ntfs_error(vi->i_sb, "Failed to allocate memory for $LogFile "
"restart page buffer.");
- return FALSE;
+ return -ENOMEM;
}
/*
* Read the whole of the restart page into the buffer. If it fits
@@ -379,6 +389,9 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
if (IS_ERR(page)) {
ntfs_error(vi->i_sb, "Error mapping $LogFile "
"page (index %lu).", idx);
+ err = PTR_ERR(page);
+ if (err != -EIO && err != -ENOMEM)
+ err = -EIO;
goto err_out;
}
size = min_t(int, to_read, PAGE_CACHE_SIZE);
@@ -392,29 +405,57 @@ static BOOL ntfs_check_and_load_restart_page(struct inode *vi,
/* Perform the multi sector transfer deprotection on the buffer. */
if (post_read_mst_fixup((NTFS_RECORD*)trp,
le32_to_cpu(rp->system_page_size))) {
- ntfs_error(vi->i_sb, "Multi sector transfer error detected in "
- "$LogFile restart page.");
- goto err_out;
+ /*
+ * A multi sector tranfer error was detected. We only need to
+ * abort if the restart page contents exceed the multi sector
+ * transfer fixup of the first sector.
+ */
+ if (le16_to_cpu(rp->restart_area_offset) +
+ le16_to_cpu(ra->restart_area_length) >
+ NTFS_BLOCK_SIZE - sizeof(u16)) {
+ ntfs_error(vi->i_sb, "Multi sector transfer error "
+ "detected in $LogFile restart page.");
+ err = -EINVAL;
+ goto err_out;
+ }
+ }
+ /*
+ * If the restart page is modified by chkdsk or there are no active
+ * logfile clients, the logfile is consistent. Otherwise, need to
+ * check the log client records for consistency, too.
+ */
+ err = 0;
+ if (ntfs_is_rstr_record(rp->magic) &&
+ ra->client_in_use_list != LOGFILE_NO_CLIENT) {
+ if (!ntfs_check_log_client_array(vi, trp)) {
+ err = -EINVAL;
+ goto err_out;
+ }
+ }
+ if (lsn) {
+ if (ntfs_is_rstr_record(rp->magic))
+ *lsn = sle64_to_cpu(ra->current_lsn);
+ else /* if (ntfs_is_chkd_record(rp->magic)) */
+ *lsn = sle64_to_cpu(rp->chkdsk_lsn);
}
- /* Check the log client records for consistency. */
- ret = ntfs_check_log_client_array(vi, trp);
- if (ret && wrp)
- *wrp = trp;
- else
- ntfs_free(trp);
ntfs_debug("Done.");
- return ret;
+ if (wrp)
+ *wrp = trp;
+ else {
err_out:
- ntfs_free(trp);
- return FALSE;
+ ntfs_free(trp);
+ }
+ return err;
}
/**
* ntfs_check_logfile - check the journal for consistency
* @log_vi: struct inode of loaded journal $LogFile to check
+ * @rp: [OUT] on success this is a copy of the current restart page
*
* Check the $LogFile journal for consistency and return TRUE if it is
- * consistent and FALSE if not.
+ * consistent and FALSE if not. On success, the current restart page is
+ * returned in *@rp. Caller must call ntfs_free(*@rp) when finished with it.
*
* At present we only check the two restart pages and ignore the log record
* pages.
@@ -424,19 +465,18 @@ err_out:
* if the $LogFile was created on a system with a different page size to ours
* yet and mst deprotection would fail if our page size is smaller.
*/
-BOOL ntfs_check_logfile(struct inode *log_vi)
+BOOL ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp)
{
- s64 size, pos, rstr1_pos, rstr2_pos;
+ s64 size, pos;
+ LSN rstr1_lsn, rstr2_lsn;
ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
struct address_space *mapping = log_vi->i_mapping;
struct page *page = NULL;
u8 *kaddr = NULL;
RESTART_PAGE_HEADER *rstr1_ph = NULL;
RESTART_PAGE_HEADER *rstr2_ph = NULL;
- int log_page_size, log_page_mask, ofs;
+ int log_page_size, log_page_mask, err;
BOOL logfile_is_empty = TRUE;
- BOOL rstr1_found = FALSE;
- BOOL rstr2_found = FALSE;
u8 log_page_bits;
ntfs_debug("Entering.");
@@ -491,7 +531,7 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
if (IS_ERR(page)) {
ntfs_error(vol->sb, "Error mapping $LogFile "
"page (index %lu).", idx);
- return FALSE;
+ goto err_out;
}
}
kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK);
@@ -510,99 +550,95 @@ BOOL ntfs_check_logfile(struct inode *log_vi)
*/
if (ntfs_is_rcrd_recordp((le32*)kaddr))
break;
- /*
- * A modified by chkdsk restart page means we cannot handle
- * this log file.
- */
- if (ntfs_is_chkd_recordp((le32*)kaddr)) {
- ntfs_error(vol->sb, "$LogFile has been modified by "
- "chkdsk. Mount this volume in "
- "Windows.");
- goto err_out;
- }
- /* If not a restart page, continue. */
- if (!ntfs_is_rstr_recordp((le32*)kaddr)) {
- /* Skip to the minimum page size for the next one. */
+ /* If not a (modified by chkdsk) restart page, continue. */
+ if (!ntfs_is_rstr_recordp((le32*)kaddr) &&
+ !ntfs_is_chkd_recordp((le32*)kaddr)) {
if (!pos)
pos = NTFS_BLOCK_SIZE >> 1;
continue;
}
- /* We now know we have a restart page. */
- if (!pos) {
- rstr1_found = TRUE;
- rstr1_pos = pos;
- } else {
- if (rstr2_found) {
- ntfs_error(vol->sb, "Found more than two "
- "restart pages in $LogFile.");
- goto err_out;
- }
- rstr2_found = TRUE;
- rstr2_pos = pos;
- }
/*
- * Check the restart page for consistency and get a copy of the
- * complete multi sector transfer deprotected restart page.
+ * Check the (modified by chkdsk) restart page for consistency
+ * and get a copy of the complete multi sector transfer
+ * deprotected restart page.
*/
- if (!ntfs_check_and_load_restart_page(log_vi,
+ err = ntfs_check_and_load_restart_page(log_vi,
(RESTART_PAGE_HEADER*)kaddr, pos,
- !pos ? &rstr1_ph : &rstr2_ph)) {
- /* Error output already done inside the function. */
- goto err_out;
+ !rstr1_ph ? &rstr1_ph : &rstr2_ph,
+ !rstr1_ph ? &rstr1_lsn : &rstr2_lsn);
+ if (!err) {
+ /*
+ * If we have now found the first (modified by chkdsk)
+ * restart page, continue looking for the second one.
+ */
+ if (!pos) {
+ pos = NTFS_BLOCK_SIZE >> 1;
+ continue;
+ }
+ /*
+ * We have now found the second (modified by chkdsk)
+ * restart page, so we can stop looking.
+ */
+ break;
}
/*
- * We have a valid restart page. The next one must be after
- * a whole system page size as specified by the valid restart
- * page.
+ * Error output already done inside the function. Note, we do
+ * not abort if the restart page was invalid as we might still
+ * find a valid one further in the file.
*/
+ if (err != -EINVAL) {
+ ntfs_unmap_page(page);
+ goto err_out;
+ }
+ /* Continue looking. */
if (!pos)
- pos = le32_to_cpu(rstr1_ph->system_page_size) >> 1;
+ pos = NTFS_BLOCK_SIZE >> 1;
}
- if (page) {
+ if (page)
ntfs_unmap_page(page);
- page = NULL;
- }
if (logfile_is_empty) {
NVolSetLogFileEmpty(vol);
is_empty:
ntfs_debug("Done. ($LogFile is empty.)");
return TRUE;
}
- if (!rstr1_found || !rstr2_found) {
- ntfs_error(vol->sb, "Did not find two restart pages in "
- "$LogFile.");
- goto err_out;
+ if (!rstr1_ph) {
+ BUG_ON(rstr2_ph);
+ ntfs_error(vol->sb, "Did not find any restart pages in "
+ "$LogFile and it was not empty.");
+ return FALSE;
+ }
+ /* If both restart pages were found, use the more recent one. */
+ if (rstr2_ph) {
+ /*
+ * If the second restart area is more recent, switch to it.
+ * Otherwise just throw it away.
+ */
+ if (rstr2_lsn > rstr1_lsn) {
+ ntfs_free(rstr1_ph);
+ rstr1_ph = rstr2_ph;
+ /* rstr1_lsn = rstr2_lsn; */
+ } else
+ ntfs_free(rstr2_ph);
+ rstr2_ph = NULL;
}
- /*
- * The two restart areas must be identical except for the update
- * sequence number.
- */
- ofs = le16_to_cpu(rstr1_ph->usa_ofs);
- if (memcmp(rstr1_ph, rstr2_ph, ofs) || (ofs += sizeof(u16),
- memcmp((u8*)rstr1_ph + ofs, (u8*)rstr2_ph + ofs,
- le32_to_cpu(rstr1_ph->system_page_size) - ofs))) {
- ntfs_error(vol->sb, "The two restart pages in $LogFile do not "
- "match.");
- goto err_out;
- }
- ntfs_free(rstr1_ph);
- ntfs_free(rstr2_ph);
/* All consistency checks passed. */
+ if (rp)
+ *rp = rstr1_ph;
+ else
+ ntfs_free(rstr1_ph);
ntfs_debug("Done.");
return TRUE;
err_out:
- if (page)
- ntfs_unmap_page(page);
if (rstr1_ph)
ntfs_free(rstr1_ph);
- if (rstr2_ph)
- ntfs_free(rstr2_ph);
return FALSE;
}
/**
* ntfs_is_logfile_clean - check in the journal if the volume is clean
* @log_vi: struct inode of loaded journal $LogFile to check
+ * @rp: copy of the current restart page
*
* Analyze the $LogFile journal and return TRUE if it indicates the volume was
* shutdown cleanly and FALSE if not.
@@ -619,11 +655,9 @@ err_out:
* is empty this function requires that NVolLogFileEmpty() is true otherwise an
* empty volume will be reported as dirty.
*/
-BOOL ntfs_is_logfile_clean(struct inode *log_vi)
+BOOL ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp)
{
ntfs_volume *vol = NTFS_SB(log_vi->i_sb);
- struct page *page;
- RESTART_PAGE_HEADER *rp;
RESTART_AREA *ra;
ntfs_debug("Entering.");
@@ -632,24 +666,15 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi)
ntfs_debug("Done. ($LogFile is empty.)");
return TRUE;
}
- /*
- * Read the first restart page. It will be possibly incomplete and
- * will not be multi sector transfer deprotected but we only need the
- * first NTFS_BLOCK_SIZE bytes so it does not matter.
- */
- page = ntfs_map_page(log_vi->i_mapping, 0);
- if (IS_ERR(page)) {
- ntfs_error(vol->sb, "Error mapping $LogFile page (index 0).");
+ BUG_ON(!rp);
+ if (!ntfs_is_rstr_record(rp->magic) &&
+ !ntfs_is_chkd_record(rp->magic)) {
+ ntfs_error(vol->sb, "Restart page buffer is invalid. This is "
+ "probably a bug in that the $LogFile should "
+ "have been consistency checked before calling "
+ "this function.");
return FALSE;
}
- rp = (RESTART_PAGE_HEADER*)page_address(page);
- if (!ntfs_is_rstr_record(rp->magic)) {
- ntfs_error(vol->sb, "No restart page found at offset zero in "
- "$LogFile. This is probably a bug in that "
- "the $LogFile should have been consistency "
- "checked before calling this function.");
- goto err_out;
- }
ra = (RESTART_AREA*)((u8*)rp + le16_to_cpu(rp->restart_area_offset));
/*
* If the $LogFile has active clients, i.e. it is open, and we do not
@@ -659,15 +684,11 @@ BOOL ntfs_is_logfile_clean(struct inode *log_vi)
if (ra->client_in_use_list != LOGFILE_NO_CLIENT &&
!(ra->flags & RESTART_VOLUME_IS_CLEAN)) {
ntfs_debug("Done. $LogFile indicates a dirty shutdown.");
- goto err_out;
+ return FALSE;
}
- ntfs_unmap_page(page);
/* $LogFile indicates a clean shutdown. */
ntfs_debug("Done. $LogFile indicates a clean shutdown.");
return TRUE;
-err_out:
- ntfs_unmap_page(page);
- return FALSE;
}
/**
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index 4ee4378de061..42388f95ea6d 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -2,7 +2,7 @@
* logfile.h - Defines for NTFS kernel journal ($LogFile) handling. Part of
* the Linux-NTFS project.
*
- * Copyright (c) 2000-2004 Anton Altaparmakov
+ * Copyright (c) 2000-2005 Anton Altaparmakov
*
* This program/include file is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published
@@ -296,9 +296,11 @@ typedef struct {
/* sizeof() = 160 (0xa0) bytes */
} __attribute__ ((__packed__)) LOG_CLIENT_RECORD;
-extern BOOL ntfs_check_logfile(struct inode *log_vi);
+extern BOOL ntfs_check_logfile(struct inode *log_vi,
+ RESTART_PAGE_HEADER **rp);
-extern BOOL ntfs_is_logfile_clean(struct inode *log_vi);
+extern BOOL ntfs_is_logfile_clean(struct inode *log_vi,
+ const RESTART_PAGE_HEADER *rp);
extern BOOL ntfs_empty_logfile(struct inode *log_vi);
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index fac5944df6d8..3288bcc2c4aa 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -27,27 +27,63 @@
#include <linux/highmem.h>
/**
- * ntfs_malloc_nofs - allocate memory in multiples of pages
- * @size number of bytes to allocate
+ * __ntfs_malloc - allocate memory in multiples of pages
+ * @size: number of bytes to allocate
+ * @gfp_mask: extra flags for the allocator
+ *
+ * Internal function. You probably want ntfs_malloc_nofs()...
*
* Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
* returns a pointer to the allocated memory.
*
* If there was insufficient memory to complete the request, return NULL.
+ * Depending on @gfp_mask the allocation may be guaranteed to succeed.
*/
-static inline void *ntfs_malloc_nofs(unsigned long size)
+static inline void *__ntfs_malloc(unsigned long size,
+ unsigned int __nocast gfp_mask)
{
if (likely(size <= PAGE_SIZE)) {
BUG_ON(!size);
/* kmalloc() has per-CPU caches so is faster for now. */
- return kmalloc(PAGE_SIZE, GFP_NOFS);
- /* return (void *)__get_free_page(GFP_NOFS | __GFP_HIGHMEM); */
+ return kmalloc(PAGE_SIZE, gfp_mask & ~__GFP_HIGHMEM);
+ /* return (void *)__get_free_page(gfp_mask); */
}
if (likely(size >> PAGE_SHIFT < num_physpages))
- return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
+ return __vmalloc(size, gfp_mask, PAGE_KERNEL);
return NULL;
}
+/**
+ * ntfs_malloc_nofs - allocate memory in multiples of pages
+ * @size: number of bytes to allocate
+ *
+ * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
+ * returns a pointer to the allocated memory.
+ *
+ * If there was insufficient memory to complete the request, return NULL.
+ */
+static inline void *ntfs_malloc_nofs(unsigned long size)
+{
+ return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM);
+}
+
+/**
+ * ntfs_malloc_nofs_nofail - allocate memory in multiples of pages
+ * @size: number of bytes to allocate
+ *
+ * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and
+ * returns a pointer to the allocated memory.
+ *
+ * This function guarantees that the allocation will succeed. It will sleep
+ * for as long as it takes to complete the allocation.
+ *
+ * If there was insufficient memory to complete the request, return NULL.
+ */
+static inline void *ntfs_malloc_nofs_nofail(unsigned long size)
+{
+ return __ntfs_malloc(size, GFP_NOFS | __GFP_HIGHMEM | __GFP_NOFAIL);
+}
+
static inline void ntfs_free(void *addr)
{
if (likely(((unsigned long)addr < VMALLOC_START) ||
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 317f7c679fd3..2c32b84385a8 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -511,7 +511,6 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
} while (bh);
tail->b_this_page = head;
attach_page_buffers(page, head);
- BUG_ON(!page_has_buffers(page));
}
bh = head = page_buffers(page);
BUG_ON(!bh);
@@ -692,7 +691,6 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
*/
if (!NInoTestClearDirty(ni))
goto done;
- BUG_ON(!page_has_buffers(page));
bh = head = page_buffers(page);
BUG_ON(!bh);
rl = NULL;
@@ -1955,7 +1953,7 @@ restore_undo_alloc:
a = ctx->attr;
a->data.non_resident.highest_vcn = cpu_to_sle64(old_last_vcn - 1);
undo_alloc:
- if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1) < 0) {
+ if (ntfs_cluster_free(vol->mft_ino, old_last_vcn, -1, TRUE) < 0) {
ntfs_error(vol->sb, "Failed to free clusters from mft data "
"attribute.%s", es);
NVolSetErrors(vol);
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index 758855b0414e..f5b2ac929081 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -35,7 +35,7 @@ static inline void ntfs_rl_mm(runlist_element *base, int dst, int src,
int size)
{
if (likely((dst != src) && (size > 0)))
- memmove(base + dst, base + src, size * sizeof (*base));
+ memmove(base + dst, base + src, size * sizeof(*base));
}
/**
@@ -95,6 +95,51 @@ static inline runlist_element *ntfs_rl_realloc(runlist_element *rl,
}
/**
+ * ntfs_rl_realloc_nofail - Reallocate memory for runlists
+ * @rl: original runlist
+ * @old_size: number of runlist elements in the original runlist @rl
+ * @new_size: number of runlist elements we need space for
+ *
+ * As the runlists grow, more memory will be required. To prevent the
+ * kernel having to allocate and reallocate large numbers of small bits of
+ * memory, this function returns an entire page of memory.
+ *
+ * This function guarantees that the allocation will succeed. It will sleep
+ * for as long as it takes to complete the allocation.
+ *
+ * It is up to the caller to serialize access to the runlist @rl.
+ *
+ * N.B. If the new allocation doesn't require a different number of pages in
+ * memory, the function will return the original pointer.
+ *
+ * On success, return a pointer to the newly allocated, or recycled, memory.
+ * On error, return -errno. The following error codes are defined:
+ * -ENOMEM - Not enough memory to allocate runlist array.
+ * -EINVAL - Invalid parameters were passed in.
+ */
+static inline runlist_element *ntfs_rl_realloc_nofail(runlist_element *rl,
+ int old_size, int new_size)
+{
+ runlist_element *new_rl;
+
+ old_size = PAGE_ALIGN(old_size * sizeof(*rl));
+ new_size = PAGE_ALIGN(new_size * sizeof(*rl));
+ if (old_size == new_size)
+ return rl;
+
+ new_rl = ntfs_malloc_nofs_nofail(new_size);
+ BUG_ON(!new_rl);
+
+ if (likely(rl != NULL)) {
+ if (unlikely(old_size > new_size))
+ old_size = new_size;
+ memcpy(new_rl, rl, old_size);
+ ntfs_free(rl);
+ }
+ return new_rl;
+}
+
+/**
* ntfs_are_rl_mergeable - test if two runlists can be joined together
* @dst: original runlist
* @src: new runlist to test for mergeability with @dst
@@ -497,6 +542,7 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
/* Scan to the end of the source runlist. */
for (dend = 0; likely(drl[dend].length); dend++)
;
+ dend++;
drl = ntfs_rl_realloc(drl, dend, dend + 1);
if (IS_ERR(drl))
return drl;
@@ -566,8 +612,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
((drl[dins].vcn + drl[dins].length) <= /* End of hole */
(srl[send - 1].vcn + srl[send - 1].length)));
- /* Or we'll lose an end marker */
- if (start && finish && (drl[dins].length == 0))
+ /* Or we will lose an end marker. */
+ if (finish && !drl[dins].length)
ss++;
if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn))
finish = FALSE;
@@ -621,11 +667,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
if (drl[ds].lcn != LCN_RL_NOT_MAPPED) {
/* Add an unmapped runlist element. */
if (!slots) {
- /* FIXME/TODO: We need to have the
- * extra memory already! (AIA) */
- drl = ntfs_rl_realloc(drl, ds, ds + 2);
- if (!drl)
- goto critical_error;
+ drl = ntfs_rl_realloc_nofail(drl, ds,
+ ds + 2);
slots = 2;
}
ds++;
@@ -640,13 +683,8 @@ runlist_element *ntfs_runlists_merge(runlist_element *drl,
drl[ds].length = marker_vcn - drl[ds].vcn;
/* Finally add the ENOENT terminator. */
ds++;
- if (!slots) {
- /* FIXME/TODO: We need to have the extra
- * memory already! (AIA) */
- drl = ntfs_rl_realloc(drl, ds, ds + 1);
- if (!drl)
- goto critical_error;
- }
+ if (!slots)
+ drl = ntfs_rl_realloc_nofail(drl, ds, ds + 1);
drl[ds].vcn = marker_vcn;
drl[ds].lcn = LCN_ENOENT;
drl[ds].length = (s64)0;
@@ -659,11 +697,6 @@ finished:
ntfs_debug("Merged runlist:");
ntfs_debug_dump_runlist(drl);
return drl;
-
-critical_error:
- /* Critical error! We cannot afford to fail here. */
- ntfs_error(NULL, "Critical error! Not enough memory.");
- panic("NTFS: Cannot continue.");
}
/**
@@ -727,6 +760,9 @@ runlist_element *ntfs_mapping_pairs_decompress(const ntfs_volume *vol,
ntfs_error(vol->sb, "Corrupt attribute.");
return ERR_PTR(-EIO);
}
+ /* If the mapping pairs array is valid but empty, nothing to do. */
+ if (!vcn && !*buf)
+ return old_rl;
/* Current position in runlist array. */
rlpos = 0;
/* Allocate first page and set current runlist size to one page. */
@@ -1419,6 +1455,7 @@ err_out:
/**
* ntfs_rl_truncate_nolock - truncate a runlist starting at a specified vcn
+ * @vol: ntfs volume (needed for error output)
* @runlist: runlist to truncate
* @new_length: the new length of the runlist in VCNs
*
@@ -1426,12 +1463,16 @@ err_out:
* holding the runlist elements to a length of @new_length VCNs.
*
* If @new_length lies within the runlist, the runlist elements with VCNs of
- * @new_length and above are discarded.
+ * @new_length and above are discarded. As a special case if @new_length is
+ * zero, the runlist is discarded and set to NULL.
*
* If @new_length lies beyond the runlist, a sparse runlist element is added to
* the end of the runlist @runlist or if the last runlist element is a sparse
* one already, this is extended.
*
+ * Note, no checking is done for unmapped runlist elements. It is assumed that
+ * the caller has mapped any elements that need to be mapped already.
+ *
* Return 0 on success and -errno on error.
*
* Locking: The caller must hold @runlist->lock for writing.
@@ -1446,6 +1487,13 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
BUG_ON(!runlist);
BUG_ON(new_length < 0);
rl = runlist->rl;
+ if (!new_length) {
+ ntfs_debug("Freeing runlist.");
+ runlist->rl = NULL;
+ if (rl)
+ ntfs_free(rl);
+ return 0;
+ }
if (unlikely(!rl)) {
/*
* Create a runlist consisting of a sparse runlist element of
@@ -1553,4 +1601,288 @@ int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist,
return 0;
}
+/**
+ * ntfs_rl_punch_nolock - punch a hole into a runlist
+ * @vol: ntfs volume (needed for error output)
+ * @runlist: runlist to punch a hole into
+ * @start: starting VCN of the hole to be created
+ * @length: size of the hole to be created in units of clusters
+ *
+ * Punch a hole into the runlist @runlist starting at VCN @start and of size
+ * @length clusters.
+ *
+ * Return 0 on success and -errno on error, in which case @runlist has not been
+ * modified.
+ *
+ * If @start and/or @start + @length are outside the runlist return error code
+ * -ENOENT.
+ *
+ * If the runlist contains unmapped or error elements between @start and @start
+ * + @length return error code -EINVAL.
+ *
+ * Locking: The caller must hold @runlist->lock for writing.
+ */
+int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
+ const VCN start, const s64 length)
+{
+ const VCN end = start + length;
+ s64 delta;
+ runlist_element *rl, *rl_end, *rl_real_end, *trl;
+ int old_size;
+ BOOL lcn_fixup = FALSE;
+
+ ntfs_debug("Entering for start 0x%llx, length 0x%llx.",
+ (long long)start, (long long)length);
+ BUG_ON(!runlist);
+ BUG_ON(start < 0);
+ BUG_ON(length < 0);
+ BUG_ON(end < 0);
+ rl = runlist->rl;
+ if (unlikely(!rl)) {
+ if (likely(!start && !length))
+ return 0;
+ return -ENOENT;
+ }
+ /* Find @start in the runlist. */
+ while (likely(rl->length && start >= rl[1].vcn))
+ rl++;
+ rl_end = rl;
+ /* Find @end in the runlist. */
+ while (likely(rl_end->length && end >= rl_end[1].vcn)) {
+ /* Verify there are no unmapped or error elements. */
+ if (unlikely(rl_end->lcn < LCN_HOLE))
+ return -EINVAL;
+ rl_end++;
+ }
+ /* Check the last element. */
+ if (unlikely(rl_end->length && rl_end->lcn < LCN_HOLE))
+ return -EINVAL;
+ /* This covers @start being out of bounds, too. */
+ if (!rl_end->length && end > rl_end->vcn)
+ return -ENOENT;
+ if (!length)
+ return 0;
+ if (!rl->length)
+ return -ENOENT;
+ rl_real_end = rl_end;
+ /* Determine the runlist size. */
+ while (likely(rl_real_end->length))
+ rl_real_end++;
+ old_size = rl_real_end - runlist->rl + 1;
+ /* If @start is in a hole simply extend the hole. */
+ if (rl->lcn == LCN_HOLE) {
+ /*
+ * If both @start and @end are in the same sparse run, we are
+ * done.
+ */
+ if (end <= rl[1].vcn) {
+ ntfs_debug("Done (requested hole is already sparse).");
+ return 0;
+ }
+extend_hole:
+ /* Extend the hole. */
+ rl->length = end - rl->vcn;
+ /* If @end is in a hole, merge it with the current one. */
+ if (rl_end->lcn == LCN_HOLE) {
+ rl_end++;
+ rl->length = rl_end->vcn - rl->vcn;
+ }
+ /* We have done the hole. Now deal with the remaining tail. */
+ rl++;
+ /* Cut out all runlist elements up to @end. */
+ if (rl < rl_end)
+ memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
+ sizeof(*rl));
+ /* Adjust the beginning of the tail if necessary. */
+ if (end > rl->vcn) {
+ s64 delta = end - rl->vcn;
+ rl->vcn = end;
+ rl->length -= delta;
+ /* Only adjust the lcn if it is real. */
+ if (rl->lcn >= 0)
+ rl->lcn += delta;
+ }
+shrink_allocation:
+ /* Reallocate memory if the allocation changed. */
+ if (rl < rl_end) {
+ rl = ntfs_rl_realloc(runlist->rl, old_size,
+ old_size - (rl_end - rl));
+ if (IS_ERR(rl))
+ ntfs_warning(vol->sb, "Failed to shrink "
+ "runlist buffer. This just "
+ "wastes a bit of memory "
+ "temporarily so we ignore it "
+ "and return success.");
+ else
+ runlist->rl = rl;
+ }
+ ntfs_debug("Done (extend hole).");
+ return 0;
+ }
+ /*
+ * If @start is at the beginning of a run things are easier as there is
+ * no need to split the first run.
+ */
+ if (start == rl->vcn) {
+ /*
+ * @start is at the beginning of a run.
+ *
+ * If the previous run is sparse, extend its hole.
+ *
+ * If @end is not in the same run, switch the run to be sparse
+ * and extend the newly created hole.
+ *
+ * Thus both of these cases reduce the problem to the above
+ * case of "@start is in a hole".
+ */
+ if (rl > runlist->rl && (rl - 1)->lcn == LCN_HOLE) {
+ rl--;
+ goto extend_hole;
+ }
+ if (end >= rl[1].vcn) {
+ rl->lcn = LCN_HOLE;
+ goto extend_hole;
+ }
+ /*
+ * The final case is when @end is in the same run as @start.
+ * For this need to split the run into two. One run for the
+ * sparse region between the beginning of the old run, i.e.
+ * @start, and @end and one for the remaining non-sparse
+ * region, i.e. between @end and the end of the old run.
+ */
+ trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
+ if (IS_ERR(trl))
+ goto enomem_out;
+ old_size++;
+ if (runlist->rl != trl) {
+ rl = trl + (rl - runlist->rl);
+ rl_end = trl + (rl_end - runlist->rl);
+ rl_real_end = trl + (rl_real_end - runlist->rl);
+ runlist->rl = trl;
+ }
+split_end:
+ /* Shift all the runs up by one. */
+ memmove(rl + 1, rl, (rl_real_end - rl + 1) * sizeof(*rl));
+ /* Finally, setup the two split runs. */
+ rl->lcn = LCN_HOLE;
+ rl->length = length;
+ rl++;
+ rl->vcn += length;
+ /* Only adjust the lcn if it is real. */
+ if (rl->lcn >= 0 || lcn_fixup)
+ rl->lcn += length;
+ rl->length -= length;
+ ntfs_debug("Done (split one).");
+ return 0;
+ }
+ /*
+ * @start is neither in a hole nor at the beginning of a run.
+ *
+ * If @end is in a hole, things are easier as simply truncating the run
+ * @start is in to end at @start - 1, deleting all runs after that up
+ * to @end, and finally extending the beginning of the run @end is in
+ * to be @start is all that is needed.
+ */
+ if (rl_end->lcn == LCN_HOLE) {
+ /* Truncate the run containing @start. */
+ rl->length = start - rl->vcn;
+ rl++;
+ /* Cut out all runlist elements up to @end. */
+ if (rl < rl_end)
+ memmove(rl, rl_end, (rl_real_end - rl_end + 1) *
+ sizeof(*rl));
+ /* Extend the beginning of the run @end is in to be @start. */
+ rl->vcn = start;
+ rl->length = rl[1].vcn - start;
+ goto shrink_allocation;
+ }
+ /*
+ * If @end is not in a hole there are still two cases to distinguish.
+ * Either @end is or is not in the same run as @start.
+ *
+ * The second case is easier as it can be reduced to an already solved
+ * problem by truncating the run @start is in to end at @start - 1.
+ * Then, if @end is in the next run need to split the run into a sparse
+ * run followed by a non-sparse run (already covered above) and if @end
+ * is not in the next run switching it to be sparse, again reduces the
+ * problem to the already covered case of "@start is in a hole".
+ */
+ if (end >= rl[1].vcn) {
+ /*
+ * If @end is not in the next run, reduce the problem to the
+ * case of "@start is in a hole".
+ */
+ if (rl[1].length && end >= rl[2].vcn) {
+ /* Truncate the run containing @start. */
+ rl->length = start - rl->vcn;
+ rl++;
+ rl->vcn = start;
+ rl->lcn = LCN_HOLE;
+ goto extend_hole;
+ }
+ trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 1);
+ if (IS_ERR(trl))
+ goto enomem_out;
+ old_size++;
+ if (runlist->rl != trl) {
+ rl = trl + (rl - runlist->rl);
+ rl_end = trl + (rl_end - runlist->rl);
+ rl_real_end = trl + (rl_real_end - runlist->rl);
+ runlist->rl = trl;
+ }
+ /* Truncate the run containing @start. */
+ rl->length = start - rl->vcn;
+ rl++;
+ /*
+ * @end is in the next run, reduce the problem to the case
+ * where "@start is at the beginning of a run and @end is in
+ * the same run as @start".
+ */
+ delta = rl->vcn - start;
+ rl->vcn = start;
+ if (rl->lcn >= 0) {
+ rl->lcn -= delta;
+ /* Need this in case the lcn just became negative. */
+ lcn_fixup = TRUE;
+ }
+ rl->length += delta;
+ goto split_end;
+ }
+ /*
+ * The first case from above, i.e. @end is in the same run as @start.
+ * We need to split the run into three. One run for the non-sparse
+ * region between the beginning of the old run and @start, one for the
+ * sparse region between @start and @end, and one for the remaining
+ * non-sparse region, i.e. between @end and the end of the old run.
+ */
+ trl = ntfs_rl_realloc(runlist->rl, old_size, old_size + 2);
+ if (IS_ERR(trl))
+ goto enomem_out;
+ old_size += 2;
+ if (runlist->rl != trl) {
+ rl = trl + (rl - runlist->rl);
+ rl_end = trl + (rl_end - runlist->rl);
+ rl_real_end = trl + (rl_real_end - runlist->rl);
+ runlist->rl = trl;
+ }
+ /* Shift all the runs up by two. */
+ memmove(rl + 2, rl, (rl_real_end - rl + 1) * sizeof(*rl));
+ /* Finally, setup the three split runs. */
+ rl->length = start - rl->vcn;
+ rl++;
+ rl->vcn = start;
+ rl->lcn = LCN_HOLE;
+ rl->length = length;
+ rl++;
+ delta = end - rl->vcn;
+ rl->vcn = end;
+ rl->lcn += delta;
+ rl->length -= delta;
+ ntfs_debug("Done (split both).");
+ return 0;
+enomem_out:
+ ntfs_error(vol->sb, "Not enough memory to extend runlist buffer.");
+ return -ENOMEM;
+}
+
#endif /* NTFS_RW */
diff --git a/fs/ntfs/runlist.h b/fs/ntfs/runlist.h
index aa0ee6540e7c..47728fbb610b 100644
--- a/fs/ntfs/runlist.h
+++ b/fs/ntfs/runlist.h
@@ -94,6 +94,9 @@ extern int ntfs_mapping_pairs_build(const ntfs_volume *vol, s8 *dst,
extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol,
runlist *const runlist, const s64 new_length);
+int ntfs_rl_punch_nolock(const ntfs_volume *vol, runlist *const runlist,
+ const VCN start, const s64 length);
+
#endif /* NTFS_RW */
#endif /* _LINUX_NTFS_RUNLIST_H */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 41aa8eb6755b..453d0d51ea4b 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -126,6 +126,14 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
if (*v) \
goto needs_val; \
}
+#define NTFS_GETOPT_OCTAL(option, variable) \
+ if (!strcmp(p, option)) { \
+ if (!v || !*v) \
+ goto needs_arg; \
+ variable = simple_strtoul(ov = v, &v, 8); \
+ if (*v) \
+ goto needs_val; \
+ }
#define NTFS_GETOPT_BOOL(option, variable) \
if (!strcmp(p, option)) { \
BOOL val; \
@@ -157,9 +165,9 @@ static BOOL parse_options(ntfs_volume *vol, char *opt)
*v++ = 0;
NTFS_GETOPT("uid", uid)
else NTFS_GETOPT("gid", gid)
- else NTFS_GETOPT("umask", fmask = dmask)
- else NTFS_GETOPT("fmask", fmask)
- else NTFS_GETOPT("dmask", dmask)
+ else NTFS_GETOPT_OCTAL("umask", fmask = dmask)
+ else NTFS_GETOPT_OCTAL("fmask", fmask)
+ else NTFS_GETOPT_OCTAL("dmask", dmask)
else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier)
else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE)
else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files)
@@ -1133,7 +1141,8 @@ mft_unmap_out:
*
* Return TRUE on success or FALSE on error.
*/
-static BOOL load_and_check_logfile(ntfs_volume *vol)
+static BOOL load_and_check_logfile(ntfs_volume *vol,
+ RESTART_PAGE_HEADER **rp)
{
struct inode *tmp_ino;
@@ -1145,7 +1154,7 @@ static BOOL load_and_check_logfile(ntfs_volume *vol)
/* Caller will display error message. */
return FALSE;
}
- if (!ntfs_check_logfile(tmp_ino)) {
+ if (!ntfs_check_logfile(tmp_ino, rp)) {
iput(tmp_ino);
/* ntfs_check_logfile() will have displayed error output. */
return FALSE;
@@ -1689,6 +1698,7 @@ static BOOL load_system_files(ntfs_volume *vol)
VOLUME_INFORMATION *vi;
ntfs_attr_search_ctx *ctx;
#ifdef NTFS_RW
+ RESTART_PAGE_HEADER *rp;
int err;
#endif /* NTFS_RW */
@@ -1841,8 +1851,9 @@ get_ctx_vol_failed:
* Get the inode for the logfile, check it and determine if the volume
* was shutdown cleanly.
*/
- if (!load_and_check_logfile(vol) ||
- !ntfs_is_logfile_clean(vol->logfile_ino)) {
+ rp = NULL;
+ if (!load_and_check_logfile(vol, &rp) ||
+ !ntfs_is_logfile_clean(vol->logfile_ino, rp)) {
static const char *es1a = "Failed to load $LogFile";
static const char *es1b = "$LogFile is not clean";
static const char *es2 = ". Mount in Windows.";
@@ -1857,6 +1868,10 @@ get_ctx_vol_failed:
"continue nor on_errors="
"remount-ro was specified%s",
es1, es2);
+ if (vol->logfile_ino) {
+ BUG_ON(!rp);
+ ntfs_free(rp);
+ }
goto iput_logfile_err_out;
}
sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME;
@@ -1867,6 +1882,7 @@ get_ctx_vol_failed:
/* This will prevent a read-write remount. */
NVolSetErrors(vol);
}
+ ntfs_free(rp);
#endif /* NTFS_RW */
/* Get the root directory inode so we can do path lookups. */
vol->root_ino = ntfs_iget(sb, FILE_root);
diff --git a/fs/ntfs/unistr.c b/fs/ntfs/unistr.c
index 19c42e231b44..a389a5a16c84 100644
--- a/fs/ntfs/unistr.c
+++ b/fs/ntfs/unistr.c
@@ -372,7 +372,8 @@ retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o,
return -EINVAL;
conversion_err:
ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
- "converted to character set %s.", nls->charset);
+ "converted to character set %s. You might want to "
+ "try to use the mount option nls=utf8.", nls->charset);
if (ns != *outs)
kfree(ns);
if (wc != -ENAMETOOLONG)
diff --git a/fs/open.c b/fs/open.c
index 4ee2dcc31c28..2fac58c51910 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -24,6 +24,7 @@
#include <linux/personality.h>
#include <linux/pagemap.h>
#include <linux/syscalls.h>
+#include <linux/rcupdate.h>
#include <asm/unistd.h>
@@ -842,14 +843,16 @@ int get_unused_fd(void)
{
struct files_struct * files = current->files;
int fd, error;
+ struct fdtable *fdt;
error = -EMFILE;
spin_lock(&files->file_lock);
repeat:
- fd = find_next_zero_bit(files->open_fds->fds_bits,
- files->max_fdset,
- files->next_fd);
+ fdt = files_fdtable(files);
+ fd = find_next_zero_bit(fdt->open_fds->fds_bits,
+ fdt->max_fdset,
+ fdt->next_fd);
/*
* N.B. For clone tasks sharing a files structure, this test
@@ -872,14 +875,14 @@ repeat:
goto repeat;
}
- FD_SET(fd, files->open_fds);
- FD_CLR(fd, files->close_on_exec);
- files->next_fd = fd + 1;
+ FD_SET(fd, fdt->open_fds);
+ FD_CLR(fd, fdt->close_on_exec);
+ fdt->next_fd = fd + 1;
#if 1
/* Sanity check */
- if (files->fd[fd] != NULL) {
+ if (fdt->fd[fd] != NULL) {
printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
- files->fd[fd] = NULL;
+ fdt->fd[fd] = NULL;
}
#endif
error = fd;
@@ -893,9 +896,10 @@ EXPORT_SYMBOL(get_unused_fd);
static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
- __FD_CLR(fd, files->open_fds);
- if (fd < files->next_fd)
- files->next_fd = fd;
+ struct fdtable *fdt = files_fdtable(files);
+ __FD_CLR(fd, fdt->open_fds);
+ if (fd < fdt->next_fd)
+ fdt->next_fd = fd;
}
void fastcall put_unused_fd(unsigned int fd)
@@ -924,10 +928,11 @@ EXPORT_SYMBOL(put_unused_fd);
void fastcall fd_install(unsigned int fd, struct file * file)
{
struct files_struct *files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
- if (unlikely(files->fd[fd] != NULL))
- BUG();
- files->fd[fd] = file;
+ fdt = files_fdtable(files);
+ BUG_ON(fdt->fd[fd] != NULL);
+ rcu_assign_pointer(fdt->fd[fd], file);
spin_unlock(&files->file_lock);
}
@@ -1010,15 +1015,17 @@ asmlinkage long sys_close(unsigned int fd)
{
struct file * filp;
struct files_struct *files = current->files;
+ struct fdtable *fdt;
spin_lock(&files->file_lock);
- if (fd >= files->max_fds)
+ fdt = files_fdtable(files);
+ if (fd >= fdt->max_fds)
goto out_unlock;
- filp = files->fd[fd];
+ filp = fdt->fd[fd];
if (!filp)
goto out_unlock;
- files->fd[fd] = NULL;
- FD_CLR(fd, files->close_on_exec);
+ rcu_assign_pointer(fdt->fd[fd], NULL);
+ FD_CLR(fd, fdt->close_on_exec);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
return filp_close(filp, files);
diff --git a/fs/pipe.c b/fs/pipe.c
index 2c7a23dde2d8..66aa0b938d6a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -39,7 +39,11 @@ void pipe_wait(struct inode * inode)
{
DEFINE_WAIT(wait);
- prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE);
+ /*
+ * Pipes are system-local resources, so sleeping on them
+ * is considered a noninteractive wait:
+ */
+ prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
up(PIPE_SEM(*inode));
schedule();
finish_wait(PIPE_WAIT(*inode), &wait);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 37668fe998ad..d88d518d30f6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -159,6 +159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
{
struct group_info *group_info;
int g;
+ struct fdtable *fdt = NULL;
read_lock(&tasklist_lock);
buffer += sprintf(buffer,
@@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer)
p->gid, p->egid, p->sgid, p->fsgid);
read_unlock(&tasklist_lock);
task_lock(p);
+ if (p->files)
+ fdt = files_fdtable(p->files);
buffer += sprintf(buffer,
"FDSize:\t%d\n"
"Groups:\t",
- p->files ? p->files->max_fds : 0);
+ fdt ? fdt->max_fds : 0);
group_info = p->group_info;
get_group_info(group_info);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 84751f3f52d5..23db452ab428 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -62,6 +62,7 @@
#include <linux/namespace.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
+#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/mount.h>
#include <linux/security.h>
@@ -283,16 +284,16 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm
files = get_files_struct(task);
if (files) {
- spin_lock(&files->file_lock);
+ rcu_read_lock();
file = fcheck_files(files, fd);
if (file) {
*mnt = mntget(file->f_vfsmnt);
*dentry = dget(file->f_dentry);
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
return 0;
}
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
}
return -ENOENT;
@@ -1039,6 +1040,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
int retval;
char buf[NUMBUF];
struct files_struct * files;
+ struct fdtable *fdt;
retval = -ENOENT;
if (!pid_alive(p))
@@ -1061,15 +1063,16 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
files = get_files_struct(p);
if (!files)
goto out;
- spin_lock(&files->file_lock);
+ rcu_read_lock();
+ fdt = files_fdtable(files);
for (fd = filp->f_pos-2;
- fd < files->max_fds;
+ fd < fdt->max_fds;
fd++, filp->f_pos++) {
unsigned int i,j;
if (!fcheck_files(files, fd))
continue;
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
j = NUMBUF;
i = fd;
@@ -1081,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
- spin_lock(&files->file_lock);
+ rcu_read_lock();
break;
}
- spin_lock(&files->file_lock);
+ rcu_read_lock();
}
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
}
out:
@@ -1261,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
files = get_files_struct(task);
if (files) {
- spin_lock(&files->file_lock);
+ rcu_read_lock();
if (fcheck_files(files, fd)) {
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
if (task_dumpable(task)) {
inode->i_uid = task->euid;
@@ -1275,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
security_task_to_inode(task, inode);
return 1;
}
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
}
d_drop(dentry);
@@ -1367,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
if (!files)
goto out_unlock;
inode->i_mode = S_IFLNK;
- spin_lock(&files->file_lock);
+ rcu_read_lock();
file = fcheck_files(files, fd);
if (!file)
goto out_unlock2;
@@ -1375,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
inode->i_mode |= S_IRUSR | S_IXUSR;
if (file->f_mode & 2)
inode->i_mode |= S_IWUSR | S_IXUSR;
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
@@ -1385,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
return NULL;
out_unlock2:
- spin_unlock(&files->file_lock);
+ rcu_read_unlock();
put_files_struct(files);
out_unlock:
iput(inode);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 133c28685105..effa6c0c467a 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -60,6 +60,8 @@ static void proc_delete_inode(struct inode *inode)
struct proc_dir_entry *de;
struct task_struct *tsk;
+ truncate_inode_pages(&inode->i_data, 0);
+
/* Let go of any associated process */
tsk = PROC_I(inode)->task;
if (tsk)
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b79162a35478..80f32911c0cb 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -63,6 +63,7 @@ int qnx4_sync_inode(struct inode *inode)
static void qnx4_delete_inode(struct inode *inode)
{
QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
+ truncate_inode_pages(&inode->i_data, 0);
inode->i_size = 0;
qnx4_truncate(inode);
lock_kernel();
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ff291c973a56..1a8a1bf2154d 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -33,6 +33,8 @@ void reiserfs_delete_inode(struct inode *inode)
2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
struct reiserfs_transaction_handle th;
+ truncate_inode_pages(&inode->i_data, 0);
+
reiserfs_write_lock(inode->i_sb);
/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index a8e29e9bbbd0..4b15761434bc 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2868,8 +2868,7 @@ static void let_transaction_grow(struct super_block *sb, unsigned long trans_id)
struct reiserfs_journal *journal = SB_JOURNAL(sb);
unsigned long bcount = journal->j_bcount;
while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(1);
+ schedule_timeout_uninterruptible(1);
journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
while ((atomic_read(&journal->j_wcount) > 0 ||
atomic_read(&journal->j_jlock)) &&
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 6951c35755be..44b02fc02ebe 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1934,8 +1934,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
if (SB_AP_BITMAP(s))
brelse(SB_AP_BITMAP(s)[j].bh);
}
- if (SB_AP_BITMAP(s))
- vfree(SB_AP_BITMAP(s));
+ vfree(SB_AP_BITMAP(s));
}
if (SB_BUFFER_WITH_SB(s))
brelse(SB_BUFFER_WITH_SB(s));
diff --git a/fs/select.c b/fs/select.c
index b80e7eb0ac0d..f10a10317d54 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -22,6 +22,7 @@
#include <linux/personality.h> /* for STICKY_TIMEOUTS */
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
@@ -132,11 +133,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds)
unsigned long *open_fds;
unsigned long set;
int max;
+ struct fdtable *fdt;
/* handle last in-complete long-word first */
set = ~(~0UL << (n & (__NFDBITS-1)));
n /= __NFDBITS;
- open_fds = current->files->open_fds->fds_bits+n;
+ fdt = files_fdtable(current->files);
+ open_fds = fdt->open_fds->fds_bits+n;
max = 0;
if (set) {
set &= BITS(fds, n);
@@ -183,9 +186,9 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
int retval, i;
long __timeout = *timeout;
- spin_lock(&current->files->file_lock);
+ rcu_read_lock();
retval = max_select_fd(n, fds);
- spin_unlock(&current->files->file_lock);
+ rcu_read_unlock();
if (retval < 0)
return retval;
@@ -299,6 +302,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
char *bits;
long timeout;
int ret, size, max_fdset;
+ struct fdtable *fdt;
timeout = MAX_SCHEDULE_TIMEOUT;
if (tvp) {
@@ -326,7 +330,10 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s
goto out_nofds;
/* max_fdset can increase, so grab it once to avoid race */
- max_fdset = current->files->max_fdset;
+ rcu_read_lock();
+ fdt = files_fdtable(current->files);
+ max_fdset = fdt->max_fdset;
+ rcu_read_unlock();
if (n > max_fdset)
n = max_fdset;
@@ -464,9 +471,15 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti
unsigned int i;
struct poll_list *head;
struct poll_list *walk;
+ struct fdtable *fdt;
+ int max_fdset;
/* Do a sanity check on nfds ... */
- if (nfds > current->files->max_fdset && nfds > OPEN_MAX)
+ rcu_read_lock();
+ fdt = files_fdtable(current->files);
+ max_fdset = fdt->max_fdset;
+ rcu_read_unlock();
+ if (nfds > max_fdset && nfds > OPEN_MAX)
return -EINVAL;
if (timeout) {
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 4765aaac9fd2..10b994428fef 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -331,6 +331,7 @@ static void
smb_delete_inode(struct inode *ino)
{
DEBUG1("ino=%ld\n", ino->i_ino);
+ truncate_inode_pages(&ino->i_data, 0);
lock_kernel();
if (smb_close(ino))
PARANOIA("could not close inode %ld\n", ino->i_ino);
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 220babe91efd..38ab558835c4 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -2397,8 +2397,7 @@ smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) {
/* a damn Win95 bug - sometimes it clags if you
ask it too fast */
- current->state = TASK_INTERRUPTIBLE;
- schedule_timeout(HZ/5);
+ schedule_timeout_interruptible(msecs_to_jiffies(200));
continue;
}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 0530077d9dd8..fa33eceb0011 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -292,6 +292,7 @@ int sysv_sync_inode(struct inode * inode)
static void sysv_delete_inode(struct inode *inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
inode->i_size = 0;
sysv_truncate(inode);
lock_kernel();
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 3d68de39fad6..b83890beaaac 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -87,6 +87,8 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
*/
void udf_delete_inode(struct inode * inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
+
if (is_bad_inode(inode))
goto no_delete;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 718627ca8b5c..55f4aa16e3fc 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -804,6 +804,7 @@ int ufs_sync_inode (struct inode *inode)
void ufs_delete_inode (struct inode * inode)
{
+ truncate_inode_pages(&inode->i_data, 0);
/*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
lock_kernel();
mark_inode_dirty(inode);
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index c92306f0fdc5..8e8f32dabe53 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,5 +1,3 @@
-menu "XFS support"
-
config XFS_FS
tristate "XFS filesystem support"
select EXPORTFS if NFSD!=n
@@ -22,27 +20,11 @@ config XFS_FS
config XFS_EXPORT
bool
- default y if XFS_FS && EXPORTFS
-
-config XFS_RT
- bool "Realtime support (EXPERIMENTAL)"
- depends on XFS_FS && EXPERIMENTAL
- help
- If you say Y here you will be able to mount and use XFS filesystems
- which contain a realtime subvolume. The realtime subvolume is a
- separate area of disk space where only file data is stored. The
- realtime subvolume is designed to provide very deterministic
- data rates suitable for media streaming applications.
-
- See the xfs man page in section 5 for a bit more information.
-
- This feature is unsupported at this time, is not yet fully
- functional, and may cause serious problems.
-
- If unsure, say N.
+ depends on XFS_FS && EXPORTFS
+ default y
config XFS_QUOTA
- bool "Quota support"
+ tristate "XFS Quota support"
depends on XFS_FS
help
If you say Y here, you will be able to set limits for disk usage on
@@ -59,7 +41,7 @@ config XFS_QUOTA
they are completely independent subsystems.
config XFS_SECURITY
- bool "Security Label support"
+ bool "XFS Security Label support"
depends on XFS_FS
help
Security labels support alternative access control models
@@ -71,7 +53,7 @@ config XFS_SECURITY
extended attributes for inode security labels, say N.
config XFS_POSIX_ACL
- bool "POSIX ACL support"
+ bool "XFS POSIX ACL support"
depends on XFS_FS
help
POSIX Access Control Lists (ACLs) support permissions for users and
@@ -82,4 +64,19 @@ config XFS_POSIX_ACL
If you don't know what Access Control Lists are, say N.
-endmenu
+config XFS_RT
+ bool "XFS Realtime support (EXPERIMENTAL)"
+ depends on XFS_FS && EXPERIMENTAL
+ help
+ If you say Y here you will be able to mount and use XFS filesystems
+ which contain a realtime subvolume. The realtime subvolume is a
+ separate area of disk space where only file data is stored. The
+ realtime subvolume is designed to provide very deterministic
+ data rates suitable for media streaming applications.
+
+ See the xfs man page in section 5 for a bit more information.
+
+ This feature is unsupported at this time, is not yet fully
+ functional, and may cause serious problems.
+
+ If unsure, say N.
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
index fbfcbe5a7cda..d8c87fa21ad1 100644
--- a/fs/xfs/Makefile-linux-2.6
+++ b/fs/xfs/Makefile-linux-2.6
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+# Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of version 2 of the GNU General Public License as
@@ -55,7 +55,18 @@ ifeq ($(CONFIG_XFS_TRACE),y)
endif
obj-$(CONFIG_XFS_FS) += xfs.o
-obj-$(CONFIG_XFS_QUOTA) += quota/
+
+xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
+ xfs_dquot.o \
+ xfs_dquot_item.o \
+ xfs_trans_dquot.o \
+ xfs_qm_syscalls.o \
+ xfs_qm_bhv.o \
+ xfs_qm.o)
+
+ifeq ($(CONFIG_XFS_QUOTA),y)
+xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
+endif
xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
index 6c6fd0faa8e1..b0d2873ab274 100644
--- a/fs/xfs/linux-2.6/time.h
+++ b/fs/xfs/linux-2.6/time.h
@@ -39,8 +39,7 @@ typedef struct timespec timespec_t;
static inline void delay(long ticks)
{
- set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_timeout(ticks);
+ schedule_timeout_uninterruptible(ticks);
}
static inline void nanotime(struct timespec *tvp)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 655bf4a78afe..e82cf72ac599 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1780,10 +1780,10 @@ xfsbufd(
xfsbufd_force_sleep = 0;
}
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout((xfs_buf_timer_centisecs * HZ) / 100);
+ schedule_timeout_interruptible
+ (xfs_buf_timer_centisecs * msecs_to_jiffies(10));
- age = (xfs_buf_age_centisecs * HZ) / 100;
+ age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
spin_lock(&pbd_delwrite_lock);
list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 0da87bfc9999..2302454d8d47 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -467,7 +467,7 @@ xfs_flush_inode(
igrab(inode);
xfs_syncd_queue_work(vfs, inode, xfs_flush_inode_work);
- delay(HZ/2);
+ delay(msecs_to_jiffies(500));
}
/*
@@ -492,7 +492,7 @@ xfs_flush_device(
igrab(inode);
xfs_syncd_queue_work(vfs, inode, xfs_flush_device_work);
- delay(HZ/2);
+ delay(msecs_to_jiffies(500));
xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
}
@@ -520,10 +520,9 @@ xfssyncd(
struct vfs_sync_work *work, *n;
LIST_HEAD (tmp);
- timeleft = (xfs_syncd_centisecs * HZ) / 100;
+ timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- timeleft = schedule_timeout(timeleft);
+ timeleft = schedule_timeout_interruptible(timeleft);
/* swsusp */
try_to_freeze();
if (kthread_should_stop())
@@ -537,7 +536,8 @@ xfssyncd(
*/
if (!timeleft || list_empty(&vfsp->vfs_sync_list)) {
if (!timeleft)
- timeleft = (xfs_syncd_centisecs * HZ) / 100;
+ timeleft = xfs_syncd_centisecs *
+ msecs_to_jiffies(10);
INIT_LIST_HEAD(&vfsp->vfs_sync_work.w_list);
list_add_tail(&vfsp->vfs_sync_work.w_list,
&vfsp->vfs_sync_list);
diff --git a/fs/xfs/quota/Makefile-linux-2.6 b/fs/xfs/quota/Makefile-linux-2.6
index 8b7b676718b9..93e60e839355 100644
--- a/fs/xfs/quota/Makefile-linux-2.6
+++ b/fs/xfs/quota/Makefile-linux-2.6
@@ -41,13 +41,13 @@ ifeq ($(CONFIG_XFS_TRACE),y)
EXTRA_CFLAGS += -DXFS_VNODE_TRACE
endif
-obj-$(CONFIG_XFS_QUOTA) += xfs_quota.o
-
-xfs_quota-y += xfs_dquot.o \
+xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
xfs_dquot_item.o \
xfs_trans_dquot.o \
xfs_qm_syscalls.o \
xfs_qm_bhv.o \
xfs_qm.o
-xfs_quota-$(CONFIG_PROC_FS) += xfs_qm_stats.o
+ifeq ($(CONFIG_XFS_QUOTA),y)
+xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o
+endif
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 3dae14c8c55a..fa8394f9437d 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -170,7 +170,7 @@ ktrace_enter(
void *val14,
void *val15)
{
- static lock_t wrap_lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(wrap_lock);
unsigned long flags;
int index;
ktrace_entry_t *ktep;
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index ae35189b3d70..5ab0dd885b1b 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -40,22 +40,28 @@
#include <asm/byteorder.h>
-#ifdef __LITTLE_ENDIAN
-# define __BYTE_ORDER __LITTLE_ENDIAN
-#endif
#ifdef __BIG_ENDIAN
-# define __BYTE_ORDER __BIG_ENDIAN
+#define XFS_NATIVE_HOST 1
+#else
+#undef XFS_NATIVE_HOST
+#endif
+
+#else /* __KERNEL__ */
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define XFS_NATIVE_HOST 1
+#else
+#undef XFS_NATIVE_HOST
#endif
#endif /* __KERNEL__ */
/* do we need conversion? */
-
#define ARCH_NOCONVERT 1
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-# define ARCH_CONVERT 0
-#else
+#ifdef XFS_NATIVE_HOST
# define ARCH_CONVERT ARCH_NOCONVERT
+#else
+# define ARCH_CONVERT 0
#endif
/* generic swapping macros */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 09c413576ba8..09a77b17565b 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -2017,7 +2017,7 @@ xfs_bmbt_get_state(
ext_flag);
}
-#if __BYTE_ORDER != __BIG_ENDIAN
+#ifndef XFS_NATIVE_HOST
/* Endian flipping versions of the bmbt extraction functions */
void
xfs_bmbt_disk_get_all(
@@ -2087,7 +2087,7 @@ xfs_bmbt_disk_get_state(
return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r),
ext_flag);
}
-#endif
+#endif /* XFS_NATIVE_HOST */
/*
@@ -2531,7 +2531,7 @@ xfs_bmbt_set_allf(
#endif /* XFS_BIG_BLKNOS */
}
-#if __BYTE_ORDER != __BIG_ENDIAN
+#ifndef XFS_NATIVE_HOST
/*
* Set all the fields in a bmap extent record from the uncompressed form.
*/
@@ -2617,7 +2617,7 @@ xfs_bmbt_disk_set_allf(
}
#endif /* XFS_BIG_BLKNOS */
}
-#endif
+#endif /* XFS_NATIVE_HOST */
/*
* Set the blockcount field in a bmap extent record.
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 0a40cf126c28..2cf4fe45cbcb 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -62,7 +62,7 @@ typedef struct xfs_bmdr_block
* l1:0-20 are blockcount.
*/
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#ifndef XFS_NATIVE_HOST
#define BMBT_TOTAL_BITLEN 128 /* 128 bits, 16 bytes */
#define BMBT_EXNTFLAG_BITOFF 0
@@ -87,7 +87,7 @@ typedef struct xfs_bmdr_block
#define BMBT_BLOCKCOUNT_BITOFF 64 /* Start of second 64 bit container */
#define BMBT_BLOCKCOUNT_BITLEN 21
-#endif
+#endif /* XFS_NATIVE_HOST */
#define BMBT_USE_64 1
@@ -505,7 +505,7 @@ xfs_exntst_t
xfs_bmbt_get_state(
xfs_bmbt_rec_t *r);
-#if __BYTE_ORDER != __BIG_ENDIAN
+#ifndef XFS_NATIVE_HOST
void
xfs_bmbt_disk_get_all(
xfs_bmbt_rec_t *r,
@@ -538,7 +538,7 @@ xfs_bmbt_disk_get_startoff(
xfs_bmbt_get_blockcount(r)
#define xfs_bmbt_disk_get_startoff(r) \
xfs_bmbt_get_startoff(r)
-#endif
+#endif /* XFS_NATIVE_HOST */
int
xfs_bmbt_increment(
@@ -623,7 +623,7 @@ xfs_bmbt_set_state(
xfs_bmbt_rec_t *r,
xfs_exntst_t v);
-#if __BYTE_ORDER != __BIG_ENDIAN
+#ifndef XFS_NATIVE_HOST
void
xfs_bmbt_disk_set_all(
xfs_bmbt_rec_t *r,
@@ -641,7 +641,7 @@ xfs_bmbt_disk_set_allf(
xfs_bmbt_set_all(r, s)
#define xfs_bmbt_disk_set_allf(r, o, b, c, v) \
xfs_bmbt_set_allf(r, o, b, c, v)
-#endif
+#endif /* XFS_NATIVE_HOST */
void
xfs_bmbt_to_bmdr(
diff --git a/fs/xfs/xfs_dir_leaf.h b/fs/xfs/xfs_dir_leaf.h
index dd423ce1bc8d..480bffc1f29f 100644
--- a/fs/xfs/xfs_dir_leaf.h
+++ b/fs/xfs/xfs_dir_leaf.h
@@ -127,13 +127,13 @@ typedef union {
* Watch the order here (endian-ness dependent).
*/
struct {
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#ifndef XFS_NATIVE_HOST
xfs_dahash_t h; /* hash value */
__uint32_t be; /* block and entry */
-#else /* __BYTE_ORDER == __BIG_ENDIAN */
+#else
__uint32_t be; /* block and entry */
xfs_dahash_t h; /* hash value */
-#endif /* __BYTE_ORDER == __BIG_ENDIAN */
+#endif /* XFS_NATIVE_HOST */
} s;
} xfs_dircook_t;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 276ec70eb7f9..50e2cadf9091 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -341,7 +341,7 @@ xfs_inode_item_format(
nrecs = ip->i_df.if_bytes /
(uint)sizeof(xfs_bmbt_rec_t);
ASSERT(nrecs > 0);
-#if __BYTE_ORDER == __BIG_ENDIAN
+#ifdef XFS_NATIVE_HOST
if (nrecs == ip->i_d.di_nextents) {
/*
* There are no delayed allocation
@@ -473,7 +473,7 @@ xfs_inode_item_format(
#endif
ASSERT(nrecs > 0);
ASSERT(nrecs == ip->i_d.di_anextents);
-#if __BYTE_ORDER == __BIG_ENDIAN
+#ifdef XFS_NATIVE_HOST
/*
* There are not delayed allocation extents
* for attributes, so just point at the array.
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index eb7fdc6ebc32..a884cea82fca 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -112,7 +112,7 @@ struct xfs_mount;
* this has endian issues, of course.
*/
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#ifndef XFS_NATIVE_HOST
#define GET_CLIENT_ID(i,arch) \
((i) & 0xff)
#else
@@ -414,14 +414,10 @@ typedef struct xlog_op_header {
#define XLOG_FMT_IRIX_BE 3
/* our fmt */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-#define XLOG_FMT XLOG_FMT_LINUX_LE
-#else
-#if __BYTE_ORDER == __BIG_ENDIAN
+#ifdef XFS_NATIVE_HOST
#define XLOG_FMT XLOG_FMT_LINUX_BE
#else
-#error unknown byte order
-#endif
+#define XLOG_FMT XLOG_FMT_LINUX_LE
#endif
typedef struct xlog_rec_header {