diff options
Diffstat (limited to 'fs')
77 files changed, 732 insertions, 327 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 515455296378..535ab6eccb1a 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -262,7 +262,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name, if (strcmp(name, "") != 0) return -EINVAL; - v9ses = v9fs_inode2v9ses(dentry->d_inode); + v9ses = v9fs_dentry2v9ses(dentry); /* * We allow set/get/list of acl when access=client is not specified */ @@ -312,7 +312,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, if (strcmp(name, "") != 0) return -EINVAL; - v9ses = v9fs_inode2v9ses(dentry->d_inode); + v9ses = v9fs_dentry2v9ses(dentry); /* * set the attribute on the remote. Without even looking at the * xattr value. We leave it to the server to validate @@ -323,7 +323,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name, if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; if (value) { /* update the cached acl value */ diff --git a/fs/9p/fid.c b/fs/9p/fid.c index cd63e002d826..0ee594569dcc 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -134,7 +134,7 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, struct v9fs_session_info *v9ses; struct p9_fid *fid, *old_fid = NULL; - v9ses = v9fs_inode2v9ses(dentry->d_inode); + v9ses = v9fs_dentry2v9ses(dentry); access = v9ses->flags & V9FS_ACCESS_MASK; fid = v9fs_fid_find(dentry, uid, any); if (fid) @@ -237,7 +237,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) int any, access; struct v9fs_session_info *v9ses; - v9ses = v9fs_inode2v9ses(dentry->d_inode); + v9ses = v9fs_dentry2v9ses(dentry); access = v9ses->flags & V9FS_ACCESS_MASK; switch (access) { case V9FS_ACCESS_SINGLE: @@ -286,9 +286,11 @@ static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid) struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) { - int err; + int err, flags; struct p9_fid *fid; + struct v9fs_session_info *v9ses; + v9ses = v9fs_dentry2v9ses(dentry); fid = v9fs_fid_clone_with_uid(dentry, 0); if (IS_ERR(fid)) goto error_out; @@ -297,8 +299,17 @@ struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) * dirty pages. We always request for the open fid in read-write * mode so that a partial page write which result in page * read can work. + * + * we don't have a tsyncfs operation for older version + * of protocol. So make sure the write back fid is + * opened in O_SYNC mode. */ - err = p9_client_open(fid, O_RDWR); + if (!v9fs_proto_dotl(v9ses)) + flags = O_RDWR | O_SYNC; + else + flags = O_RDWR; + + err = p9_client_open(fid, flags); if (err < 0) { p9_client_clunk(fid); fid = ERR_PTR(err); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index bd8496db135b..9665c2b840e6 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -130,6 +130,7 @@ struct v9fs_inode { #endif unsigned int cache_validity; struct p9_fid *writeback_fid; + struct mutex v_mutex; struct inode vfs_inode; }; @@ -173,6 +174,11 @@ static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode) return (inode->i_sb->s_fs_info); } +static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry) +{ + return dentry->d_sb->s_fs_info; +} + static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses) { return v9ses->flags & V9FS_PROTO_2000U; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 78bcb97c3425..ffed55817f0c 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -90,7 +90,9 @@ int v9fs_file_open(struct inode *inode, struct file *file) } file->private_data = fid; - if (v9ses->cache && !v9inode->writeback_fid) { + mutex_lock(&v9inode->v_mutex); + if (v9ses->cache && !v9inode->writeback_fid && + ((file->f_flags & O_ACCMODE) != O_RDONLY)) { /* * clone a fid and add it to writeback_fid * we do it during open time instead of @@ -101,10 +103,12 @@ int v9fs_file_open(struct inode *inode, struct file *file) fid = v9fs_writeback_fid(file->f_path.dentry); if (IS_ERR(fid)) { err = PTR_ERR(fid); + mutex_unlock(&v9inode->v_mutex); goto out_error; } v9inode->writeback_fid = (void *) fid; } + mutex_unlock(&v9inode->v_mutex); #ifdef CONFIG_9P_FSCACHE if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, file); @@ -504,9 +508,12 @@ v9fs_file_write(struct file *filp, const char __user * data, if (!count) goto out; - return v9fs_file_write_internal(filp->f_path.dentry->d_inode, + retval = v9fs_file_write_internal(filp->f_path.dentry->d_inode, filp->private_data, - data, count, offset, 1); + data, count, &origin, 1); + /* update offset on successful write */ + if (retval > 0) + *offset = origin; out: return retval; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 8a2c232f708a..7f6c67703195 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -221,6 +221,7 @@ struct inode *v9fs_alloc_inode(struct super_block *sb) #endif v9inode->writeback_fid = NULL; v9inode->cache_validity = 0; + mutex_init(&v9inode->v_mutex); return &v9inode->vfs_inode; } @@ -650,7 +651,9 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, /* if we are opening a file, assign the open fid to the file */ if (nd && nd->flags & LOOKUP_OPEN) { v9inode = V9FS_I(dentry->d_inode); - if (v9ses->cache && !v9inode->writeback_fid) { + mutex_lock(&v9inode->v_mutex); + if (v9ses->cache && !v9inode->writeback_fid && + ((flags & O_ACCMODE) != O_RDONLY)) { /* * clone a fid and add it to writeback_fid * we do it during open time instead of @@ -661,10 +664,12 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, inode_fid = v9fs_writeback_fid(dentry); if (IS_ERR(inode_fid)) { err = PTR_ERR(inode_fid); + mutex_unlock(&v9inode->v_mutex); goto error; } v9inode->writeback_fid = (void *) inode_fid; } + mutex_unlock(&v9inode->v_mutex); filp = lookup_instantiate_filp(nd, dentry, generic_file_open); if (IS_ERR(filp)) { err = PTR_ERR(filp); @@ -931,7 +936,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); err = -EPERM; - v9ses = v9fs_inode2v9ses(dentry->d_inode); + v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { generic_fillattr(dentry->d_inode, stat); return 0; @@ -967,8 +972,12 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) struct p9_wstat wstat; P9_DPRINTK(P9_DEBUG_VFS, "\n"); + retval = inode_change_ok(dentry->d_inode, iattr); + if (retval) + return retval; + retval = -EPERM; - v9ses = v9fs_inode2v9ses(dentry->d_inode); + v9ses = v9fs_dentry2v9ses(dentry); fid = v9fs_fid_lookup(dentry); if(IS_ERR(fid)) return PTR_ERR(fid); @@ -993,12 +1002,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) if (iattr->ia_valid & ATTR_GID) wstat.n_gid = iattr->ia_gid; } - if ((iattr->ia_valid & ATTR_SIZE) && - iattr->ia_size != i_size_read(dentry->d_inode)) { - retval = vmtruncate(dentry->d_inode, iattr->ia_size); - if (retval) - return retval; - } + /* Write all dirty data */ if (S_ISREG(dentry->d_inode->i_mode)) filemap_write_and_wait(dentry->d_inode->i_mapping); @@ -1006,6 +1010,11 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) retval = p9_client_wstat(fid, &wstat); if (retval < 0) return retval; + + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(dentry->d_inode)) + truncate_setsize(dentry->d_inode, iattr->ia_size); + v9fs_invalidate_inode_attr(dentry->d_inode); setattr_copy(dentry->d_inode, iattr); @@ -1130,7 +1139,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); retval = -EPERM; - v9ses = v9fs_inode2v9ses(dentry->d_inode); + v9ses = v9fs_dentry2v9ses(dentry); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 67c138e94feb..ffbb113d5f33 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -245,7 +245,9 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, v9fs_set_create_acl(dentry, dacl, pacl); v9inode = V9FS_I(inode); - if (v9ses->cache && !v9inode->writeback_fid) { + mutex_lock(&v9inode->v_mutex); + if (v9ses->cache && !v9inode->writeback_fid && + ((flags & O_ACCMODE) != O_RDONLY)) { /* * clone a fid and add it to writeback_fid * we do it during open time instead of @@ -256,10 +258,12 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, inode_fid = v9fs_writeback_fid(dentry); if (IS_ERR(inode_fid)) { err = PTR_ERR(inode_fid); + mutex_unlock(&v9inode->v_mutex); goto error; } v9inode->writeback_fid = (void *) inode_fid; } + mutex_unlock(&v9inode->v_mutex); /* Since we are opening a file, assign the open fid to the file */ filp = lookup_instantiate_filp(nd, dentry, generic_file_open); if (IS_ERR(filp)) { @@ -391,7 +395,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); err = -EPERM; - v9ses = v9fs_inode2v9ses(dentry->d_inode); + v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { generic_fillattr(dentry->d_inode, stat); return 0; @@ -448,17 +452,11 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; retval = -EPERM; - v9ses = v9fs_inode2v9ses(dentry->d_inode); + v9ses = v9fs_dentry2v9ses(dentry); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); - if ((iattr->ia_valid & ATTR_SIZE) && - iattr->ia_size != i_size_read(dentry->d_inode)) { - retval = vmtruncate(dentry->d_inode, iattr->ia_size); - if (retval) - return retval; - } /* Write all dirty data */ if (S_ISREG(dentry->d_inode->i_mode)) filemap_write_and_wait(dentry->d_inode->i_mapping); @@ -466,8 +464,12 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) retval = p9_client_setattr(fid, &p9attr); if (retval < 0) return retval; - v9fs_invalidate_inode_attr(dentry->d_inode); + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(dentry->d_inode)) + truncate_setsize(dentry->d_inode, iattr->ia_size); + + v9fs_invalidate_inode_attr(dentry->d_inode); setattr_copy(dentry->d_inode, iattr); mark_inode_dirty(dentry->d_inode); if (iattr->ia_valid & ATTR_MODE) { diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 09fd08d1606f..f3eed3383e4f 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -262,7 +262,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf) goto done; } - v9ses = v9fs_inode2v9ses(dentry->d_inode); + v9ses = v9fs_dentry2v9ses(dentry); if (v9fs_proto_dotl(v9ses)) { res = p9_client_statfs(fid, &rs); if (res == 0) { diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index 2ff622f6f547..718ac1f440c6 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -50,6 +50,7 @@ struct adfs_sb_info { gid_t s_gid; /* owner gid */ umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ umode_t s_other_mask; /* ADFS other perm -> unix perm */ + int s_ftsuffix; /* ,xyz hex filetype suffix option */ __u32 s_ids_per_zone; /* max. no ids in one zone */ __u32 s_idlen; /* length of ID in map */ @@ -79,6 +80,10 @@ struct adfs_dir { int nr_buffers; struct buffer_head *bh[4]; + + /* big directories need allocated buffers */ + struct buffer_head **bh_fplus; + unsigned int pos; unsigned int parent_id; @@ -89,7 +94,7 @@ struct adfs_dir { /* * This is the overall maximum name length */ -#define ADFS_MAX_NAME_LEN 256 +#define ADFS_MAX_NAME_LEN (256 + 4) /* +4 for ,xyz hex filetype suffix */ struct object_info { __u32 parent_id; /* parent object id */ __u32 file_id; /* object id */ @@ -97,10 +102,26 @@ struct object_info { __u32 execaddr; /* execution address */ __u32 size; /* size */ __u8 attr; /* RISC OS attributes */ - unsigned char name_len; /* name length */ + unsigned int name_len; /* name length */ char name[ADFS_MAX_NAME_LEN];/* file name */ + + /* RISC OS file type (12-bit: derived from loadaddr) */ + __u16 filetype; }; +/* RISC OS 12-bit filetype converts to ,xyz hex filename suffix */ +static inline int append_filetype_suffix(char *buf, __u16 filetype) +{ + if (filetype == 0xffff) /* no explicit 12-bit file type was set */ + return 0; + + *buf++ = ','; + *buf++ = hex_asc_lo(filetype >> 8); + *buf++ = hex_asc_lo(filetype >> 4); + *buf++ = hex_asc_lo(filetype >> 0); + return 4; +} + struct adfs_dir_ops { int (*read)(struct super_block *sb, unsigned int id, unsigned int sz, struct adfs_dir *dir); int (*setpos)(struct adfs_dir *dir, unsigned int fpos); diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index bafc71222e25..4bbe853ee50a 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -52,7 +52,6 @@ static inline int adfs_readname(char *buf, char *ptr, int maxlen) *buf++ = *ptr; ptr++; } - *buf = '\0'; return buf - old_buf; } @@ -208,7 +207,8 @@ release_buffers: * convert a disk-based directory entry to a Linux ADFS directory entry */ static inline void -adfs_dir2obj(struct object_info *obj, struct adfs_direntry *de) +adfs_dir2obj(struct adfs_dir *dir, struct object_info *obj, + struct adfs_direntry *de) { obj->name_len = adfs_readname(obj->name, de->dirobname, ADFS_F_NAME_LEN); obj->file_id = adfs_readval(de->dirinddiscadd, 3); @@ -216,6 +216,23 @@ adfs_dir2obj(struct object_info *obj, struct adfs_direntry *de) obj->execaddr = adfs_readval(de->direxec, 4); obj->size = adfs_readval(de->dirlen, 4); obj->attr = de->newdiratts; + obj->filetype = -1; + + /* + * object is a file and is filetyped and timestamped? + * RISC OS 12-bit filetype is stored in load_address[19:8] + */ + if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) && + (0xfff00000 == (0xfff00000 & obj->loadaddr))) { + obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8); + + /* optionally append the ,xyz hex filetype suffix */ + if (ADFS_SB(dir->sb)->s_ftsuffix) + obj->name_len += + append_filetype_suffix( + &obj->name[obj->name_len], + obj->filetype); + } } /* @@ -260,7 +277,7 @@ __adfs_dir_get(struct adfs_dir *dir, int pos, struct object_info *obj) if (!de.dirobname[0]) return -ENOENT; - adfs_dir2obj(obj, &de); + adfs_dir2obj(dir, obj, &de); return 0; } diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index 1796bb352d05..d9e3bee4e653 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ #include <linux/buffer_head.h> +#include <linux/slab.h> #include "adfs.h" #include "dir_fplus.h" @@ -22,30 +23,53 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct dir->nr_buffers = 0; + /* start off using fixed bh set - only alloc for big dirs */ + dir->bh_fplus = &dir->bh[0]; + block = __adfs_block_map(sb, id, 0); if (!block) { adfs_error(sb, "dir object %X has a hole at offset 0", id); goto out; } - dir->bh[0] = sb_bread(sb, block); - if (!dir->bh[0]) + dir->bh_fplus[0] = sb_bread(sb, block); + if (!dir->bh_fplus[0]) goto out; dir->nr_buffers += 1; - h = (struct adfs_bigdirheader *)dir->bh[0]->b_data; + h = (struct adfs_bigdirheader *)dir->bh_fplus[0]->b_data; size = le32_to_cpu(h->bigdirsize); if (size != sz) { - printk(KERN_WARNING "adfs: adfs_fplus_read: directory header size\n" - " does not match directory size\n"); + printk(KERN_WARNING "adfs: adfs_fplus_read:" + " directory header size %X\n" + " does not match directory size %X\n", + size, sz); } if (h->bigdirversion[0] != 0 || h->bigdirversion[1] != 0 || h->bigdirversion[2] != 0 || size & 2047 || - h->bigdirstartname != cpu_to_le32(BIGDIRSTARTNAME)) + h->bigdirstartname != cpu_to_le32(BIGDIRSTARTNAME)) { + printk(KERN_WARNING "adfs: dir object %X has" + " malformed dir header\n", id); goto out; + } size >>= sb->s_blocksize_bits; + if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) { + /* this directory is too big for fixed bh set, must allocate */ + struct buffer_head **bh_fplus = + kzalloc(size * sizeof(struct buffer_head *), + GFP_KERNEL); + if (!bh_fplus) { + adfs_error(sb, "not enough memory for" + " dir object %X (%d blocks)", id, size); + goto out; + } + dir->bh_fplus = bh_fplus; + /* copy over the pointer to the block that we've already read */ + dir->bh_fplus[0] = dir->bh[0]; + } + for (blk = 1; blk < size; blk++) { block = __adfs_block_map(sb, id, blk); if (!block) { @@ -53,25 +77,44 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct goto out; } - dir->bh[blk] = sb_bread(sb, block); - if (!dir->bh[blk]) + dir->bh_fplus[blk] = sb_bread(sb, block); + if (!dir->bh_fplus[blk]) { + adfs_error(sb, "dir object %X failed read for" + " offset %d, mapped block %X", + id, blk, block); goto out; - dir->nr_buffers = blk; + } + + dir->nr_buffers += 1; } - t = (struct adfs_bigdirtail *)(dir->bh[size - 1]->b_data + (sb->s_blocksize - 8)); + t = (struct adfs_bigdirtail *) + (dir->bh_fplus[size - 1]->b_data + (sb->s_blocksize - 8)); if (t->bigdirendname != cpu_to_le32(BIGDIRENDNAME) || t->bigdirendmasseq != h->startmasseq || - t->reserved[0] != 0 || t->reserved[1] != 0) + t->reserved[0] != 0 || t->reserved[1] != 0) { + printk(KERN_WARNING "adfs: dir object %X has " + "malformed dir end\n", id); goto out; + } dir->parent_id = le32_to_cpu(h->bigdirparent); dir->sb = sb; return 0; + out: - for (i = 0; i < dir->nr_buffers; i++) - brelse(dir->bh[i]); + if (dir->bh_fplus) { + for (i = 0; i < dir->nr_buffers; i++) + brelse(dir->bh_fplus[i]); + + if (&dir->bh[0] != dir->bh_fplus) + kfree(dir->bh_fplus); + + dir->bh_fplus = NULL; + } + + dir->nr_buffers = 0; dir->sb = NULL; return ret; } @@ -79,7 +122,8 @@ out: static int adfs_fplus_setpos(struct adfs_dir *dir, unsigned int fpos) { - struct adfs_bigdirheader *h = (struct adfs_bigdirheader *)dir->bh[0]->b_data; + struct adfs_bigdirheader *h = + (struct adfs_bigdirheader *) dir->bh_fplus[0]->b_data; int ret = -ENOENT; if (fpos <= le32_to_cpu(h->bigdirentries)) { @@ -102,21 +146,27 @@ dir_memcpy(struct adfs_dir *dir, unsigned int offset, void *to, int len) partial = sb->s_blocksize - offset; if (partial >= len) - memcpy(to, dir->bh[buffer]->b_data + offset, len); + memcpy(to, dir->bh_fplus[buffer]->b_data + offset, len); else { char *c = (char *)to; remainder = len - partial; - memcpy(c, dir->bh[buffer]->b_data + offset, partial); - memcpy(c + partial, dir->bh[buffer + 1]->b_data, remainder); + memcpy(c, + dir->bh_fplus[buffer]->b_data + offset, + partial); + + memcpy(c + partial, + dir->bh_fplus[buffer + 1]->b_data, + remainder); } } static int adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj) { - struct adfs_bigdirheader *h = (struct adfs_bigdirheader *)dir->bh[0]->b_data; + struct adfs_bigdirheader *h = + (struct adfs_bigdirheader *) dir->bh_fplus[0]->b_data; struct adfs_bigdirentry bde; unsigned int offset; int i, ret = -ENOENT; @@ -147,6 +197,24 @@ adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj) if (obj->name[i] == '/') obj->name[i] = '.'; + obj->filetype = -1; + + /* + * object is a file and is filetyped and timestamped? + * RISC OS 12-bit filetype is stored in load_address[19:8] + */ + if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) && + (0xfff00000 == (0xfff00000 & obj->loadaddr))) { + obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8); + + /* optionally append the ,xyz hex filetype suffix */ + if (ADFS_SB(dir->sb)->s_ftsuffix) + obj->name_len += + append_filetype_suffix( + &obj->name[obj->name_len], + obj->filetype); + } + dir->pos += 1; ret = 0; out: @@ -160,7 +228,7 @@ adfs_fplus_sync(struct adfs_dir *dir) int i; for (i = dir->nr_buffers - 1; i >= 0; i--) { - struct buffer_head *bh = dir->bh[i]; + struct buffer_head *bh = dir->bh_fplus[i]; sync_dirty_buffer(bh); if (buffer_req(bh) && !buffer_uptodate(bh)) err = -EIO; @@ -174,8 +242,17 @@ adfs_fplus_free(struct adfs_dir *dir) { int i; - for (i = 0; i < dir->nr_buffers; i++) - brelse(dir->bh[i]); + if (dir->bh_fplus) { + for (i = 0; i < dir->nr_buffers; i++) + brelse(dir->bh_fplus[i]); + + if (&dir->bh[0] != dir->bh_fplus) + kfree(dir->bh_fplus); + + dir->bh_fplus = NULL; + } + + dir->nr_buffers = 0; dir->sb = NULL; } diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 09fe40198d1c..92444e94f842 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -78,26 +78,13 @@ static const struct address_space_operations adfs_aops = { .bmap = _adfs_bmap }; -static inline unsigned int -adfs_filetype(struct inode *inode) -{ - unsigned int type; - - if (ADFS_I(inode)->stamped) - type = (ADFS_I(inode)->loadaddr >> 8) & 0xfff; - else - type = (unsigned int) -1; - - return type; -} - /* * Convert ADFS attributes and filetype to Linux permission. */ static umode_t adfs_atts2mode(struct super_block *sb, struct inode *inode) { - unsigned int filetype, attr = ADFS_I(inode)->attr; + unsigned int attr = ADFS_I(inode)->attr; umode_t mode, rmask; struct adfs_sb_info *asb = ADFS_SB(sb); @@ -106,9 +93,7 @@ adfs_atts2mode(struct super_block *sb, struct inode *inode) return S_IFDIR | S_IXUGO | mode; } - filetype = adfs_filetype(inode); - - switch (filetype) { + switch (ADFS_I(inode)->filetype) { case 0xfc0: /* LinkFS */ return S_IFLNK|S_IRWXUGO; @@ -174,50 +159,48 @@ adfs_mode2atts(struct super_block *sb, struct inode *inode) /* * Convert an ADFS time to Unix time. ADFS has a 40-bit centi-second time - * referenced to 1 Jan 1900 (til 2248) + * referenced to 1 Jan 1900 (til 2248) so we need to discard 2208988800 seconds + * of time to convert from RISC OS epoch to Unix epoch. */ static void adfs_adfs2unix_time(struct timespec *tv, struct inode *inode) { unsigned int high, low; + /* 01 Jan 1970 00:00:00 (Unix epoch) as nanoseconds since + * 01 Jan 1900 00:00:00 (RISC OS epoch) + */ + static const s64 nsec_unix_epoch_diff_risc_os_epoch = + 2208988800000000000LL; + s64 nsec; if (ADFS_I(inode)->stamped == 0) goto cur_time; - high = ADFS_I(inode)->loadaddr << 24; - low = ADFS_I(inode)->execaddr; + high = ADFS_I(inode)->loadaddr & 0xFF; /* top 8 bits of timestamp */ + low = ADFS_I(inode)->execaddr; /* bottom 32 bits of timestamp */ - high |= low >> 8; - low &= 255; + /* convert 40-bit centi-seconds to 32-bit seconds + * going via nanoseconds to retain precision + */ + nsec = (((s64) high << 32) | (s64) low) * 10000000; /* cs to ns */ /* Files dated pre 01 Jan 1970 00:00:00. */ - if (high < 0x336e996a) + if (nsec < nsec_unix_epoch_diff_risc_os_epoch) goto too_early; - /* Files dated post 18 Jan 2038 03:14:05. */ - if (high >= 0x656e9969) - goto too_late; + /* convert from RISC OS to Unix epoch */ + nsec -= nsec_unix_epoch_diff_risc_os_epoch; - /* discard 2208988800 (0x336e996a00) seconds of time */ - high -= 0x336e996a; - - /* convert 40-bit centi-seconds to 32-bit seconds */ - tv->tv_sec = (((high % 100) << 8) + low) / 100 + (high / 100 << 8); - tv->tv_nsec = 0; + *tv = ns_to_timespec(nsec); return; cur_time: - *tv = CURRENT_TIME_SEC; + *tv = CURRENT_TIME; return; too_early: tv->tv_sec = tv->tv_nsec = 0; return; - - too_late: - tv->tv_sec = 0x7ffffffd; - tv->tv_nsec = 0; - return; } /* @@ -279,7 +262,8 @@ adfs_iget(struct super_block *sb, struct object_info *obj) ADFS_I(inode)->loadaddr = obj->loadaddr; ADFS_I(inode)->execaddr = obj->execaddr; ADFS_I(inode)->attr = obj->attr; - ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000); + ADFS_I(inode)->filetype = obj->filetype; + ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000); inode->i_mode = adfs_atts2mode(sb, inode); adfs_adfs2unix_time(&inode->i_mtime, inode); diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 06d7388b477b..c8bf36a1996a 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -138,17 +138,20 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) seq_printf(seq, ",ownmask=%o", asb->s_owner_mask); if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK) seq_printf(seq, ",othmask=%o", asb->s_other_mask); + if (asb->s_ftsuffix != 0) + seq_printf(seq, ",ftsuffix=%u", asb->s_ftsuffix); return 0; } -enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; +enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_ftsuffix, Opt_err}; static const match_table_t tokens = { {Opt_uid, "uid=%u"}, {Opt_gid, "gid=%u"}, {Opt_ownmask, "ownmask=%o"}, {Opt_othmask, "othmask=%o"}, + {Opt_ftsuffix, "ftsuffix=%u"}, {Opt_err, NULL} }; @@ -189,6 +192,11 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; asb->s_other_mask = option; break; + case Opt_ftsuffix: + if (match_int(args, &option)) + return -EINVAL; + asb->s_ftsuffix = option; + break; default: printk("ADFS-fs: unrecognised mount option \"%s\" " "or missing value\n", p); @@ -366,6 +374,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) asb->s_gid = 0; asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK; asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK; + asb->s_ftsuffix = 0; if (parse_options(sb, data)) goto error; @@ -445,11 +454,13 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) root_obj.parent_id = root_obj.file_id = le32_to_cpu(dr->root); root_obj.name_len = 0; - root_obj.loadaddr = 0; - root_obj.execaddr = 0; + /* Set root object date as 01 Jan 1987 00:00:00 */ + root_obj.loadaddr = 0xfff0003f; + root_obj.execaddr = 0xec22c000; root_obj.size = ADFS_NEWDIR_SIZE; root_obj.attr = ADFS_NDA_DIRECTORY | ADFS_NDA_OWNER_READ | ADFS_NDA_OWNER_WRITE | ADFS_NDA_PUBLIC_READ; + root_obj.filetype = -1; /* * If this is a F+ disk with variable length directories, @@ -463,6 +474,12 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) asb->s_dir = &adfs_f_dir_ops; asb->s_namelen = ADFS_F_NAME_LEN; } + /* + * ,xyz hex filetype suffix may be added by driver + * to files that have valid RISC OS filetype + */ + if (asb->s_ftsuffix) + asb->s_namelen += 4; sb->s_d_op = &adfs_dentry_operations; root = adfs_iget(sb, &root_obj); @@ -520,7 +520,7 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) ctx->reqs_active--; if (unlikely(!ctx->reqs_active && ctx->dead)) - wake_up(&ctx->wait); + wake_up_all(&ctx->wait); } static void aio_fput_routine(struct work_struct *data) @@ -1229,7 +1229,7 @@ static void io_destroy(struct kioctx *ioctx) * by other CPUs at this point. Right now, we rely on the * locking done by the above calls to ensure this consistency. */ - wake_up(&ioctx->wait); + wake_up_all(&ioctx->wait); put_ioctx(ioctx); /* once for the lookup */ } diff --git a/fs/attr.c b/fs/attr.c index 7ca41811afa1..1007ed616314 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -59,7 +59,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) /* Make sure a caller can chmod. */ if (ia_valid & ATTR_MODE) { - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; /* Also check the setgid bit! */ if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : @@ -69,7 +69,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) /* Check for setting the inode time. */ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index bbabdcce1179..f34078d702d3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -570,7 +570,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) unsigned long elf_entry; unsigned long interp_load_addr = 0; unsigned long start_code, end_code, start_data, end_data; - unsigned long reloc_func_desc = 0; + unsigned long reloc_func_desc __maybe_unused = 0; int executable_stack = EXSTACK_DEFAULT; unsigned long def_flags = 0; struct { @@ -111,7 +111,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) if (!slab) goto out_unlock; - printk("bio: create slab <%s> at %d\n", bslab->name, entry); + printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry); bslab->slab = slab; bslab->slab_ref = 1; bslab->slab_size = sz; diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 9c949348510b..de34bfad9ec3 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -170,7 +170,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, int ret; struct posix_acl *acl = NULL; - if (!is_owner_or_cap(dentry->d_inode)) + if (!inode_owner_or_capable(dentry->d_inode)) return -EPERM; if (!IS_POSIXACL(dentry->d_inode)) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5fdb2abc4fa7..d1bace3df9b6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -158,7 +158,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) FS_SYNC_FL | FS_DIRSYNC_FL)) return -EOPNOTSUPP; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; mutex_lock(&inode->i_mutex); @@ -1077,7 +1077,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, if (flags & ~BTRFS_SUBVOL_RDONLY) return -EOPNOTSUPP; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; down_write(&root->fs_info->subvol_sem); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index f5ec2d44150d..faccd47c6c46 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -57,7 +57,8 @@ static struct list_head *zlib_alloc_workspace(void) if (!workspace) return ERR_PTR(-ENOMEM); - workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); + workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize( + MAX_WBITS, MAX_MEM_LEVEL)); workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); if (!workspace->def_strm.workspace || diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 08f65faac112..0dba6915712b 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -210,8 +210,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) if (!fsc->debugfs_congestion_kb) goto out; - dout("a\n"); - snprintf(name, sizeof(name), "../../bdi/%s", dev_name(fsc->backing_dev_info.dev)); fsc->debugfs_bdi = @@ -221,7 +219,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) if (!fsc->debugfs_bdi) goto out; - dout("b\n"); fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", 0600, fsc->client->debugfs_dir, @@ -230,7 +227,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) if (!fsc->debugfs_mdsmap) goto out; - dout("ca\n"); fsc->debugfs_mdsc = debugfs_create_file("mdsc", 0600, fsc->client->debugfs_dir, @@ -239,7 +235,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) if (!fsc->debugfs_mdsc) goto out; - dout("da\n"); fsc->debugfs_caps = debugfs_create_file("caps", 0400, fsc->client->debugfs_dir, @@ -248,7 +243,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) if (!fsc->debugfs_caps) goto out; - dout("ea\n"); fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", 0600, fsc->client->debugfs_dir, diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index ebafa65a29b6..1a867a3601ae 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -161,7 +161,7 @@ more: filp->f_pos = di->offset; err = filldir(dirent, dentry->d_name.name, dentry->d_name.len, di->offset, - dentry->d_inode->i_ino, + ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), dentry->d_inode->i_mode >> 12); if (last) { @@ -245,15 +245,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) dout("readdir off 0 -> '.'\n"); if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0), - inode->i_ino, inode->i_mode >> 12) < 0) + ceph_translate_ino(inode->i_sb, inode->i_ino), + inode->i_mode >> 12) < 0) return 0; filp->f_pos = 1; off = 1; } if (filp->f_pos == 1) { + ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino; dout("readdir off 1 -> '..'\n"); if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1), - filp->f_dentry->d_parent->d_inode->i_ino, + ceph_translate_ino(inode->i_sb, ino), inode->i_mode >> 12) < 0) return 0; filp->f_pos = 2; @@ -377,7 +379,8 @@ more: if (filldir(dirent, rinfo->dir_dname[off - fi->offset], rinfo->dir_dname_len[off - fi->offset], - pos, ino, ftype) < 0) { + pos, + ceph_translate_ino(inode->i_sb, ino), ftype) < 0) { dout("filldir stopping us...\n"); return 0; } @@ -1024,14 +1027,13 @@ out_touch: } /* - * When a dentry is released, clear the dir I_COMPLETE if it was part - * of the current dir gen or if this is in the snapshot namespace. + * Release our ceph_dentry_info. */ -static void ceph_dentry_release(struct dentry *dentry) +static void ceph_d_release(struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); - dout("dentry_release %p\n", dentry); + dout("d_release %p\n", dentry); if (di) { ceph_dentry_lru_del(dentry); if (di->lease_session) @@ -1256,14 +1258,14 @@ const struct inode_operations ceph_dir_iops = { const struct dentry_operations ceph_dentry_ops = { .d_revalidate = ceph_d_revalidate, - .d_release = ceph_dentry_release, + .d_release = ceph_d_release, }; const struct dentry_operations ceph_snapdir_dentry_ops = { .d_revalidate = ceph_snapdir_d_revalidate, - .d_release = ceph_dentry_release, + .d_release = ceph_d_release, }; const struct dentry_operations ceph_snap_dentry_ops = { - .d_release = ceph_dentry_release, + .d_release = ceph_d_release, }; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7d0e4a82d898..159b512d5a27 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -564,11 +564,19 @@ more: * start_request so that a tid has been assigned. */ spin_lock(&ci->i_unsafe_lock); - list_add(&req->r_unsafe_item, &ci->i_unsafe_writes); + list_add_tail(&req->r_unsafe_item, + &ci->i_unsafe_writes); spin_unlock(&ci->i_unsafe_lock); ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); } + ret = ceph_osdc_wait_request(&fsc->client->osdc, req); + if (ret < 0 && req->r_safe_callback) { + spin_lock(&ci->i_unsafe_lock); + list_del_init(&req->r_unsafe_item); + spin_unlock(&ci->i_unsafe_lock); + ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR); + } } if (file->f_flags & O_DIRECT) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 193bfa5e9cbd..b54c97da1c43 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -36,6 +36,13 @@ static void ceph_vmtruncate_work(struct work_struct *work); /* * find or create an inode, given the ceph ino number */ +static int ceph_set_ino_cb(struct inode *inode, void *data) +{ + ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; + inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data); + return 0; +} + struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) { struct inode *inode; @@ -1030,9 +1037,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, dout("fill_trace doing d_move %p -> %p\n", req->r_old_dentry, dn); - /* d_move screws up d_subdirs order */ - ceph_i_clear(dir, CEPH_I_COMPLETE); - d_move(req->r_old_dentry, dn); dout(" src %p '%.*s' dst %p '%.*s'\n", req->r_old_dentry, @@ -1044,12 +1048,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(dn); - /* take overwritten dentry's readdir offset */ - dout("dn %p gets %p offset %lld (old offset %lld)\n", - req->r_old_dentry, dn, ceph_dentry(dn)->offset, + /* + * d_move() puts the renamed dentry at the end of + * d_subdirs. We need to assign it an appropriate + * directory offset so we can behave when holding + * I_COMPLETE. + */ + ceph_set_dentry_offset(req->r_old_dentry); + dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); - ceph_dentry(req->r_old_dentry)->offset = - ceph_dentry(dn)->offset; dn = req->r_old_dentry; /* use old_dentry */ in = dn->d_inode; @@ -1809,7 +1816,7 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL); if (!err) { generic_fillattr(inode, stat); - stat->ino = inode->i_ino; + stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); if (ceph_snap(inode) != CEPH_NOSNAP) stat->dev = ceph_snap(inode); else diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9c5085465a63..a9e78b4a258c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -131,6 +131,7 @@ enum { Opt_rbytes, Opt_norbytes, Opt_noasyncreaddir, + Opt_ino32, }; static match_table_t fsopt_tokens = { @@ -150,6 +151,7 @@ static match_table_t fsopt_tokens = { {Opt_rbytes, "rbytes"}, {Opt_norbytes, "norbytes"}, {Opt_noasyncreaddir, "noasyncreaddir"}, + {Opt_ino32, "ino32"}, {-1, NULL} }; @@ -225,6 +227,9 @@ static int parse_fsopt_token(char *c, void *private) case Opt_noasyncreaddir: fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; break; + case Opt_ino32: + fsopt->flags |= CEPH_MOUNT_OPT_INO32; + break; default: BUG_ON(token); } @@ -288,7 +293,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, fsopt->sb_flags = flags; fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; - fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; + fsopt->rsize = CEPH_RSIZE_DEFAULT; fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; @@ -370,7 +375,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) if (fsopt->wsize) seq_printf(m, ",wsize=%d", fsopt->wsize); - if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT) + if (fsopt->rsize != CEPH_RSIZE_DEFAULT) seq_printf(m, ",rsize=%d", fsopt->rsize); if (fsopt->congestion_kb != default_congestion_kb()) seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 20b907d76ae2..619fe719968f 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -27,6 +27,7 @@ #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ +#define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */ #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) @@ -35,6 +36,7 @@ #define ceph_test_mount_opt(fsc, opt) \ (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) +#define CEPH_RSIZE_DEFAULT (512*1024) /* readahead */ #define CEPH_MAX_READDIR_DEFAULT 1024 #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) #define CEPH_SNAPDIRNAME_DEFAULT ".snap" @@ -319,6 +321,16 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode) return container_of(inode, struct ceph_inode_info, vfs_inode); } +static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode) +{ + return (struct ceph_fs_client *)inode->i_sb->s_fs_info; +} + +static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb) +{ + return (struct ceph_fs_client *)sb->s_fs_info; +} + static inline struct ceph_vino ceph_vino(struct inode *inode) { return ceph_inode(inode)->i_vino; @@ -327,19 +339,49 @@ static inline struct ceph_vino ceph_vino(struct inode *inode) /* * ino_t is <64 bits on many architectures, blech. * - * don't include snap in ino hash, at least for now. + * i_ino (kernel inode) st_ino (userspace) + * i386 32 32 + * x86_64+ino32 64 32 + * x86_64 64 64 + */ +static inline u32 ceph_ino_to_ino32(ino_t ino) +{ + ino ^= ino >> (sizeof(ino) * 8 - 32); + if (!ino) + ino = 1; + return ino; +} + +/* + * kernel i_ino value */ static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) { ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ #if BITS_PER_LONG == 32 - ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; - if (!ino) - ino = 1; + ino = ceph_ino_to_ino32(ino); #endif return ino; } +/* + * user-visible ino (stat, filldir) + */ +#if BITS_PER_LONG == 32 +static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino) +{ + return ino; +} +#else +static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino) +{ + if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32)) + ino = ceph_ino_to_ino32(ino); + return ino; +} +#endif + + /* for printf-style formatting */ #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap @@ -428,13 +470,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) return ((loff_t)frag << 32) | (loff_t)off; } -static inline int ceph_set_ino_cb(struct inode *inode, void *data) -{ - ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; - inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data); - return 0; -} - /* * caps helpers */ @@ -503,15 +538,6 @@ extern void ceph_reservation_status(struct ceph_fs_client *client, int *total, int *avail, int *used, int *reserved, int *min); -static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode) -{ - return (struct ceph_fs_client *)inode->i_sb->s_fs_info; -} - -static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb) -{ - return (struct ceph_fs_client *)sb->s_fs_info; -} /* diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c index c6405ce3c50e..06d27a41807f 100644 --- a/fs/coda/sysctl.c +++ b/fs/coda/sysctl.c @@ -13,7 +13,6 @@ #ifdef CONFIG_SYSCTL static struct ctl_table_header *fs_table_header; -#endif static ctl_table coda_table[] = { { @@ -40,7 +39,6 @@ static ctl_table coda_table[] = { {} }; -#ifdef CONFIG_SYSCTL static ctl_table fs_table[] = { { .procname = "coda", @@ -49,22 +47,18 @@ static ctl_table fs_table[] = { }, {} }; -#endif void coda_sysctl_init(void) { -#ifdef CONFIG_SYSCTL if ( !fs_table_header ) fs_table_header = register_sysctl_table(fs_table); -#endif } void coda_sysctl_clean(void) { -#ifdef CONFIG_SYSCTL if ( fs_table_header ) { unregister_sysctl_table(fs_table_header); fs_table_header = NULL; } -#endif } +#endif diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index c6bd815dc794..2f27e578d466 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -502,7 +502,7 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) mutex_lock(&root->d_inode->i_mutex); dentry = d_alloc_name(root, s); - if (!IS_ERR(dentry)) { + if (dentry) { d_add(dentry, inode); fsnotify_create(root->d_inode, dentry); } else { diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 2195c213ab2f..816f88e6b9ce 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -45,7 +45,11 @@ static void drop_slab(void) int drop_caches_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { - proc_dointvec_minmax(table, write, buffer, length, ppos); + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (ret) + return ret; if (write) { if (sysctl_drop_caches & 1) iterate_supers(drop_pagecache_sb, NULL); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ff12f7ac73ef..ed38801b57a7 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -316,6 +316,19 @@ static void ep_nested_calls_init(struct nested_calls *ncalls) } /** + * ep_events_available - Checks if ready events might be available. + * + * @ep: Pointer to the eventpoll context. + * + * Returns: Returns a value different than zero if ready events are available, + * or zero otherwise. + */ +static inline int ep_events_available(struct eventpoll *ep) +{ + return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR; +} + +/** * ep_call_nested - Perform a bound (possibly) nested call, by checking * that the recursion limit is not exceeded, and that * the same nested call (by the meaning of same cookie) is @@ -1135,12 +1148,29 @@ static inline struct timespec ep_set_mstimeout(long ms) return timespec_add_safe(now, ts); } +/** + * ep_poll - Retrieves ready events, and delivers them to the caller supplied + * event buffer. + * + * @ep: Pointer to the eventpoll context. + * @events: Pointer to the userspace buffer where the ready events should be + * stored. + * @maxevents: Size (in terms of number of events) of the caller event buffer. + * @timeout: Maximum timeout for the ready events fetch operation, in + * milliseconds. If the @timeout is zero, the function will not block, + * while if the @timeout is less than zero, the function will block + * until at least one event has been retrieved (or an error + * occurred). + * + * Returns: Returns the number of ready events which have been fetched, or an + * error code, in case of error. + */ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { - int res, eavail, timed_out = 0; + int res = 0, eavail, timed_out = 0; unsigned long flags; - long slack; + long slack = 0; wait_queue_t wait; ktime_t expires, *to = NULL; @@ -1151,14 +1181,19 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, to = &expires; *to = timespec_to_ktime(end_time); } else if (timeout == 0) { + /* + * Avoid the unnecessary trip to the wait queue loop, if the + * caller specified a non blocking operation. + */ timed_out = 1; + spin_lock_irqsave(&ep->lock, flags); + goto check_events; } -retry: +fetch_events: spin_lock_irqsave(&ep->lock, flags); - res = 0; - if (list_empty(&ep->rdllist)) { + if (!ep_events_available(ep)) { /* * We don't have any available event to return to the caller. * We need to sleep here, and we will be wake up by @@ -1174,7 +1209,7 @@ retry: * to TASK_INTERRUPTIBLE before doing the checks. */ set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&ep->rdllist) || timed_out) + if (ep_events_available(ep) || timed_out) break; if (signal_pending(current)) { res = -EINTR; @@ -1191,8 +1226,9 @@ retry: set_current_state(TASK_RUNNING); } +check_events: /* Is it worth to try to dig for events ? */ - eavail = !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR; + eavail = ep_events_available(ep); spin_unlock_irqrestore(&ep->lock, flags); @@ -1203,7 +1239,7 @@ retry: */ if (!res && eavail && !(res = ep_send_events(ep, events, maxevents)) && !timed_out) - goto retry; + goto fetch_events; return res; } diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 7b4180554a62..abea5a17c764 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -406,7 +406,7 @@ ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, return -EINVAL; if (!test_opt(dentry->d_sb, POSIX_ACL)) return -EOPNOTSUPP; - if (!is_owner_or_cap(dentry->d_inode)) + if (!inode_owner_or_capable(dentry->d_inode)) return -EPERM; if (value) { diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 1b48c3370872..645be9e7ee47 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -174,3 +174,9 @@ ext2_group_first_block_no(struct super_block *sb, unsigned long group_no) return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) + le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block); } + +#define ext2_set_bit __test_and_set_bit_le +#define ext2_clear_bit __test_and_clear_bit_le +#define ext2_test_bit test_bit_le +#define ext2_find_first_zero_bit find_first_zero_bit_le +#define ext2_find_next_zero_bit find_next_zero_bit_le diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index e7431309bdca..f81e250ac5c4 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -39,7 +39,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (ret) return ret; - if (!is_owner_or_cap(inode)) { + if (!inode_owner_or_capable(inode)) { ret = -EACCES; goto setflags_out; } @@ -89,7 +89,7 @@ setflags_out: case EXT2_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *) arg); case EXT2_IOC_SETVERSION: - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; ret = mnt_want_write(filp->f_path.mnt); if (ret) @@ -115,7 +115,7 @@ setflags_out: if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; if (get_user(rsv_window_size, (int __user *)arg)) diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index e4fa49e6c539..9d021c0d472a 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -435,7 +435,7 @@ ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, return -EINVAL; if (!test_opt(inode->i_sb, POSIX_ACL)) return -EOPNOTSUPP; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; if (value) { diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index fc080dd561f7..f4090bd2f345 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -38,7 +38,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) unsigned int oldflags; unsigned int jflag; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; if (get_user(flags, (int __user *) arg)) @@ -123,7 +123,7 @@ flags_out: __u32 generation; int err; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; err = mnt_want_write(filp->f_path.mnt); @@ -192,7 +192,7 @@ setversion_out: if (err) return err; - if (!is_owner_or_cap(inode)) { + if (!inode_owner_or_capable(inode)) { err = -EACCES; goto setrsvsz_out; } diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index e0270d1f8d82..21eacd7b7d79 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -433,7 +433,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, return -EINVAL; if (!test_opt(inode->i_sb, POSIX_ACL)) return -EOPNOTSUPP; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; if (value) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3aa0b72b3b94..4daaf2b753f4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -923,14 +923,14 @@ struct ext4_inode_info { #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ EXT4_MOUNT2_##opt) -#define ext4_set_bit ext2_set_bit +#define ext4_set_bit __test_and_set_bit_le #define ext4_set_bit_atomic ext2_set_bit_atomic -#define ext4_clear_bit ext2_clear_bit +#define ext4_clear_bit __test_and_clear_bit_le #define ext4_clear_bit_atomic ext2_clear_bit_atomic -#define ext4_test_bit ext2_test_bit -#define ext4_find_first_zero_bit ext2_find_first_zero_bit -#define ext4_find_next_zero_bit ext2_find_next_zero_bit -#define ext4_find_next_bit ext2_find_next_bit +#define ext4_test_bit test_bit_le +#define ext4_find_first_zero_bit find_first_zero_bit_le +#define ext4_find_next_zero_bit find_next_zero_bit_le +#define ext4_find_next_bit find_next_bit_le /* * Maximal mount counts between two filesystem checks diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index eb3bc2fe647e..a84faa110bcd 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) unsigned int oldflags; unsigned int jflag; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; if (get_user(flags, (int __user *) arg)) @@ -146,7 +146,7 @@ flags_out: __u32 generation; int err; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; err = mnt_want_write(filp->f_path.mnt); @@ -298,7 +298,7 @@ mext_out: case EXT4_IOC_MIGRATE: { int err; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; err = mnt_want_write(filp->f_path.mnt); @@ -320,7 +320,7 @@ mext_out: case EXT4_IOC_ALLOC_DA_BLKS: { int err; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; err = mnt_want_write(filp->f_path.mnt); diff --git a/fs/fcntl.c b/fs/fcntl.c index 6c82e5bac039..22764c7c8382 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -159,7 +159,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) /* O_NOATIME can only be set by the owner or superuser */ if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; /* required for strict SunOS emulation */ diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 7c39b885f969..b6cca47f7b07 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -305,7 +305,7 @@ static void cuse_gendev_release(struct device *dev) static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) { struct cuse_conn *cc = fc_to_cc(fc); - struct cuse_init_out *arg = &req->misc.cuse_init_out; + struct cuse_init_out *arg = req->out.args[0].value; struct page *page = req->pages[0]; struct cuse_devinfo devinfo = { }; struct device *dev; @@ -384,6 +384,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) dev_set_uevent_suppress(dev, 0); kobject_uevent(&dev->kobj, KOBJ_ADD); out: + kfree(arg); __free_page(page); return; @@ -405,6 +406,7 @@ static int cuse_send_init(struct cuse_conn *cc) struct page *page; struct fuse_conn *fc = &cc->fc; struct cuse_init_in *arg; + void *outarg; BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); @@ -419,6 +421,10 @@ static int cuse_send_init(struct cuse_conn *cc) if (!page) goto err_put_req; + outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL); + if (!outarg) + goto err_free_page; + arg = &req->misc.cuse_init_in; arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; @@ -429,7 +435,7 @@ static int cuse_send_init(struct cuse_conn *cc) req->in.args[0].value = arg; req->out.numargs = 2; req->out.args[0].size = sizeof(struct cuse_init_out); - req->out.args[0].value = &req->misc.cuse_init_out; + req->out.args[0].value = outarg; req->out.args[1].size = CUSE_INIT_INFO_MAX; req->out.argvar = 1; req->out.argpages = 1; @@ -440,6 +446,8 @@ static int cuse_send_init(struct cuse_conn *cc) return 0; +err_free_page: + __free_page(page); err_put_req: fuse_put_request(fc, req); err: diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index cf8d28d1fbad..640fc229df10 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -737,14 +737,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (WARN_ON(PageMlocked(oldpage))) goto out_fallback_unlock; - remove_from_page_cache(oldpage); - page_cache_release(oldpage); - - err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL); + err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL); if (err) { - printk(KERN_WARNING "fuse_try_move_page: failed to add page"); - goto out_fallback_unlock; + unlock_page(newpage); + return err; } + page_cache_get(newpage); if (!(buf->flags & PIPE_BUF_FLAG_LRU)) @@ -1910,6 +1908,21 @@ __acquires(fc->lock) kfree(dequeue_forget(fc, 1, NULL)); } +static void end_polls(struct fuse_conn *fc) +{ + struct rb_node *p; + + p = rb_first(&fc->polled_files); + + while (p) { + struct fuse_file *ff; + ff = rb_entry(p, struct fuse_file, polled_node); + wake_up_interruptible_all(&ff->poll_wait); + + p = rb_next(p); + } +} + /* * Abort all requests. * @@ -1937,6 +1950,7 @@ void fuse_abort_conn(struct fuse_conn *fc) fc->blocked = 0; end_io_requests(fc); end_queued_requests(fc); + end_polls(fc); wake_up_all(&fc->waitq); wake_up_all(&fc->blocked_waitq); kill_fasync(&fc->fasync, SIGIO, POLL_IN); @@ -1953,6 +1967,7 @@ int fuse_dev_release(struct inode *inode, struct file *file) fc->connected = 0; fc->blocked = 0; end_queued_requests(fc); + end_polls(fc); wake_up_all(&fc->blocked_waitq); spin_unlock(&fc->lock); fuse_conn_put(fc); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8bd0ef9286c3..c6ba49bd95b3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -158,10 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) { struct inode *inode; - if (nd && nd->flags & LOOKUP_RCU) - return -ECHILD; - - inode = entry->d_inode; + inode = ACCESS_ONCE(entry->d_inode); if (inode && is_bad_inode(inode)) return 0; else if (fuse_dentry_time(entry) < get_jiffies_64()) { @@ -177,6 +174,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (!inode) return 0; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + fc = get_fuse_conn(inode); req = fuse_get_req(fc); if (IS_ERR(req)) @@ -970,6 +970,14 @@ static int fuse_access(struct inode *inode, int mask) return err; } +static int fuse_perm_getattr(struct inode *inode, int flags) +{ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + + return fuse_do_getattr(inode, NULL, NULL); +} + /* * Check permission. The two basic access models of FUSE are: * @@ -989,9 +997,6 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags) bool refreshed = false; int err = 0; - if (flags & IPERM_FLAG_RCU) - return -ECHILD; - if (!fuse_allow_task(fc, current)) return -EACCES; @@ -1000,9 +1005,15 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags) */ if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) || ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { - err = fuse_update_attributes(inode, NULL, NULL, &refreshed); - if (err) - return err; + struct fuse_inode *fi = get_fuse_inode(inode); + + if (fi->i_time < get_jiffies_64()) { + refreshed = true; + + err = fuse_perm_getattr(inode, flags); + if (err) + return err; + } } if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { @@ -1012,7 +1023,7 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags) attributes. This is also needed, because the root node will at first have no permissions */ if (err == -EACCES && !refreshed) { - err = fuse_do_getattr(inode, NULL, NULL); + err = fuse_perm_getattr(inode, flags); if (!err) err = generic_permission(inode, mask, flags, NULL); @@ -1023,13 +1034,16 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags) noticed immediately, only after the attribute timeout has expired */ } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + err = fuse_access(inode, mask); } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { if (!(inode->i_mode & S_IXUGO)) { if (refreshed) return -EACCES; - err = fuse_do_getattr(inode, NULL, NULL); + err = fuse_perm_getattr(inode, flags); if (!err && !(inode->i_mode & S_IXUGO)) return -EACCES; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 9e0832dbb1e3..6ea00734984e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -222,7 +222,7 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) rb_erase(&ff->polled_node, &fc->polled_files); spin_unlock(&fc->lock); - wake_up_interruptible_sync(&ff->poll_wait); + wake_up_interruptible_all(&ff->poll_wait); inarg->fh = ff->fh; inarg->flags = flags; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d4286947bc2c..b788becada76 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -272,7 +272,6 @@ struct fuse_req { struct fuse_init_in init_in; struct fuse_init_out init_out; struct cuse_init_in cuse_init_in; - struct cuse_init_out cuse_init_out; struct { struct fuse_read_in in; u64 attr_ver; diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 06c48a891832..8f26d1a58912 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -74,7 +74,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, return -EINVAL; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; if (value) { acl = posix_acl_from_xattr(value, size); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4074b952b059..b2682e073eee 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -221,7 +221,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) goto out_drop_write; error = -EACCES; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) goto out; error = 0; diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index 508ce662ce12..fbaa6690c8e0 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -47,7 +47,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) if (err) goto out; - if (!is_owner_or_cap(inode)) { + if (!inode_owner_or_capable(inode)) { err = -EACCES; goto out_drop_write; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9885082b470f..b9eeb1cd03ff 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -332,8 +332,7 @@ static void truncate_huge_page(struct page *page) { cancel_dirty_page(page, /* No IO accounting for huge pages? */0); ClearPageUptodate(page); - remove_from_page_cache(page); - put_page(page); + delete_from_page_cache(page); } static void truncate_hugepages(struct inode *inode, loff_t lstart) diff --git a/fs/inode.c b/fs/inode.c index 16fefd373fc2..0b3da4a77704 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -25,6 +25,7 @@ #include <linux/async.h> #include <linux/posix_acl.h> #include <linux/ima.h> +#include <linux/cred.h> /* * This is needed for the following functions: @@ -1733,3 +1734,22 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, inode->i_mode = mode; } EXPORT_SYMBOL(inode_init_owner); + +/** + * inode_owner_or_capable - check current task permissions to inode + * @inode: inode being checked + * + * Return true if current either has CAP_FOWNER to the inode, or + * owns the file. + */ +bool inode_owner_or_capable(const struct inode *inode) +{ + struct user_namespace *ns = inode_userns(inode); + + if (current_user_ns() == ns && current_fsuid() == inode->i_uid) + return true; + if (ns_capable(ns, CAP_FOWNER)) + return true; + return false; +} +EXPORT_SYMBOL(inode_owner_or_capable); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 95b79672150a..828a0e1ea438 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -402,7 +402,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name, if (name[0] != '\0') return -EINVAL; - if (!is_owner_or_cap(dentry->d_inode)) + if (!inode_owner_or_capable(dentry->d_inode)) return -EPERM; if (value) { diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c index fd05a0b9431d..5a001020c542 100644 --- a/fs/jffs2/compr_zlib.c +++ b/fs/jffs2/compr_zlib.c @@ -40,12 +40,13 @@ static z_stream inf_strm, def_strm; static int __init alloc_workspaces(void) { - def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); + def_strm.workspace = vmalloc(zlib_deflate_workspacesize(MAX_WBITS, + MAX_MEM_LEVEL)); if (!def_strm.workspace) { - printk(KERN_WARNING "Failed to allocate %d bytes for deflate workspace\n", zlib_deflate_workspacesize()); + printk(KERN_WARNING "Failed to allocate %d bytes for deflate workspace\n", zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL)); return -ENOMEM; } - D1(printk(KERN_DEBUG "Allocated %d bytes for deflate workspace\n", zlib_deflate_workspacesize())); + D1(printk(KERN_DEBUG "Allocated %d bytes for deflate workspace\n", zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL))); inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); if (!inf_strm.workspace) { printk(KERN_WARNING "Failed to allocate %d bytes for inflate workspace\n", zlib_inflate_workspacesize()); diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index afe222bf300f..6f98a1866776 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -72,7 +72,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err) return err; - if (!is_owner_or_cap(inode)) { + if (!inode_owner_or_capable(inode)) { err = -EACCES; goto setflags_out; } diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 3fa4c32272df..24838f1eeee5 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -678,7 +678,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, struct posix_acl *acl; int rc; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; /* diff --git a/fs/logfs/compr.c b/fs/logfs/compr.c index 44bbfd249abc..961f02b86d97 100644 --- a/fs/logfs/compr.c +++ b/fs/logfs/compr.c @@ -81,7 +81,7 @@ error: int __init logfs_compr_init(void) { - size_t size = max(zlib_deflate_workspacesize(), + size_t size = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL), zlib_inflate_workspacesize()); stream.workspace = vmalloc(size); if (!stream.workspace) diff --git a/fs/logfs/file.c b/fs/logfs/file.c index e86376b87af1..c2ad7028def4 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -196,7 +196,7 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (IS_RDONLY(inode)) return -EROFS; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; err = get_user(flags, (int __user *)arg); diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig index 0fd7ca994264..6624684dd5de 100644 --- a/fs/minix/Kconfig +++ b/fs/minix/Kconfig @@ -15,3 +15,11 @@ config MINIX_FS module will be called minix. Note that the file system of your root partition (the one containing the directory /) cannot be compiled as a module. + +config MINIX_FS_NATIVE_ENDIAN + def_bool MINIX_FS + depends on H8300 || M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU) + +config MINIX_FS_BIG_ENDIAN_16BIT_INDEXED + def_bool MINIX_FS + depends on M68K && MMU diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 407b1c84911e..341e2122879a 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -88,4 +88,78 @@ static inline struct minix_inode_info *minix_i(struct inode *inode) return list_entry(inode, struct minix_inode_info, vfs_inode); } +#if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \ + defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED) + +#error Minix file system byte order broken + +#elif defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) + +/* + * big-endian 32 or 64 bit indexed bitmaps on big-endian system or + * little-endian bitmaps on little-endian system + */ + +#define minix_test_and_set_bit(nr, addr) \ + __test_and_set_bit((nr), (unsigned long *)(addr)) +#define minix_set_bit(nr, addr) \ + __set_bit((nr), (unsigned long *)(addr)) +#define minix_test_and_clear_bit(nr, addr) \ + __test_and_clear_bit((nr), (unsigned long *)(addr)) +#define minix_test_bit(nr, addr) \ + test_bit((nr), (unsigned long *)(addr)) +#define minix_find_first_zero_bit(addr, size) \ + find_first_zero_bit((unsigned long *)(addr), (size)) + +#elif defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED) + +/* + * big-endian 16bit indexed bitmaps + */ + +static inline int minix_find_first_zero_bit(const void *vaddr, unsigned size) +{ + const unsigned short *p = vaddr, *addr = vaddr; + unsigned short num; + + if (!size) + return 0; + + size = (size >> 4) + ((size & 15) > 0); + while (*p++ == 0xffff) { + if (--size == 0) + return (p - addr) << 4; + } + + num = *--p; + return ((p - addr) << 4) + ffz(num); +} + +#define minix_test_and_set_bit(nr, addr) \ + __test_and_set_bit((nr) ^ 16, (unsigned long *)(addr)) +#define minix_set_bit(nr, addr) \ + __set_bit((nr) ^ 16, (unsigned long *)(addr)) +#define minix_test_and_clear_bit(nr, addr) \ + __test_and_clear_bit((nr) ^ 16, (unsigned long *)(addr)) + +static inline int minix_test_bit(int nr, const void *vaddr) +{ + const unsigned short *p = vaddr; + return (p[nr >> 4] & (1U << (nr & 15))) != 0; +} + +#else + +/* + * little-endian bitmaps + */ + +#define minix_test_and_set_bit __test_and_set_bit_le +#define minix_set_bit __set_bit_le +#define minix_test_and_clear_bit __test_and_clear_bit_le +#define minix_test_bit test_bit_le +#define minix_find_first_zero_bit find_first_zero_bit_le + +#endif + #endif /* FS_MINIX_H */ diff --git a/fs/namei.c b/fs/namei.c index a4dfac650c3c..d0066e17d45d 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -183,6 +183,9 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag mask &= MAY_READ | MAY_WRITE | MAY_EXEC; + if (current_user_ns() != inode_userns(inode)) + goto other_perms; + if (current_fsuid() == inode->i_uid) mode >>= 6; else { @@ -196,6 +199,7 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag mode >>= 3; } +other_perms: /* * If the DACs are ok we don't need any capability check. */ @@ -237,7 +241,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags, * Executable DACs are overridable if at least one exec bit is set. */ if (!(mask & MAY_EXEC) || execute_ok(inode)) - if (capable(CAP_DAC_OVERRIDE)) + if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) return 0; /* @@ -245,7 +249,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags, */ mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) - if (capable(CAP_DAC_READ_SEARCH)) + if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH)) return 0; return -EACCES; @@ -654,6 +658,7 @@ static inline int handle_reval_path(struct nameidata *nd) static inline int exec_permission(struct inode *inode, unsigned int flags) { int ret; + struct user_namespace *ns = inode_userns(inode); if (inode->i_op->permission) { ret = inode->i_op->permission(inode, MAY_EXEC, flags); @@ -666,7 +671,8 @@ static inline int exec_permission(struct inode *inode, unsigned int flags) if (ret == -ECHILD) return ret; - if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) + if (ns_capable(ns, CAP_DAC_OVERRIDE) || + ns_capable(ns, CAP_DAC_READ_SEARCH)) goto ok; return ret; @@ -1845,11 +1851,15 @@ static inline int check_sticky(struct inode *dir, struct inode *inode) if (!(dir->i_mode & S_ISVTX)) return 0; + if (current_user_ns() != inode_userns(inode)) + goto other_userns; if (inode->i_uid == fsuid) return 0; if (dir->i_uid == fsuid) return 0; - return !capable(CAP_FOWNER); + +other_userns: + return !ns_capable(inode_userns(inode), CAP_FOWNER); } /* @@ -2029,7 +2039,7 @@ static int may_open(struct path *path, int acc_mode, int flag) } /* O_NOATIME can only be set by the owner or superuser */ - if (flag & O_NOATIME && !is_owner_or_cap(inode)) + if (flag & O_NOATIME && !inode_owner_or_capable(inode)) return -EPERM; /* @@ -2443,7 +2453,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) if (error) return error; - if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) + if ((S_ISCHR(mode) || S_ISBLK(mode)) && + !ns_capable(inode_userns(dir), CAP_MKNOD)) return -EPERM; if (!dir->i_op->mknod) diff --git a/fs/namespace.c b/fs/namespace.c index 9263995bf6a1..7dba2ed03429 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2701,7 +2701,7 @@ void __init mnt_init(void) if (!mount_hashtable) panic("Failed to allocate mount hash table\n"); - printk("Mount-cache hash table entries: %lu\n", HASH_SIZE); + printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 9af34a7e6e13..f5fde36b9e28 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -74,7 +74,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); #define nilfs_set_bit_atomic ext2_set_bit_atomic #define nilfs_clear_bit_atomic ext2_clear_bit_atomic -#define nilfs_find_next_zero_bit ext2_find_next_zero_bit +#define nilfs_find_next_zero_bit find_next_zero_bit_le /* * persistent object allocator cache diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 95c04c2f2b3e..f2469ba6246b 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -113,7 +113,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp, unsigned int flags, oldflags; int ret; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; if (get_user(flags, (int __user *)argp)) diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 704f6b1742f3..90f2729b7a5b 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -497,7 +497,7 @@ static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name, if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return -EOPNOTSUPP; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; if (value) { diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7a4868196152..09de77ce002a 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -82,7 +82,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, } status = -EACCES; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) goto bail_unlock; if (!S_ISDIR(inode->i_mode)) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 51cd6898e7f1..1a97ba1ec3fc 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -831,18 +831,18 @@ static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb, static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) { - ext2_set_bit(bit, bitmap); + __test_and_set_bit_le(bit, bitmap); } #define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) { - ext2_clear_bit(bit, bitmap); + __test_and_clear_bit_le(bit, bitmap); } #define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) -#define ocfs2_test_bit ext2_test_bit -#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit -#define ocfs2_find_next_bit ext2_find_next_bit +#define ocfs2_test_bit test_bit_le +#define ocfs2_find_next_zero_bit find_next_zero_bit_le +#define ocfs2_find_next_bit find_next_bit_le #endif /* OCFS2_H */ diff --git a/fs/proc/array.c b/fs/proc/array.c index 7c99c1cf7e5c..5e4f776b0917 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -489,8 +489,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, vsize, mm ? get_mm_rss(mm) : 0, rsslim, - mm ? mm->start_code : 0, - mm ? mm->end_code : 0, + mm ? (permitted ? mm->start_code : 1) : 0, + mm ? (permitted ? mm->end_code : 1) : 0, (permitted && mm) ? mm->start_stack : 0, esp, eip, diff --git a/fs/proc/base.c b/fs/proc/base.c index 7d5bb8b9a4ff..5a670c11aeac 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -390,7 +390,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, save_stack_trace_tsk(task, &trace); for (i = 0; i < trace.nr_entries; i++) { - seq_printf(m, "[<%p>] %pS\n", + seq_printf(m, "[<%pK>] %pS\n", (void *)entries[i], (void *)entries[i]); } unlock_trace(task); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 01e07f2a188f..f1281339b6fa 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -28,7 +28,7 @@ DEFINE_SPINLOCK(proc_subdir_lock); -static int proc_match(int len, const char *name, struct proc_dir_entry *de) +static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) { if (de->namelen != len) return 0; @@ -303,7 +303,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, { const char *cp = name, *next; struct proc_dir_entry *de; - int len; + unsigned int len; de = *ret; if (!de) @@ -602,7 +602,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, { struct proc_dir_entry *ent = NULL; const char *fn = name; - int len; + unsigned int len; /* make sure name is valid */ if (!name || !strlen(name)) goto out; @@ -786,7 +786,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) struct proc_dir_entry **p; struct proc_dir_entry *de = NULL; const char *fn = name; - int len; + unsigned int len; spin_lock(&proc_subdir_lock); if (__xlate_proc_name(name, &parent, &fn) != 0) { diff --git a/fs/proc/inode.c b/fs/proc/inode.c index d6a7ca1fdac5..d15aa1b1cc8f 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -46,8 +46,6 @@ static void proc_evict_inode(struct inode *inode) } } -struct vfsmount *proc_mnt; - static struct kmem_cache * proc_inode_cachep; static struct inode *proc_alloc_inode(struct super_block *sb) diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 9ad561ded409..c03e8d3a3a5b 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -107,7 +107,6 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) } void pde_put(struct proc_dir_entry *pde); -extern struct vfsmount *proc_mnt; int proc_fill_super(struct super_block *); struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); diff --git a/fs/proc/root.c b/fs/proc/root.c index ef9fa8e24ad6..a9000e9cfee5 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -43,17 +43,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, struct pid_namespace *ns; struct proc_inode *ei; - if (proc_mnt) { - /* Seed the root directory with a pid so it doesn't need - * to be special in base.c. I would do this earlier but - * the only task alive when /proc is mounted the first time - * is the init_task and it doesn't have any pids. - */ - ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); - if (!ei->pid) - ei->pid = find_get_pid(1); - } - if (flags & MS_KERNMOUNT) ns = (struct pid_namespace *)data; else @@ -71,16 +60,16 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, return ERR_PTR(err); } - ei = PROC_I(sb->s_root->d_inode); - if (!ei->pid) { - rcu_read_lock(); - ei->pid = get_pid(find_pid_ns(1, ns)); - rcu_read_unlock(); - } - sb->s_flags |= MS_ACTIVE; } + ei = PROC_I(sb->s_root->d_inode); + if (!ei->pid) { + rcu_read_lock(); + ei->pid = get_pid(find_pid_ns(1, ns)); + rcu_read_unlock(); + } + return dget(sb->s_root); } @@ -101,19 +90,20 @@ static struct file_system_type proc_fs_type = { void __init proc_root_init(void) { + struct vfsmount *mnt; int err; proc_init_inodecache(); err = register_filesystem(&proc_fs_type); if (err) return; - proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); - if (IS_ERR(proc_mnt)) { + mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); + if (IS_ERR(mnt)) { unregister_filesystem(&proc_fs_type); return; } - init_pid_ns.proc_mnt = proc_mnt; + init_pid_ns.proc_mnt = mnt; proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e73314afc535..7c708a418acc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1,5 +1,6 @@ #include <linux/mm.h> #include <linux/hugetlb.h> +#include <linux/huge_mm.h> #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/highmem.h> @@ -7,6 +8,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/mempolicy.h> +#include <linux/rmap.h> #include <linux/swap.h> #include <linux/swapops.h> @@ -249,8 +251,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) const char *name = arch_vma_name(vma); if (!name) { if (mm) { - if (vma->vm_start <= mm->start_brk && - vma->vm_end >= mm->brk) { + if (vma->vm_start <= mm->brk && + vma->vm_end >= mm->start_brk) { name = "[heap]"; } else if (vma->vm_start <= mm->start_stack && vma->vm_end >= mm->start_stack) { @@ -330,58 +332,86 @@ struct mem_size_stats { unsigned long private_dirty; unsigned long referenced; unsigned long anonymous; + unsigned long anonymous_thp; unsigned long swap; u64 pss; }; -static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, - struct mm_walk *walk) + +static void smaps_pte_entry(pte_t ptent, unsigned long addr, + unsigned long ptent_size, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = mss->vma; - pte_t *pte, ptent; - spinlock_t *ptl; struct page *page; int mapcount; - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - for (; addr != end; pte++, addr += PAGE_SIZE) { - ptent = *pte; - - if (is_swap_pte(ptent)) { - mss->swap += PAGE_SIZE; - continue; - } + if (is_swap_pte(ptent)) { + mss->swap += ptent_size; + return; + } - if (!pte_present(ptent)) - continue; + if (!pte_present(ptent)) + return; + + page = vm_normal_page(vma, addr, ptent); + if (!page) + return; + + if (PageAnon(page)) + mss->anonymous += ptent_size; + + mss->resident += ptent_size; + /* Accumulate the size in pages that have been accessed. */ + if (pte_young(ptent) || PageReferenced(page)) + mss->referenced += ptent_size; + mapcount = page_mapcount(page); + if (mapcount >= 2) { + if (pte_dirty(ptent) || PageDirty(page)) + mss->shared_dirty += ptent_size; + else + mss->shared_clean += ptent_size; + mss->pss += (ptent_size << PSS_SHIFT) / mapcount; + } else { + if (pte_dirty(ptent) || PageDirty(page)) + mss->private_dirty += ptent_size; + else + mss->private_clean += ptent_size; + mss->pss += (ptent_size << PSS_SHIFT); + } +} - page = vm_normal_page(vma, addr, ptent); - if (!page) - continue; +static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct mem_size_stats *mss = walk->private; + struct vm_area_struct *vma = mss->vma; + pte_t *pte; + spinlock_t *ptl; - if (PageAnon(page)) - mss->anonymous += PAGE_SIZE; - - mss->resident += PAGE_SIZE; - /* Accumulate the size in pages that have been accessed. */ - if (pte_young(ptent) || PageReferenced(page)) - mss->referenced += PAGE_SIZE; - mapcount = page_mapcount(page); - if (mapcount >= 2) { - if (pte_dirty(ptent) || PageDirty(page)) - mss->shared_dirty += PAGE_SIZE; - else - mss->shared_clean += PAGE_SIZE; - mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; + spin_lock(&walk->mm->page_table_lock); + if (pmd_trans_huge(*pmd)) { + if (pmd_trans_splitting(*pmd)) { + spin_unlock(&walk->mm->page_table_lock); + wait_split_huge_page(vma->anon_vma, pmd); } else { - if (pte_dirty(ptent) || PageDirty(page)) - mss->private_dirty += PAGE_SIZE; - else - mss->private_clean += PAGE_SIZE; - mss->pss += (PAGE_SIZE << PSS_SHIFT); + smaps_pte_entry(*(pte_t *)pmd, addr, + HPAGE_PMD_SIZE, walk); + spin_unlock(&walk->mm->page_table_lock); + mss->anonymous_thp += HPAGE_PMD_SIZE; + return 0; } + } else { + spin_unlock(&walk->mm->page_table_lock); } + /* + * The mmap_sem held all the way back in m_start() is what + * keeps khugepaged out of here and from collapsing things + * in here. + */ + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + for (; addr != end; pte++, addr += PAGE_SIZE) + smaps_pte_entry(*pte, addr, PAGE_SIZE, walk); pte_unmap_unlock(pte - 1, ptl); cond_resched(); return 0; @@ -417,6 +447,7 @@ static int show_smap(struct seq_file *m, void *v) "Private_Dirty: %8lu kB\n" "Referenced: %8lu kB\n" "Anonymous: %8lu kB\n" + "AnonHugePages: %8lu kB\n" "Swap: %8lu kB\n" "KernelPageSize: %8lu kB\n" "MMUPageSize: %8lu kB\n" @@ -430,6 +461,7 @@ static int show_smap(struct seq_file *m, void *v) mss.private_dirty >> 10, mss.referenced >> 10, mss.anonymous >> 10, + mss.anonymous_thp >> 10, mss.swap >> 10, vma_kernel_pagesize(vma) >> 10, vma_mmu_pagesize(vma) >> 10, @@ -469,6 +501,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, spinlock_t *ptl; struct page *page; + split_huge_page_pmd(walk->mm, pmd); + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; @@ -625,6 +659,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; int err = 0; + split_huge_page_pmd(walk->mm, pmd); + /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); for (; addr != end; addr += PAGE_SIZE) { diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 79265fdc317a..4e153051bc75 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -59,7 +59,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err) break; - if (!is_owner_or_cap(inode)) { + if (!inode_owner_or_capable(inode)) { err = -EPERM; goto setflags_out; } @@ -103,7 +103,7 @@ setflags_out: err = put_user(inode->i_generation, (int __user *)arg); break; case REISERFS_IOC_SETVERSION: - if (!is_owner_or_cap(inode)) { + if (!inode_owner_or_capable(inode)) { err = -EPERM; break; } diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 90d2fcb67a31..3dc38f1206fc 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -26,7 +26,7 @@ posix_acl_set(struct dentry *dentry, const char *name, const void *value, size_t jcreate_blocks; if (!reiserfs_posixacl(inode->i_sb)) return -EOPNOTSUPP; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EPERM; if (value) { diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 8aacd64957a2..548acf494afd 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -160,7 +160,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (IS_RDONLY(inode)) return -EROFS; - if (!is_owner_or_cap(inode)) + if (!inode_owner_or_capable(inode)) return -EACCES; if (get_user(flags, (int __user *) arg)) diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 8994dd041660..95518a9f589e 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -27,11 +27,10 @@ #include "udf_i.h" #include "udf_sb.h" -#define udf_clear_bit(nr, addr) ext2_clear_bit(nr, addr) -#define udf_set_bit(nr, addr) ext2_set_bit(nr, addr) -#define udf_test_bit(nr, addr) ext2_test_bit(nr, addr) -#define udf_find_next_one_bit(addr, size, offset) \ - ext2_find_next_bit((unsigned long *)(addr), size, offset) +#define udf_clear_bit __test_and_clear_bit_le +#define udf_set_bit __test_and_set_bit_le +#define udf_test_bit test_bit_le +#define udf_find_next_one_bit find_next_bit_le static int read_block_bitmap(struct super_block *sb, struct udf_bitmap *bitmap, unsigned int block, diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 9f8775ce381c..954175928240 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -408,7 +408,7 @@ static inline unsigned _ubh_find_next_zero_bit_( for (;;) { count = min_t(unsigned int, size + offset, uspi->s_bpf); size -= count - offset; - pos = ext2_find_next_zero_bit (ubh->bh[base]->b_data, count, offset); + pos = find_next_zero_bit_le(ubh->bh[base]->b_data, count, offset); if (pos < count || !size) break; base++; diff --git a/fs/utimes.c b/fs/utimes.c index 179b58690657..ba653f3dc1bc 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -95,7 +95,7 @@ static int utimes_common(struct path *path, struct timespec *times) if (IS_IMMUTABLE(inode)) goto mnt_drop_write_and_out; - if (!is_owner_or_cap(inode)) { + if (!inode_owner_or_capable(inode)) { error = inode_permission(inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; diff --git a/fs/xattr.c b/fs/xattr.c index 01bb8135e14a..a19acdb81cd1 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -59,7 +59,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) return -EPERM; if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && - (mask & MAY_WRITE) && !is_owner_or_cap(inode)) + (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) return -EPERM; } |