diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-13 11:28:42 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-13 11:28:42 +0200 |
commit | 77c688ac87183537ed0fb84ec2cb8fa8ec97c458 (patch) | |
tree | d18e117e05c0d71463823536165ddd9ad75b6bc5 | |
parent | 5e40d331bd72447197f26525f21711c4a265b6a6 (diff) | |
parent | a457606a6f81cfddfc9da1ef2a8bf2c65a8eb35e (diff) | |
download | lwn-77c688ac87183537ed0fb84ec2cb8fa8ec97c458.tar.gz lwn-77c688ac87183537ed0fb84ec2cb8fa8ec97c458.zip |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs updates from Al Viro:
"The big thing in this pile is Eric's unmount-on-rmdir series; we
finally have everything we need for that. The final piece of prereqs
is delayed mntput() - now filesystem shutdown always happens on
shallow stack.
Other than that, we have several new primitives for iov_iter (Matt
Wilcox, culled from his XIP-related series) pushing the conversion to
->read_iter()/ ->write_iter() a bit more, a bunch of fs/dcache.c
cleanups and fixes (including the external name refcounting, which
gives consistent behaviour of d_move() wrt procfs symlinks for long
and short names alike) and assorted cleanups and fixes all over the
place.
This is just the first pile; there's a lot of stuff from various
people that ought to go in this window. Starting with
unionmount/overlayfs mess... ;-/"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (60 commits)
fs/file_table.c: Update alloc_file() comment
vfs: Deduplicate code shared by xattr system calls operating on paths
reiserfs: remove pointless forward declaration of struct nameidata
don't need that forward declaration of struct nameidata in dcache.h anymore
take dname_external() into fs/dcache.c
let path_init() failures treated the same way as subsequent link_path_walk()
fix misuses of f_count() in ppp and netlink
ncpfs: use list_for_each_entry() for d_subdirs walk
vfs: move getname() from callers to do_mount()
gfs2_atomic_open(): skip lookups on hashed dentry
[infiniband] remove pointless assignments
gadgetfs: saner API for gadgetfs_create_file()
f_fs: saner API for ffs_sb_create_file()
jfs: don't hash direct inode
[s390] remove pointless assignment of ->f_op in vmlogrdr ->open()
ecryptfs: ->f_op is never NULL
android: ->f_op is never NULL
nouveau: __iomem misannotations
missing annotation in fs/file.c
fs: namespace: suppress 'may be used uninitialized' warnings
...
74 files changed, 889 insertions, 960 deletions
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 8be1ea3bdd5a..fceff7c00a3c 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -237,7 +237,7 @@ noted. This means that most methods can block safely. All methods are only called from a process context (i.e. not from an interrupt handler or bottom half). - alloc_inode: this method is called by inode_alloc() to allocate memory + alloc_inode: this method is called by alloc_inode() to allocate memory for struct inode and initialize it. If this function is not defined, a simple 'struct inode' is allocated. Normally alloc_inode will be used to allocate a larger structure which diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 1402fcc11c2c..f9c732e18284 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -446,7 +446,8 @@ struct procfs_args { * unhappy with OSF UFS. [CHECKME] */ static int -osf_ufs_mount(const char *dirname, struct ufs_args __user *args, int flags) +osf_ufs_mount(const char __user *dirname, + struct ufs_args __user *args, int flags) { int retval; struct cdfs_args tmp; @@ -466,7 +467,8 @@ osf_ufs_mount(const char *dirname, struct ufs_args __user *args, int flags) } static int -osf_cdfs_mount(const char *dirname, struct cdfs_args __user *args, int flags) +osf_cdfs_mount(const char __user *dirname, + struct cdfs_args __user *args, int flags) { int retval; struct cdfs_args tmp; @@ -486,7 +488,8 @@ osf_cdfs_mount(const char *dirname, struct cdfs_args __user *args, int flags) } static int -osf_procfs_mount(const char *dirname, struct procfs_args __user *args, int flags) +osf_procfs_mount(const char __user *dirname, + struct procfs_args __user *args, int flags) { struct procfs_args tmp; @@ -500,28 +503,22 @@ SYSCALL_DEFINE4(osf_mount, unsigned long, typenr, const char __user *, path, int, flag, void __user *, data) { int retval; - struct filename *name; - name = getname(path); - retval = PTR_ERR(name); - if (IS_ERR(name)) - goto out; switch (typenr) { case 1: - retval = osf_ufs_mount(name->name, data, flag); + retval = osf_ufs_mount(path, data, flag); break; case 6: - retval = osf_cdfs_mount(name->name, data, flag); + retval = osf_cdfs_mount(path, data, flag); break; case 9: - retval = osf_procfs_mount(name->name, data, flag); + retval = osf_procfs_mount(path, data, flag); break; default: retval = -EINVAL; printk("osf_mount(%ld, %x)\n", typenr, flag); } - putname(name); - out: + return retval; } diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index d21ff89207cd..df91466f973d 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -308,11 +308,8 @@ static int load_aout_binary(struct linux_binprm *bprm) (current->mm->start_brk = N_BSSADDR(ex)); retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); + if (retval < 0) return retval; - } install_exec_creds(bprm); @@ -324,17 +321,13 @@ static int load_aout_binary(struct linux_binprm *bprm) error = vm_brk(text_addr & PAGE_MASK, map_size); - if (error != (text_addr & PAGE_MASK)) { - send_sig(SIGKILL, current, 0); + if (error != (text_addr & PAGE_MASK)) return error; - } error = read_code(bprm->file, text_addr, 32, ex.a_text + ex.a_data); - if ((signed long)error < 0) { - send_sig(SIGKILL, current, 0); + if ((signed long)error < 0) return error; - } } else { #ifdef WARN_OLD static unsigned long error_time, error_time2; @@ -368,20 +361,16 @@ static int load_aout_binary(struct linux_binprm *bprm) MAP_EXECUTABLE | MAP_32BIT, fd_offset); - if (error != N_TXTADDR(ex)) { - send_sig(SIGKILL, current, 0); + if (error != N_TXTADDR(ex)) return error; - } error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset + ex.a_text); - if (error != N_DATADDR(ex)) { - send_sig(SIGKILL, current, 0); + if (error != N_DATADDR(ex)) return error; - } } beyond_if: set_binfmt(&aout_format); diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 820b4009d5f7..3265ce94d282 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -62,12 +62,6 @@ static DEFINE_SPINLOCK(rsxx_ida_lock); /* --------------------Debugfs Setup ------------------- */ -struct rsxx_cram { - u32 f_pos; - u32 offset; - void *i_private; -}; - static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p) { struct rsxx_cardinfo *card = m->private; @@ -184,93 +178,50 @@ static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file) static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct rsxx_cram *info = fp->private_data; - struct rsxx_cardinfo *card = info->i_private; + struct rsxx_cardinfo *card = file_inode(fp)->i_private; char *buf; - int st; + ssize_t st; - buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL); + buf = kzalloc(cnt, GFP_KERNEL); if (!buf) return -ENOMEM; - info->f_pos = (u32)*ppos + info->offset; - - st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1); - if (st) - return st; - - st = copy_to_user(ubuf, buf, cnt); + st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1); + if (!st) + st = copy_to_user(ubuf, buf, cnt); + kfree(buf); if (st) return st; - - info->offset += cnt; - - kfree(buf); - + *ppos += cnt; return cnt; } static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct rsxx_cram *info = fp->private_data; - struct rsxx_cardinfo *card = info->i_private; + struct rsxx_cardinfo *card = file_inode(fp)->i_private; char *buf; - int st; + ssize_t st; - buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL); + buf = kzalloc(cnt, GFP_KERNEL); if (!buf) return -ENOMEM; st = copy_from_user(buf, ubuf, cnt); + if (!st) + st = rsxx_creg_write(card, CREG_ADD_CRAM + (u32)*ppos, cnt, + buf, 1); + kfree(buf); if (st) return st; - - info->f_pos = (u32)*ppos + info->offset; - - st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1); - if (st) - return st; - - info->offset += cnt; - - kfree(buf); - + *ppos += cnt; return cnt; } -static int rsxx_cram_open(struct inode *inode, struct file *file) -{ - struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - info->i_private = inode->i_private; - info->f_pos = file->f_pos; - file->private_data = info; - - return 0; -} - -static int rsxx_cram_release(struct inode *inode, struct file *file) -{ - struct rsxx_cram *info = file->private_data; - - if (!info) - return 0; - - kfree(info); - file->private_data = NULL; - - return 0; -} - static const struct file_operations debugfs_cram_fops = { .owner = THIS_MODULE, - .open = rsxx_cram_open, .read = rsxx_cram_read, .write = rsxx_cram_write, - .release = rsxx_cram_release, }; static const struct file_operations debugfs_stats_fops = { diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 5bb5872ffee6..6653473f2757 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -160,13 +160,11 @@ static int vhci_create_device(struct vhci_data *data, __u8 opcode) } static inline ssize_t vhci_get_user(struct vhci_data *data, - const struct iovec *iov, - unsigned long count) + struct iov_iter *from) { - size_t len = iov_length(iov, count); + size_t len = iov_iter_count(from); struct sk_buff *skb; __u8 pkt_type, opcode; - unsigned long i; int ret; if (len < 2 || len > HCI_MAX_FRAME_SIZE) @@ -176,12 +174,9 @@ static inline ssize_t vhci_get_user(struct vhci_data *data, if (!skb) return -ENOMEM; - for (i = 0; i < count; i++) { - if (copy_from_user(skb_put(skb, iov[i].iov_len), - iov[i].iov_base, iov[i].iov_len)) { - kfree_skb(skb); - return -EFAULT; - } + if (copy_from_iter(skb_put(skb, len), len, from) != len) { + kfree_skb(skb); + return -EFAULT; } pkt_type = *((__u8 *) skb->data); @@ -294,13 +289,12 @@ static ssize_t vhci_read(struct file *file, return ret; } -static ssize_t vhci_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long count, loff_t pos) +static ssize_t vhci_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct vhci_data *data = file->private_data; - return vhci_get_user(data, iov, count); + return vhci_get_user(data, from); } static unsigned int vhci_poll(struct file *file, poll_table *wait) @@ -365,7 +359,7 @@ static int vhci_release(struct inode *inode, struct file *file) static const struct file_operations vhci_fops = { .owner = THIS_MODULE, .read = vhci_read, - .aio_write = vhci_write, + .write_iter = vhci_write, .poll = vhci_poll, .open = vhci_open, .release = vhci_release, diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 917403fe10da..524b707894ef 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -622,53 +622,23 @@ static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out, return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null); } -static ssize_t read_zero(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t read_iter_zero(struct kiocb *iocb, struct iov_iter *iter) { - size_t written; - - if (!count) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - - written = 0; - while (count) { - unsigned long unwritten; - size_t chunk = count; + size_t written = 0; + while (iov_iter_count(iter)) { + size_t chunk = iov_iter_count(iter), n; if (chunk > PAGE_SIZE) chunk = PAGE_SIZE; /* Just for latency reasons */ - unwritten = __clear_user(buf, chunk); - written += chunk - unwritten; - if (unwritten) - break; + n = iov_iter_zero(chunk, iter); + if (!n && iov_iter_count(iter)) + return written ? written : -EFAULT; + written += n; if (signal_pending(current)) return written ? written : -ERESTARTSYS; - buf += chunk; - count -= chunk; cond_resched(); } - return written ? written : -EFAULT; -} - -static ssize_t aio_read_zero(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - size_t written = 0; - unsigned long i; - ssize_t ret; - - for (i = 0; i < nr_segs; i++) { - ret = read_zero(iocb->ki_filp, iov[i].iov_base, iov[i].iov_len, - &pos); - if (ret < 0) - break; - written += ret; - } - - return written ? written : -EFAULT; + return written; } static int mmap_zero(struct file *file, struct vm_area_struct *vma) @@ -738,7 +708,6 @@ static int open_port(struct inode *inode, struct file *filp) #define zero_lseek null_lseek #define full_lseek null_lseek #define write_zero write_null -#define read_full read_zero #define aio_write_zero aio_write_null #define open_mem open_port #define open_kmem open_mem @@ -783,9 +752,9 @@ static const struct file_operations port_fops = { static const struct file_operations zero_fops = { .llseek = zero_lseek, - .read = read_zero, + .read = new_sync_read, .write = write_zero, - .aio_read = aio_read_zero, + .read_iter = read_iter_zero, .aio_write = aio_write_zero, .mmap = mmap_zero, }; @@ -802,7 +771,8 @@ static struct backing_dev_info zero_bdi = { static const struct file_operations full_fops = { .llseek = full_lseek, - .read = read_full, + .read = new_sync_read, + .read_iter = read_iter_zero, .write = write_full, }; diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index f3014c448e1e..5be225c2ba98 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -799,7 +799,7 @@ static int dma_buf_describe(struct seq_file *s) seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\n", buf_obj->size, buf_obj->file->f_flags, buf_obj->file->f_mode, - (long)(buf_obj->file->f_count.counter), + file_count(buf_obj->file), buf_obj->exp_name); seq_puts(s, "\tAttached Devices:\n"); diff --git a/drivers/gpu/drm/nouveau/nouveau_nvif.c b/drivers/gpu/drm/nouveau/nouveau_nvif.c index 47ca88623753..6544b84f0303 100644 --- a/drivers/gpu/drm/nouveau/nouveau_nvif.c +++ b/drivers/gpu/drm/nouveau/nouveau_nvif.c @@ -40,12 +40,12 @@ #include "nouveau_usif.h" static void -nvkm_client_unmap(void *priv, void *ptr, u32 size) +nvkm_client_unmap(void *priv, void __iomem *ptr, u32 size) { iounmap(ptr); } -static void * +static void __iomem * nvkm_client_map(void *priv, u64 handle, u32 size) { return ioremap(handle, size); diff --git a/drivers/gpu/drm/nouveau/nvif/driver.h b/drivers/gpu/drm/nouveau/nvif/driver.h index b72a8f0c2758..ac4bdb3ea506 100644 --- a/drivers/gpu/drm/nouveau/nvif/driver.h +++ b/drivers/gpu/drm/nouveau/nvif/driver.h @@ -9,8 +9,8 @@ struct nvif_driver { int (*suspend)(void *priv); int (*resume)(void *priv); int (*ioctl)(void *priv, bool super, void *data, u32 size, void **hack); - void *(*map)(void *priv, u64 handle, u32 size); - void (*unmap)(void *priv, void *ptr, u32 size); + void __iomem *(*map)(void *priv, u64 handle, u32 size); + void (*unmap)(void *priv, void __iomem *ptr, u32 size); bool keep; }; diff --git a/drivers/gpu/drm/nouveau/nvif/object.h b/drivers/gpu/drm/nouveau/nvif/object.h index fac3a3bbec44..fe519179b76c 100644 --- a/drivers/gpu/drm/nouveau/nvif/object.h +++ b/drivers/gpu/drm/nouveau/nvif/object.h @@ -14,7 +14,7 @@ struct nvif_object { void *priv; /*XXX: hack */ void (*dtor)(struct nvif_object *); struct { - void *ptr; + void __iomem *ptr; u32 size; } map; }; @@ -42,7 +42,7 @@ void nvif_object_unmap(struct nvif_object *); struct nvif_object *_object = nvif_object(a); \ u32 _data; \ if (likely(_object->map.ptr)) \ - _data = ioread##b##_native((u8 *)_object->map.ptr + (c)); \ + _data = ioread##b##_native((u8 __iomem *)_object->map.ptr + (c)); \ else \ _data = nvif_object_rd(_object, (b) / 8, (c)); \ _data; \ @@ -50,7 +50,7 @@ void nvif_object_unmap(struct nvif_object *); #define nvif_wr(a,b,c,d) ({ \ struct nvif_object *_object = nvif_object(a); \ if (likely(_object->map.ptr)) \ - iowrite##b##_native((d), (u8 *)_object->map.ptr + (c)); \ + iowrite##b##_native((d), (u8 __iomem *)_object->map.ptr + (c)); \ else \ nvif_object_wr(_object, (b) / 8, (c), (d)); \ }) diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index e0c404bdc4a8..4977082e081f 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -82,7 +82,6 @@ static int create_file(const char *name, umode_t mode, { int error; - *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(*dentry)) diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index cab610ccd50e..81854586c081 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -89,7 +89,6 @@ static int create_file(const char *name, umode_t mode, { int error; - *dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(*dentry)) diff --git a/drivers/misc/carma/carma-fpga-program.c b/drivers/misc/carma/carma-fpga-program.c index 7be89832db19..7e97e53f9ff2 100644 --- a/drivers/misc/carma/carma-fpga-program.c +++ b/drivers/misc/carma/carma-fpga-program.c @@ -749,13 +749,8 @@ static ssize_t fpga_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct fpga_dev *priv = filp->private_data; - - count = min_t(size_t, priv->bytes - *f_pos, count); - if (copy_to_user(buf, priv->vb.vaddr + *f_pos, count)) - return -EFAULT; - - *f_pos += count; - return count; + return simple_read_from_buffer(buf, count, ppos, + priv->vb.vaddr, priv->bytes); } static loff_t fpga_llseek(struct file *filp, loff_t offset, int origin) @@ -767,26 +762,7 @@ static loff_t fpga_llseek(struct file *filp, loff_t offset, int origin) if ((filp->f_flags & O_ACCMODE) != O_RDONLY) return -EINVAL; - switch (origin) { - case SEEK_SET: /* seek relative to the beginning of the file */ - newpos = offset; - break; - case SEEK_CUR: /* seek relative to current position in the file */ - newpos = filp->f_pos + offset; - break; - case SEEK_END: /* seek relative to the end of the file */ - newpos = priv->fw_size - offset; - break; - default: - return -EINVAL; - } - - /* check for sanity */ - if (newpos > priv->fw_size) - return -EINVAL; - - filp->f_pos = newpos; - return newpos; + return fixed_size_llseek(file, offset, origin, priv->fw_size); } static const struct file_operations fpga_fops = { diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 80e6f3430f65..68c3a3f4e0ab 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -594,7 +594,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (file == ppp->owner) ppp_shutdown_interface(ppp); } - if (atomic_long_read(&file->f_count) <= 2) { + if (atomic_long_read(&file->f_count) < 2) { ppp_release(NULL, file); err = 0; } else diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index a8848db7b09d..9bb48d70957c 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -338,7 +338,6 @@ static int vmlogrdr_open (struct inode *inode, struct file *filp) /* set the file options */ filp->private_data = logptr; - filp->f_op = &vmlogrdr_fops; /* start recording for this service*/ if (logptr->autorecording) { diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 713a97226787..ad4f5790a76f 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -339,7 +339,7 @@ static loff_t ashmem_llseek(struct file *file, loff_t offset, int origin) goto out; } - ret = asma->file->f_op->llseek(asma->file, offset, origin); + ret = vfs_llseek(asma->file, offset, origin); if (ret < 0) goto out; diff --git a/drivers/staging/android/ion/compat_ion.c b/drivers/staging/android/ion/compat_ion.c index ee3a7380e53b..a402fdaf54ca 100644 --- a/drivers/staging/android/ion/compat_ion.c +++ b/drivers/staging/android/ion/compat_ion.c @@ -125,7 +125,7 @@ long compat_ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { long ret; - if (!filp->f_op || !filp->f_op->unlocked_ioctl) + if (!filp->f_op->unlocked_ioctl) return -ENOTTY; switch (cmd) { diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c index 0bf0d24d12d5..28b93d39a94e 100644 --- a/drivers/staging/android/logger.c +++ b/drivers/staging/android/logger.c @@ -411,69 +411,18 @@ static void fix_up_readers(struct logger_log *log, size_t len) } /* - * do_write_log - writes 'len' bytes from 'buf' to 'log' - * - * The caller needs to hold log->mutex. - */ -static void do_write_log(struct logger_log *log, const void *buf, size_t count) -{ - size_t len; - - len = min(count, log->size - log->w_off); - memcpy(log->buffer + log->w_off, buf, len); - - if (count != len) - memcpy(log->buffer, buf + len, count - len); - - log->w_off = logger_offset(log, log->w_off + count); - -} - -/* - * do_write_log_user - writes 'len' bytes from the user-space buffer 'buf' to - * the log 'log' - * - * The caller needs to hold log->mutex. - * - * Returns 'count' on success, negative error code on failure. - */ -static ssize_t do_write_log_from_user(struct logger_log *log, - const void __user *buf, size_t count) -{ - size_t len; - - len = min(count, log->size - log->w_off); - if (len && copy_from_user(log->buffer + log->w_off, buf, len)) - return -EFAULT; - - if (count != len) - if (copy_from_user(log->buffer, buf + len, count - len)) - /* - * Note that by not updating w_off, this abandons the - * portion of the new entry that *was* successfully - * copied, just above. This is intentional to avoid - * message corruption from missing fragments. - */ - return -EFAULT; - - log->w_off = logger_offset(log, log->w_off + count); - - return count; -} - -/* - * logger_aio_write - our write method, implementing support for write(), + * logger_write_iter - our write method, implementing support for write(), * writev(), and aio_write(). Writes are our fast path, and we try to optimize * them above all else. */ -static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t ppos) +static ssize_t logger_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct logger_log *log = file_get_log(iocb->ki_filp); - size_t orig; struct logger_entry header; struct timespec now; - ssize_t ret = 0; + size_t len, count; + + count = min_t(size_t, iocb->ki_nbytes, LOGGER_ENTRY_MAX_PAYLOAD); now = current_kernel_time(); @@ -482,7 +431,7 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, header.sec = now.tv_sec; header.nsec = now.tv_nsec; header.euid = current_euid(); - header.len = min_t(size_t, iocb->ki_nbytes, LOGGER_ENTRY_MAX_PAYLOAD); + header.len = count; header.hdr_size = sizeof(struct logger_entry); /* null writes succeed, return zero */ @@ -491,8 +440,6 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, mutex_lock(&log->mutex); - orig = log->w_off; - /* * Fix up any readers, pulling them forward to the first readable * entry after (what will be) the new write offset. We do this now @@ -501,33 +448,35 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov, */ fix_up_readers(log, sizeof(struct logger_entry) + header.len); - do_write_log(log, &header, sizeof(struct logger_entry)); - - while (nr_segs-- > 0) { - size_t len; - ssize_t nr; + len = min(sizeof(header), log->size - log->w_off); + memcpy(log->buffer + log->w_off, &header, len); + memcpy(log->buffer, (char *)&header + len, sizeof(header) - len); - /* figure out how much of this vector we can keep */ - len = min_t(size_t, iov->iov_len, header.len - ret); + len = min(count, log->size - log->w_off); - /* write out this segment's payload */ - nr = do_write_log_from_user(log, iov->iov_base, len); - if (unlikely(nr < 0)) { - log->w_off = orig; - mutex_unlock(&log->mutex); - return nr; - } + if (copy_from_iter(log->buffer + log->w_off, len, from) != len) { + /* + * Note that by not updating w_off, this abandons the + * portion of the new entry that *was* successfully + * copied, just above. This is intentional to avoid + * message corruption from missing fragments. + */ + mutex_unlock(&log->mutex); + return -EFAULT; + } - iov++; - ret += nr; + if (copy_from_iter(log->buffer, count - len, from) != count - len) { + mutex_unlock(&log->mutex); + return -EFAULT; } + log->w_off = logger_offset(log, log->w_off + count); mutex_unlock(&log->mutex); /* wake up any blocked readers */ wake_up_interruptible(&log->wq); - return ret; + return len; } static struct logger_log *get_log_from_minor(int minor) @@ -736,7 +685,7 @@ static long logger_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static const struct file_operations logger_fops = { .owner = THIS_MODULE, .read = logger_read, - .aio_write = logger_aio_write, + .write_iter = logger_write_iter, .poll = logger_poll, .unlocked_ioctl = logger_ioctl, .compat_ioctl = logger_ioctl, diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c index 73cec14cbf56..8b1f53331433 100644 --- a/drivers/staging/vme/devices/vme_user.c +++ b/drivers/staging/vme/devices/vme_user.c @@ -410,41 +410,19 @@ static ssize_t vme_user_write(struct file *file, const char __user *buf, static loff_t vme_user_llseek(struct file *file, loff_t off, int whence) { - loff_t absolute = -1; unsigned int minor = MINOR(file_inode(file)->i_rdev); size_t image_size; + loff_t res; if (minor == CONTROL_MINOR) return -EINVAL; mutex_lock(&image[minor].mutex); image_size = vme_get_size(image[minor].resource); - - switch (whence) { - case SEEK_SET: - absolute = off; - break; - case SEEK_CUR: - absolute = file->f_pos + off; - break; - case SEEK_END: - absolute = image_size + off; - break; - default: - mutex_unlock(&image[minor].mutex); - return -EINVAL; - } - - if ((absolute < 0) || (absolute >= image_size)) { - mutex_unlock(&image[minor].mutex); - return -EINVAL; - } - - file->f_pos = absolute; - + res = fixed_size_llseek(file, off, whence, image_size); mutex_unlock(&image[minor].mutex); - return absolute; + return res; } /* diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 4ad11e03cf54..7c6771d027a2 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -164,10 +164,9 @@ struct ffs_desc_helper { static int __must_check ffs_epfiles_create(struct ffs_data *ffs); static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count); -static struct inode *__must_check +static struct dentry * ffs_sb_create_file(struct super_block *sb, const char *name, void *data, - const struct file_operations *fops, - struct dentry **dentry_p); + const struct file_operations *fops); /* Devices management *******************************************************/ @@ -1119,10 +1118,9 @@ ffs_sb_make_inode(struct super_block *sb, void *data, } /* Create "regular" file */ -static struct inode *ffs_sb_create_file(struct super_block *sb, +static struct dentry *ffs_sb_create_file(struct super_block *sb, const char *name, void *data, - const struct file_operations *fops, - struct dentry **dentry_p) + const struct file_operations *fops) { struct ffs_data *ffs = sb->s_fs_info; struct dentry *dentry; @@ -1141,10 +1139,7 @@ static struct inode *ffs_sb_create_file(struct super_block *sb, } d_add(dentry, inode); - if (dentry_p) - *dentry_p = dentry; - - return inode; + return dentry; } /* Super block */ @@ -1189,7 +1184,7 @@ static int ffs_sb_fill(struct super_block *sb, void *_data, int silent) /* EP0 file */ if (unlikely(!ffs_sb_create_file(sb, "ep0", ffs, - &ffs_ep0_operations, NULL))) + &ffs_ep0_operations))) return -ENOMEM; return 0; @@ -1561,9 +1556,10 @@ static int ffs_epfiles_create(struct ffs_data *ffs) sprintf(epfiles->name, "ep%02x", ffs->eps_addrmap[i]); else sprintf(epfiles->name, "ep%u", i); - if (!unlikely(ffs_sb_create_file(ffs->sb, epfiles->name, epfile, - &ffs_epfile_operations, - &epfile->dentry))) { + epfile->dentry = ffs_sb_create_file(ffs->sb, epfiles->name, + epfile, + &ffs_epfile_operations); + if (unlikely(!epfile->dentry)) { ffs_epfiles_destroy(epfiles, i - 1); return -ENOMEM; } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index edefec2cc584..c744e4975d74 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -198,7 +198,6 @@ struct ep_data { struct list_head epfiles; wait_queue_head_t wait; struct dentry *dentry; - struct inode *inode; }; static inline void get_ep (struct ep_data *data) @@ -1618,10 +1617,9 @@ static void destroy_ep_files (struct dev_data *dev) } -static struct inode * +static struct dentry * gadgetfs_create_file (struct super_block *sb, char const *name, - void *data, const struct file_operations *fops, - struct dentry **dentry_p); + void *data, const struct file_operations *fops); static int activate_ep_files (struct dev_data *dev) { @@ -1649,10 +1647,9 @@ static int activate_ep_files (struct dev_data *dev) if (!data->req) goto enomem1; - data->inode = gadgetfs_create_file (dev->sb, data->name, - data, &ep_config_operations, - &data->dentry); - if (!data->inode) + data->dentry = gadgetfs_create_file (dev->sb, data->name, + data, &ep_config_operations); + if (!data->dentry) goto enomem2; list_add_tail (&data->epfiles, &dev->epfiles); } @@ -2012,10 +2009,9 @@ gadgetfs_make_inode (struct super_block *sb, /* creates in fs root directory, so non-renamable and non-linkable. * so inode and dentry are paired, until device reconfig. */ -static struct inode * +static struct dentry * gadgetfs_create_file (struct super_block *sb, char const *name, - void *data, const struct file_operations *fops, - struct dentry **dentry_p) + void *data, const struct file_operations *fops) { struct dentry *dentry; struct inode *inode; @@ -2031,8 +2027,7 @@ gadgetfs_create_file (struct super_block *sb, char const *name, return NULL; } d_add (dentry, inode); - *dentry_p = dentry; - return inode; + return dentry; } static const struct super_operations gadget_fs_operations = { @@ -2080,9 +2075,8 @@ gadgetfs_fill_super (struct super_block *sb, void *opts, int silent) goto Enomem; dev->sb = sb; - if (!gadgetfs_create_file (sb, CHIP, - dev, &dev_init_operations, - &dev->dentry)) { + dev->dentry = gadgetfs_create_file(sb, CHIP, dev, &dev_init_operations); + if (!dev->dentry) { put_dev(dev); goto Enomem; } diff --git a/fs/9p/fid.c b/fs/9p/fid.c index d51ec9fafcc8..47db55aee7f2 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -65,8 +65,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) { struct p9_fid *fid, *ret; - p9_debug(P9_DEBUG_VFS, " dentry: %s (%p) uid %d any %d\n", - dentry->d_name.name, dentry, from_kuid(&init_user_ns, uid), + p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p) uid %d any %d\n", + dentry, dentry, from_kuid(&init_user_ns, uid), any); ret = NULL; /* we'll recheck under lock if there's anything to look in */ diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index cc1cfae726b3..eb14e055ea83 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -266,8 +266,8 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) * Now that we do caching with cache mode enabled, We need * to support direct IO */ - p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n", - iocb->ki_filp->f_path.dentry->d_name.name, + p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%pD) off/no(%lld/%lu) EINVAL\n", + iocb->ki_filp, (long long)pos, iter->nr_segs); return -EINVAL; diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index b03dd23feda8..a345b2d659cc 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -49,8 +49,8 @@ */ static int v9fs_cached_dentry_delete(const struct dentry *dentry) { - p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", - dentry->d_name.name, dentry); + p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n", + dentry, dentry); /* Don't cache negative dentries */ if (!dentry->d_inode) @@ -67,8 +67,8 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry) static void v9fs_dentry_release(struct dentry *dentry) { struct hlist_node *p, *n; - p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", - dentry->d_name.name, dentry); + p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n", + dentry, dentry); hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata) p9_client_clunk(hlist_entry(p, struct p9_fid, dlist)); dentry->d_fsdata = NULL; diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 0b3bfa303dda..4f1151088ebe 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -116,7 +116,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) int reclen = 0; struct p9_rdir *rdir; - p9_debug(P9_DEBUG_VFS, "name %s\n", file->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %pD\n", file); fid = file->private_data; buflen = fid->clnt->msize - P9_IOHDRSZ; @@ -172,7 +172,7 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx) struct p9_rdir *rdir; struct p9_dirent curdirent; - p9_debug(P9_DEBUG_VFS, "name %s\n", file->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %pD\n", file); fid = file->private_data; buflen = fid->clnt->msize - P9_READDIRHDRSZ; diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 520c11c2dcca..5594505e6e73 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -301,8 +301,8 @@ static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl) struct inode *inode = file_inode(filp); int ret = -ENOLCK; - p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", - filp, cmd, fl, filp->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n", + filp, cmd, fl, filp); /* No mandatory locks */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) @@ -337,8 +337,8 @@ static int v9fs_file_flock_dotl(struct file *filp, int cmd, struct inode *inode = file_inode(filp); int ret = -ENOLCK; - p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", - filp, cmd, fl, filp->f_path.dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %pD\n", + filp, cmd, fl, filp); /* No mandatory locks */ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 7fa4f7a7653d..296482fc77a9 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -648,7 +648,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, struct p9_fid *dfid, *ofid, *fid; struct inode *inode; - p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); err = 0; ofid = NULL; @@ -755,7 +755,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct p9_fid *fid; struct v9fs_session_info *v9ses; - p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); err = 0; v9ses = v9fs_inode2v9ses(dir); perm = unixmode2p9mode(v9ses, mode | S_IFDIR); @@ -791,8 +791,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode; char *name; - p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p flags: %x\n", - dir, dentry->d_name.name, dentry, flags); + p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%pd) %p flags: %x\n", + dir, dentry, dentry, flags); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -1239,7 +1239,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) struct p9_fid *fid; struct p9_wstat *st; - p9_debug(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, " %pd\n", dentry); retval = -EPERM; v9ses = v9fs_dentry2v9ses(dentry); fid = v9fs_fid_lookup(dentry); @@ -1262,8 +1262,8 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) retval = min(strlen(st->extension)+1, (size_t)buflen); memcpy(buffer, st->extension, retval); - p9_debug(P9_DEBUG_VFS, "%s -> %s (%.*s)\n", - dentry->d_name.name, st->extension, buflen, buffer); + p9_debug(P9_DEBUG_VFS, "%pd -> %s (%.*s)\n", + dentry, st->extension, buflen, buffer); done: p9stat_free(st); @@ -1283,7 +1283,7 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) int len = 0; char *link = __getname(); - p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); if (!link) link = ERR_PTR(-ENOMEM); @@ -1314,8 +1314,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { char *s = nd_get_link(nd); - p9_debug(P9_DEBUG_VFS, " %s %s\n", - dentry->d_name.name, IS_ERR(s) ? "<error>" : s); + p9_debug(P9_DEBUG_VFS, " %pd %s\n", + dentry, IS_ERR(s) ? "<error>" : s); if (!IS_ERR(s)) __putname(s); } @@ -1364,8 +1364,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, static int v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n", - dir->i_ino, dentry->d_name.name, symname); + p9_debug(P9_DEBUG_VFS, " %lu,%pd,%s\n", + dir->i_ino, dentry, symname); return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname); } @@ -1386,8 +1386,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, char *name; struct p9_fid *oldfid; - p9_debug(P9_DEBUG_VFS, " %lu,%s,%s\n", - dir->i_ino, dentry->d_name.name, old_dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, " %lu,%pd,%pd\n", + dir->i_ino, dentry, old_dentry); oldfid = v9fs_fid_clone(old_dentry); if (IS_ERR(oldfid)) @@ -1428,8 +1428,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde char *name; u32 perm; - p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", - dir->i_ino, dentry->d_name.name, mode, + p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n", + dir->i_ino, dentry, mode, MAJOR(rdev), MINOR(rdev)); if (!new_valid_dev(rdev)) diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 1fa85aae24df..02b64f4e576a 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -393,7 +393,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dir_dentry; struct posix_acl *dacl = NULL, *pacl = NULL; - p9_debug(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); err = 0; v9ses = v9fs_inode2v9ses(dir); @@ -767,8 +767,8 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, struct p9_fid *dfid, *oldfid; struct v9fs_session_info *v9ses; - p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", - dir->i_ino, old_dentry->d_name.name, dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "dir ino: %lu, old_name: %pd, new_name: %pd\n", + dir->i_ino, old_dentry, dentry); v9ses = v9fs_inode2v9ses(dir); dir_dentry = dentry->d_parent; @@ -917,7 +917,7 @@ v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd) char *link = __getname(); char *target; - p9_debug(P9_DEBUG_VFS, "%s\n", dentry->d_name.name); + p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); if (!link) { link = ERR_PTR(-ENOMEM); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 529300327f45..a1645b88fe8a 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -669,7 +669,6 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) out_valid: dentry->d_fsdata = dir_version; -out_skip: dput(parent); key_put(key); _leave(" = 1 [valid]"); @@ -682,10 +681,6 @@ not_found: spin_unlock(&dentry->d_lock); out_bad: - /* don't unhash if we have submounts */ - if (check_submounts_and_drop(dentry) != 0) - goto out_skip; - _debug("dropping dentry %s/%s", parent->d_name.name, dentry->d_name.name); dput(parent); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index a7be57e39be7..8fa3895cda02 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -255,12 +255,6 @@ static int autofs4_tree_busy(struct vfsmount *mnt, struct autofs_info *ino = autofs4_dentry_ino(p); unsigned int ino_count = atomic_read(&ino->count); - /* - * Clean stale dentries below that have not been - * invalidated after a mount fail during lookup - */ - d_invalidate(p); - /* allow for dget above and top is already dgot */ if (p == top) ino_count += 2; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index ca0ba15a7306..929dec08c348 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -256,11 +256,8 @@ static int load_aout_binary(struct linux_binprm * bprm) (current->mm->start_brk = N_BSSADDR(ex)); retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); - if (retval < 0) { - /* Someone check-me: is this error path enough? */ - send_sig(SIGKILL, current, 0); + if (retval < 0) return retval; - } install_exec_creds(bprm); @@ -278,17 +275,13 @@ static int load_aout_binary(struct linux_binprm * bprm) map_size = ex.a_text+ex.a_data; #endif error = vm_brk(text_addr & PAGE_MASK, map_size); - if (error != (text_addr & PAGE_MASK)) { - send_sig(SIGKILL, current, 0); + if (error != (text_addr & PAGE_MASK)) return error; - } error = read_code(bprm->file, text_addr, pos, ex.a_text+ex.a_data); - if ((signed long)error < 0) { - send_sig(SIGKILL, current, 0); + if ((signed long)error < 0) return error; - } } else { if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) @@ -315,28 +308,22 @@ static int load_aout_binary(struct linux_binprm * bprm) MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset); - if (error != N_TXTADDR(ex)) { - send_sig(SIGKILL, current, 0); + if (error != N_TXTADDR(ex)) return error; - } error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset + ex.a_text); - if (error != N_DATADDR(ex)) { - send_sig(SIGKILL, current, 0); + if (error != N_DATADDR(ex)) return error; - } } beyond_if: set_binfmt(&aout_format); retval = set_brk(current->mm->start_brk, current->mm->brk); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + if (retval < 0) return retval; - } current->mm->start_stack = (unsigned long) create_aout_tables((char __user *) bprm->p, bprm); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3892c1a23241..d8fc0605b9d2 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -738,10 +738,8 @@ static int load_elf_binary(struct linux_binprm *bprm) change some of these later */ retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), executable_stack); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + if (retval < 0) goto out_free_dentry; - } current->mm->start_stack = bprm->p; @@ -763,10 +761,8 @@ static int load_elf_binary(struct linux_binprm *bprm) and clear the area. */ retval = set_brk(elf_bss + load_bias, elf_brk + load_bias); - if (retval) { - send_sig(SIGKILL, current, 0); + if (retval) goto out_free_dentry; - } nbyte = ELF_PAGEOFFSET(elf_bss); if (nbyte) { nbyte = ELF_MIN_ALIGN - nbyte; @@ -820,7 +816,6 @@ static int load_elf_binary(struct linux_binprm *bprm) error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, 0); if (BAD_ADDR(error)) { - send_sig(SIGKILL, current, 0); retval = IS_ERR((void *)error) ? PTR_ERR((void*)error) : -EINVAL; goto out_free_dentry; @@ -851,7 +846,6 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_ppnt->p_memsz > TASK_SIZE || TASK_SIZE - elf_ppnt->p_memsz < k) { /* set_brk can never work. Avoid overflows. */ - send_sig(SIGKILL, current, 0); retval = -EINVAL; goto out_free_dentry; } @@ -883,12 +877,9 @@ static int load_elf_binary(struct linux_binprm *bprm) * up getting placed where the bss needs to go. */ retval = set_brk(elf_bss, elf_brk); - if (retval) { - send_sig(SIGKILL, current, 0); + if (retval) goto out_free_dentry; - } if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) { - send_sig(SIGSEGV, current, 0); retval = -EFAULT; /* Nobody gets to see this, but.. */ goto out_free_dentry; } @@ -909,7 +900,6 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_entry += loc->interp_elf_ex.e_entry; } if (BAD_ADDR(elf_entry)) { - force_sig(SIGSEGV, current); retval = IS_ERR((void *)elf_entry) ? (int)elf_entry : -EINVAL; goto out_free_dentry; @@ -922,7 +912,6 @@ static int load_elf_binary(struct linux_binprm *bprm) } else { elf_entry = loc->elf_ex.e_entry; if (BAD_ADDR(elf_entry)) { - force_sig(SIGSEGV, current); retval = -EINVAL; goto out_free_dentry; } @@ -934,19 +923,15 @@ static int load_elf_binary(struct linux_binprm *bprm) #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES retval = arch_setup_additional_pages(bprm, !!elf_interpreter); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + if (retval < 0) goto out; - } #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ install_exec_creds(bprm); retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr); - if (retval < 0) { - send_sig(SIGKILL, current, 0); + if (retval < 0) goto out; - } /* N.B. passed_fileno might not be initialized? */ current->mm->end_code = end_code; current->mm->start_code = start_code; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index fe2a643ee005..d3634bfb7fe1 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -317,8 +317,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) goto error; /* there's now no turning back... the old userspace image is dead, - * defunct, deceased, etc. after this point we have to exit via - * error_kill */ + * defunct, deceased, etc. + */ set_personality(PER_LINUX_FDPIC); if (elf_read_implies_exec(&exec_params.hdr, executable_stack)) current->personality |= READ_IMPLIES_EXEC; @@ -343,24 +343,22 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) retval = setup_arg_pages(bprm, current->mm->start_stack, executable_stack); - if (retval < 0) { - send_sig(SIGKILL, current, 0); - goto error_kill; - } + if (retval < 0) + goto error; #endif /* load the executable and interpreter into memory */ retval = elf_fdpic_map_file(&exec_params, bprm->file, current->mm, "executable"); if (retval < 0) - goto error_kill; + goto error; if (interpreter_name) { retval = elf_fdpic_map_file(&interp_params, interpreter, current->mm, "interpreter"); if (retval < 0) { printk(KERN_ERR "Unable to load interpreter\n"); - goto error_kill; + goto error; } allow_write_access(interpreter); @@ -397,7 +395,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) if (IS_ERR_VALUE(current->mm->start_brk)) { retval = current->mm->start_brk; current->mm->start_brk = 0; - goto error_kill; + goto error; } current->mm->brk = current->mm->start_brk; @@ -410,7 +408,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) install_exec_creds(bprm); if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0) - goto error_kill; + goto error; kdebug("- start_code %lx", current->mm->start_code); kdebug("- end_code %lx", current->mm->end_code); @@ -449,12 +447,6 @@ error: kfree(interp_params.phdrs); kfree(interp_params.loadmap); return retval; - - /* unrecoverable error - kill the process */ -error_kill: - send_sig(SIGSEGV, current, 0); - goto error; - } /*****************************************************************************/ diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e732274f1afd..0fe1aa047f15 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2419,9 +2419,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out_dput; } - err = d_invalidate(dentry); - if (err) - goto out_unlock; + d_invalidate(dentry); down_write(&root->fs_info->subvol_sem); @@ -2506,7 +2504,6 @@ out_release: btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); out_up_write: up_write(&root->fs_info->subvol_sem); -out_unlock: if (err) { spin_lock(&dest->root_item_lock); root_flags = btrfs_root_flags(&dest->root_item); diff --git a/fs/buffer.c b/fs/buffer.c index 44c14a87750e..d1f704806264 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2318,6 +2318,11 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, err = 0; balance_dirty_pages_ratelimited(mapping); + + if (unlikely(fatal_signal_pending(current))) { + err = -EINTR; + goto out; + } } /* page covers the boundary, find the boundary offset */ diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 25e745b8eb1b..616db0e77b44 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -880,7 +880,6 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) { struct cachefiles_object *object; struct cachefiles_cache *cache; - mm_segment_t old_fs; struct file *file; struct path path; loff_t pos, eof; @@ -914,36 +913,27 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) if (IS_ERR(file)) { ret = PTR_ERR(file); } else { - ret = -EIO; - if (file->f_op->write) { - pos = (loff_t) page->index << PAGE_SHIFT; - - /* we mustn't write more data than we have, so we have - * to beware of a partial page at EOF */ - eof = object->fscache.store_limit_l; - len = PAGE_SIZE; - if (eof & ~PAGE_MASK) { - ASSERTCMP(pos, <, eof); - if (eof - pos < PAGE_SIZE) { - _debug("cut short %llx to %llx", - pos, eof); - len = eof - pos; - ASSERTCMP(pos + len, ==, eof); - } + pos = (loff_t) page->index << PAGE_SHIFT; + + /* we mustn't write more data than we have, so we have + * to beware of a partial page at EOF */ + eof = object->fscache.store_limit_l; + len = PAGE_SIZE; + if (eof & ~PAGE_MASK) { + ASSERTCMP(pos, <, eof); + if (eof - pos < PAGE_SIZE) { + _debug("cut short %llx to %llx", + pos, eof); + len = eof - pos; + ASSERTCMP(pos + len, ==, eof); } - - data = kmap(page); - file_start_write(file); - old_fs = get_fs(); - set_fs(KERNEL_DS); - ret = file->f_op->write( - file, (const void __user *) data, len, &pos); - set_fs(old_fs); - kunmap(page); - file_end_write(file); - if (ret != len) - ret = -EIO; } + + data = kmap(page); + ret = __kernel_write(file, data, len, &pos); + kunmap(page); + if (ret != len) + ret = -EIO; fput(file); } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index c29d6ae68874..b6c59eaa4f64 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1069,7 +1069,6 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) ceph_dentry_lru_touch(dentry); } else { ceph_dir_clear_complete(dir); - d_drop(dentry); } iput(dir); return valid; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 6cbd9c688cfe..073640675a39 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -461,8 +461,8 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, xid = get_xid(); - cifs_dbg(FYI, "parent inode = 0x%p name is: %s and dentry = 0x%p\n", - inode, direntry->d_name.name, direntry); + cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", + inode, direntry, direntry); tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); if (IS_ERR(tlink)) { @@ -540,8 +540,8 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, struct cifs_fid fid; __u32 oplock; - cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p\n", - inode, direntry->d_name.name, direntry); + cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %pd and dentry = 0x%p\n", + inode, direntry, direntry); tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); rc = PTR_ERR(tlink); @@ -713,8 +713,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, xid = get_xid(); - cifs_dbg(FYI, "parent inode = 0x%p name is: %s and dentry = 0x%p\n", - parent_dir_inode, direntry->d_name.name, direntry); + cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", + parent_dir_inode, direntry, direntry); /* check whether path exists */ @@ -833,7 +833,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags) { int rc = 0; - cifs_dbg(FYI, "In cifs d_delete, name = %s\n", direntry->d_name.name); + cifs_dbg(FYI, "In cifs d_delete, name = %pd\n", direntry); return rc; } */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5f29354b072a..8f7b40fd8f3b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1650,8 +1650,8 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data, cifs_sb = CIFS_SB(dentry->d_sb); - cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n", - write_size, *offset, dentry->d_name.name); + cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n", + write_size, *offset, dentry); tcon = tlink_tcon(open_file->tlink); server = tcon->ses->server; @@ -2273,8 +2273,8 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, xid = get_xid(); - cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", - file->f_path.dentry->d_name.name, datasync); + cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", + file, datasync); if (!CIFS_CACHE_READ(CIFS_I(inode))) { rc = cifs_zap_mapping(inode); @@ -2315,8 +2315,8 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) xid = get_xid(); - cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n", - file->f_path.dentry->d_name.name, datasync); + cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", + file, datasync); tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7899a40465b3..8fd4ee8e07ff 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1419,8 +1419,8 @@ cifs_posix_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode, d_instantiate(dentry, newinode); #ifdef CONFIG_CIFS_DEBUG2 - cifs_dbg(FYI, "instantiated dentry %p %s to inode %p\n", - dentry, dentry->d_name.name, newinode); + cifs_dbg(FYI, "instantiated dentry %p %pd to inode %p\n", + dentry, dentry, newinode); if (newinode->i_nlink != 2) cifs_dbg(FYI, "unexpected number of links %d\n", @@ -2111,8 +2111,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) struct cifs_unix_set_info_args *args = NULL; struct cifsFileInfo *open_file; - cifs_dbg(FYI, "setattr_unix on file %s attrs->ia_valid=0x%x\n", - direntry->d_name.name, attrs->ia_valid); + cifs_dbg(FYI, "setattr_unix on file %pd attrs->ia_valid=0x%x\n", + direntry, attrs->ia_valid); xid = get_xid(); @@ -2254,8 +2254,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) xid = get_xid(); - cifs_dbg(FYI, "setattr on file %s attrs->iavalid 0x%x\n", - direntry->d_name.name, attrs->ia_valid); + cifs_dbg(FYI, "setattr on file %pd attrs->iavalid 0x%x\n", + direntry, attrs->ia_valid); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) attrs->ia_valid |= ATTR_FORCE; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b334a89d6a66..d2141f101382 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -87,8 +87,6 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, return; if (dentry) { - int err; - inode = dentry->d_inode; if (inode) { /* @@ -105,10 +103,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, goto out; } } - err = d_invalidate(dentry); + d_invalidate(dentry); dput(dentry); - if (err) - return; } /* diff --git a/fs/compat.c b/fs/compat.c index 66d3d3c6b4b2..b13df99f3534 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -794,25 +794,21 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, char *kernel_type; unsigned long data_page; char *kernel_dev; - struct filename *dir; int retval; - retval = copy_mount_string(type, &kernel_type); - if (retval < 0) + kernel_type = copy_mount_string(type); + retval = PTR_ERR(kernel_type); + if (IS_ERR(kernel_type)) goto out; - dir = getname(dir_name); - retval = PTR_ERR(dir); - if (IS_ERR(dir)) + kernel_dev = copy_mount_string(dev_name); + retval = PTR_ERR(kernel_dev); + if (IS_ERR(kernel_dev)) goto out1; - retval = copy_mount_string(dev_name, &kernel_dev); - if (retval < 0) - goto out2; - retval = copy_mount_options(data, &data_page); if (retval < 0) - goto out3; + goto out2; retval = -EINVAL; @@ -821,19 +817,17 @@ COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, do_ncp_super_data_conv((void *)data_page); } else if (!strcmp(kernel_type, NFS4_NAME)) { if (do_nfs4_super_data_conv((void *) data_page)) - goto out4; + goto out3; } } - retval = do_mount(kernel_dev, dir->name, kernel_type, + retval = do_mount(kernel_dev, dir_name, kernel_type, flags, (void*)data_page); - out4: - free_page(data_page); out3: - kfree(kernel_dev); + free_page(data_page); out2: - putname(dir); + kfree(kernel_dev); out1: kfree(kernel_type); out: diff --git a/fs/dcache.c b/fs/dcache.c index cb25a1a5e307..d5a23fd0da90 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -235,18 +235,49 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c return dentry_string_cmp(cs, ct, tcount); } +struct external_name { + union { + atomic_t count; + struct rcu_head head; + } u; + unsigned char name[]; +}; + +static inline struct external_name *external_name(struct dentry *dentry) +{ + return container_of(dentry->d_name.name, struct external_name, name[0]); +} + static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); WARN_ON(!hlist_unhashed(&dentry->d_alias)); - if (dname_external(dentry)) - kfree(dentry->d_name.name); kmem_cache_free(dentry_cache, dentry); } +static void __d_free_external(struct rcu_head *head) +{ + struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); + WARN_ON(!hlist_unhashed(&dentry->d_alias)); + kfree(external_name(dentry)); + kmem_cache_free(dentry_cache, dentry); +} + +static inline int dname_external(const struct dentry *dentry) +{ + return dentry->d_name.name != dentry->d_iname; +} + static void dentry_free(struct dentry *dentry) { + if (unlikely(dname_external(dentry))) { + struct external_name *p = external_name(dentry); + if (likely(atomic_dec_and_test(&p->u.count))) { + call_rcu(&dentry->d_u.d_rcu, __d_free_external); + return; + } + } /* if dentry was never visible to RCU, immediate free is OK */ if (!(dentry->d_flags & DCACHE_RCUACCESS)) __d_free(&dentry->d_u.d_rcu); @@ -456,7 +487,7 @@ static void __dentry_kill(struct dentry *dentry) * inform the fs via d_prune that this dentry is about to be * unhashed and destroyed. */ - if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry)) + if (dentry->d_flags & DCACHE_OP_PRUNE) dentry->d_op->d_prune(dentry); if (dentry->d_flags & DCACHE_LRU_LIST) { @@ -619,62 +650,6 @@ kill_it: } EXPORT_SYMBOL(dput); -/** - * d_invalidate - invalidate a dentry - * @dentry: dentry to invalidate - * - * Try to invalidate the dentry if it turns out to be - * possible. If there are other dentries that can be - * reached through this one we can't delete it and we - * return -EBUSY. On success we return 0. - * - * no dcache lock. - */ - -int d_invalidate(struct dentry * dentry) -{ - /* - * If it's already been dropped, return OK. - */ - spin_lock(&dentry->d_lock); - if (d_unhashed(dentry)) { - spin_unlock(&dentry->d_lock); - return 0; - } - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. - */ - if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dentry->d_lock); - shrink_dcache_parent(dentry); - spin_lock(&dentry->d_lock); - } - - /* - * Somebody else still using it? - * - * If it's a directory, we can't drop it - * for fear of somebody re-populating it - * with children (even though dropping it - * would make it unreachable from the root, - * we might still populate it if it was a - * working directory or similar). - * We also need to leave mountpoints alone, - * directory or not. - */ - if (dentry->d_lockref.count > 1 && dentry->d_inode) { - if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) { - spin_unlock(&dentry->d_lock); - return -EBUSY; - } - } - - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - return 0; -} -EXPORT_SYMBOL(d_invalidate); /* This must be called with d_lock held */ static inline void __dget_dlock(struct dentry *dentry) @@ -735,7 +710,8 @@ EXPORT_SYMBOL(dget_parent); * acquire the reference to alias and return it. Otherwise return NULL. * Notice that if inode is a directory there can be only one alias and * it can be unhashed only if it has no children, or if it is the root - * of a filesystem. + * of a filesystem, or if the directory was renamed and d_revalidate + * was the first vfs operation to notice. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer * any other hashed alias over that one. @@ -799,20 +775,13 @@ restart: hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_lockref.count) { - /* - * inform the fs via d_prune that this dentry - * is about to be unhashed and destroyed. - */ - if ((dentry->d_flags & DCACHE_OP_PRUNE) && - !d_unhashed(dentry)) - dentry->d_op->d_prune(dentry); - - __dget_dlock(dentry); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&inode->i_lock); - dput(dentry); - goto restart; + struct dentry *parent = lock_parent(dentry); + if (likely(!dentry->d_lockref.count)) { + __dentry_kill(dentry); + goto restart; + } + if (parent) + spin_unlock(&parent->d_lock); } spin_unlock(&dentry->d_lock); } @@ -1193,7 +1162,7 @@ EXPORT_SYMBOL(have_submounts); * reachable (e.g. NFS can unhash a directory dentry and then the complete * subtree can become unreachable). * - * Only one of check_submounts_and_drop() and d_set_mounted() must succeed. For + * Only one of d_invalidate() and d_set_mounted() must succeed. For * this reason take rename_lock and d_lock on dentry and ancestors. */ int d_set_mounted(struct dentry *dentry) @@ -1202,7 +1171,7 @@ int d_set_mounted(struct dentry *dentry) int ret = -ENOENT; write_seqlock(&rename_lock); for (p = dentry->d_parent; !IS_ROOT(p); p = p->d_parent) { - /* Need exclusion wrt. check_submounts_and_drop() */ + /* Need exclusion wrt. d_invalidate() */ spin_lock(&p->d_lock); if (unlikely(d_unhashed(p))) { spin_unlock(&p->d_lock); @@ -1346,70 +1315,84 @@ void shrink_dcache_for_umount(struct super_block *sb) } } -static enum d_walk_ret check_and_collect(void *_data, struct dentry *dentry) +struct detach_data { + struct select_data select; + struct dentry *mountpoint; +}; +static enum d_walk_ret detach_and_collect(void *_data, struct dentry *dentry) { - struct select_data *data = _data; + struct detach_data *data = _data; if (d_mountpoint(dentry)) { - data->found = -EBUSY; + __dget_dlock(dentry); + data->mountpoint = dentry; return D_WALK_QUIT; } - return select_collect(_data, dentry); + return select_collect(&data->select, dentry); } static void check_and_drop(void *_data) { - struct select_data *data = _data; + struct detach_data *data = _data; - if (d_mountpoint(data->start)) - data->found = -EBUSY; - if (!data->found) - __d_drop(data->start); + if (!data->mountpoint && !data->select.found) + __d_drop(data->select.start); } /** - * check_submounts_and_drop - prune dcache, check for submounts and drop + * d_invalidate - detach submounts, prune dcache, and drop + * @dentry: dentry to invalidate (aka detach, prune and drop) * - * All done as a single atomic operation relative to has_unlinked_ancestor(). - * Returns 0 if successfully unhashed @parent. If there were submounts then - * return -EBUSY. + * no dcache lock. * - * @dentry: dentry to prune and drop + * The final d_drop is done as an atomic operation relative to + * rename_lock ensuring there are no races with d_set_mounted. This + * ensures there are no unhashed dentries on the path to a mountpoint. */ -int check_submounts_and_drop(struct dentry *dentry) +void d_invalidate(struct dentry *dentry) { - int ret = 0; + /* + * If it's already been dropped, return OK. + */ + spin_lock(&dentry->d_lock); + if (d_unhashed(dentry)) { + spin_unlock(&dentry->d_lock); + return; + } + spin_unlock(&dentry->d_lock); /* Negative dentries can be dropped without further checks */ if (!dentry->d_inode) { d_drop(dentry); - goto out; + return; } for (;;) { - struct select_data data; + struct detach_data data; - INIT_LIST_HEAD(&data.dispose); - data.start = dentry; - data.found = 0; + data.mountpoint = NULL; + INIT_LIST_HEAD(&data.select.dispose); + data.select.start = dentry; + data.select.found = 0; + + d_walk(dentry, &data, detach_and_collect, check_and_drop); - d_walk(dentry, &data, check_and_collect, check_and_drop); - ret = data.found; + if (data.select.found) + shrink_dentry_list(&data.select.dispose); - if (!list_empty(&data.dispose)) - shrink_dentry_list(&data.dispose); + if (data.mountpoint) { + detach_mounts(data.mountpoint); + dput(data.mountpoint); + } - if (ret <= 0) + if (!data.mountpoint && !data.select.found) break; cond_resched(); } - -out: - return ret; } -EXPORT_SYMBOL(check_submounts_and_drop); +EXPORT_SYMBOL(d_invalidate); /** * __d_alloc - allocate a dcache entry @@ -1438,11 +1421,14 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) */ dentry->d_iname[DNAME_INLINE_LEN-1] = 0; if (name->len > DNAME_INLINE_LEN-1) { - dname = kmalloc(name->len + 1, GFP_KERNEL); - if (!dname) { + size_t size = offsetof(struct external_name, name[1]); + struct external_name *p = kmalloc(size + name->len, GFP_KERNEL); + if (!p) { kmem_cache_free(dentry_cache, dentry); return NULL; } + atomic_set(&p->u.count, 1); + dname = p->name; } else { dname = dentry->d_iname; } @@ -2112,10 +2098,10 @@ struct dentry *d_lookup(const struct dentry *parent, const struct qstr *name) struct dentry *dentry; unsigned seq; - do { - seq = read_seqbegin(&rename_lock); - dentry = __d_lookup(parent, name); - if (dentry) + do { + seq = read_seqbegin(&rename_lock); + dentry = __d_lookup(parent, name); + if (dentry) break; } while (read_seqretry(&rename_lock, seq)); return dentry; @@ -2372,11 +2358,10 @@ void dentry_update_name_case(struct dentry *dentry, struct qstr *name) } EXPORT_SYMBOL(dentry_update_name_case); -static void switch_names(struct dentry *dentry, struct dentry *target, - bool exchange) +static void swap_names(struct dentry *dentry, struct dentry *target) { - if (dname_external(target)) { - if (dname_external(dentry)) { + if (unlikely(dname_external(target))) { + if (unlikely(dname_external(dentry))) { /* * Both external: swap the pointers */ @@ -2392,7 +2377,7 @@ static void switch_names(struct dentry *dentry, struct dentry *target, target->d_name.name = target->d_iname; } } else { - if (dname_external(dentry)) { + if (unlikely(dname_external(dentry))) { /* * dentry:external, target:internal. Give dentry's * storage to target and make dentry internal @@ -2407,12 +2392,6 @@ static void switch_names(struct dentry *dentry, struct dentry *target, */ unsigned int i; BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); - if (!exchange) { - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); - dentry->d_name.hash_len = target->d_name.hash_len; - return; - } for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { swap(((long *) &dentry->d_iname)[i], ((long *) &target->d_iname)[i]); @@ -2422,6 +2401,24 @@ static void switch_names(struct dentry *dentry, struct dentry *target, swap(dentry->d_name.hash_len, target->d_name.hash_len); } +static void copy_name(struct dentry *dentry, struct dentry *target) +{ + struct external_name *old_name = NULL; + if (unlikely(dname_external(dentry))) + old_name = external_name(dentry); + if (unlikely(dname_external(target))) { + atomic_inc(&external_name(target)->u.count); + dentry->d_name = target->d_name; + } else { + memcpy(dentry->d_iname, target->d_name.name, + target->d_name.len + 1); + dentry->d_name.name = dentry->d_iname; + dentry->d_name.hash_len = target->d_name.hash_len; + } + if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) + kfree_rcu(old_name, u.head); +} + static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) { /* @@ -2518,7 +2515,10 @@ static void __d_move(struct dentry *dentry, struct dentry *target, } /* Switch the names.. */ - switch_names(dentry, target, exchange); + if (exchange) + swap_names(dentry, target); + else + copy_name(dentry, target); /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { @@ -2625,10 +2625,8 @@ static struct dentry *__d_unalias(struct inode *inode, goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - if (likely(!d_mountpoint(alias))) { - __d_move(alias, dentry, false); - ret = alias; - } + __d_move(alias, dentry, false); + ret = alias; out_err: spin_unlock(&inode->i_lock); if (m2) @@ -2810,6 +2808,9 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen) * the beginning of the name. The sequence number check at the caller will * retry it again when a d_move() does happen. So any garbage in the buffer * due to mismatched pointer and length will be discarded. + * + * Data dependency barrier is needed to make sure that we see that terminating + * NUL. Alpha strikes again, film at 11... */ static int prepend_name(char **buffer, int *buflen, struct qstr *name) { @@ -2817,6 +2818,8 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) u32 dlen = ACCESS_ONCE(name->len); char *p; + smp_read_barrier_depends(); + *buflen -= dlen + 1; if (*buflen < 0) return -ENAMETOOLONG; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index b4b6ab9873ae..f5bce9096555 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -327,7 +327,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct file *lower_file = ecryptfs_file_to_lower(file); long rc = -ENOIOCTLCMD; - if (lower_file->f_op && lower_file->f_op->compat_ioctl) + if (lower_file->f_op->compat_ioctl) rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); return rc; } diff --git a/fs/exec.c b/fs/exec.c index a2b42a98c743..7302b75a9820 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1372,18 +1372,23 @@ int search_binary_handler(struct linux_binprm *bprm) read_unlock(&binfmt_lock); bprm->recursion_depth++; retval = fmt->load_binary(bprm); + read_lock(&binfmt_lock); + put_binfmt(fmt); bprm->recursion_depth--; - if (retval >= 0 || retval != -ENOEXEC || - bprm->mm == NULL || bprm->file == NULL) { - put_binfmt(fmt); + if (retval < 0 && !bprm->mm) { + /* we got to flush_old_exec() and failed after it */ + read_unlock(&binfmt_lock); + force_sigsegv(SIGSEGV, current); + return retval; + } + if (retval != -ENOEXEC || !bprm->file) { + read_unlock(&binfmt_lock); return retval; } - read_lock(&binfmt_lock); - put_binfmt(fmt); } read_unlock(&binfmt_lock); - if (need_retry && retval == -ENOEXEC) { + if (need_retry) { if (printable(bprm->buf[0]) && printable(bprm->buf[1]) && printable(bprm->buf[2]) && printable(bprm->buf[3])) return retval; diff --git a/fs/file.c b/fs/file.c index 66923fe3176e..f3b2c206c18d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -750,6 +750,7 @@ bool get_close_on_exec(unsigned int fd) static int do_dup2(struct files_struct *files, struct file *file, unsigned fd, unsigned flags) +__releases(&files->file_lock) { struct file *tofree; struct fdtable *fdt; diff --git a/fs/file_table.c b/fs/file_table.c index 0bab12b20460..3f85411b03ce 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -150,18 +150,10 @@ over: /** * alloc_file - allocate and initialize a 'struct file' - * @mnt: the vfsmount on which the file will reside - * @dentry: the dentry representing the new file + * + * @path: the (dentry, vfsmount) pair for the new file * @mode: the mode with which the new file will be opened * @fop: the 'struct file_operations' for the new file - * - * Use this instead of get_empty_filp() to get a new - * 'struct file'. Do so because of the same initialization - * pitfalls reasons listed for init_file(). This is a - * preferred interface to using init_file(). - * - * If all the callers of init_file() are eliminated, its - * code should be moved into this function. */ struct file *alloc_file(struct path *path, fmode_t mode, const struct file_operations *fop) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index de1d84af9f7c..dbab798f5caf 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -274,9 +274,6 @@ out: invalid: ret = 0; - - if (!(flags & LOOKUP_RCU) && check_submounts_and_drop(entry) != 0) - ret = 1; goto out; } @@ -1289,9 +1286,7 @@ static int fuse_direntplus_link(struct file *file, d_drop(dentry); } else if (get_node_id(inode) != o->nodeid || ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { - err = d_invalidate(dentry); - if (err) - goto out; + d_invalidate(dentry); } else if (is_bad_inode(inode)) { err = -EIO; goto out; diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index d3a5d4e29ba5..589f4ea9381c 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -93,9 +93,6 @@ invalid_gunlock: if (!had_lock) gfs2_glock_dq_uninit(&d_gh); invalid: - if (check_submounts_and_drop(dentry) != 0) - goto valid; - dput(parent); return 0; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index fcf42eadb69c..c4ed823d150e 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1245,6 +1245,9 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, struct dentry *d; bool excl = !!(flags & O_EXCL); + if (!d_unhashed(dentry)) + goto skip_lookup; + d = __gfs2_lookup(dir, dentry, file, opened); if (IS_ERR(d)) return PTR_ERR(d); @@ -1261,6 +1264,8 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, } BUG_ON(d != NULL); + +skip_lookup: if (!(flags & O_CREAT)) return -ENOENT; diff --git a/fs/internal.h b/fs/internal.h index b2623200107b..9477f8f6aefc 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -56,7 +56,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, * namespace.c */ extern int copy_mount_options(const void __user *, unsigned long *); -extern int copy_mount_string(const void __user *, char **); +extern char *copy_mount_string(const void __user *); extern struct vfsmount *lookup_mnt(struct path *); extern int finish_automount(struct vfsmount *, struct path *); diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 413ef89c2d1b..046fee8b6e9b 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -134,8 +134,6 @@ struct jffs2_sb_info { struct rw_semaphore wbuf_sem; /* Protects the write buffer */ struct delayed_work wbuf_dwork; /* write-buffer write-out work */ - int wbuf_queued; /* non-zero delayed work is queued */ - spinlock_t wbuf_dwork_lock; /* protects wbuf_dwork and and wbuf_queued */ unsigned char *oobbuf; int oobavail; /* How many bytes are available for JFFS2 in OOB */ diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index a6597d60d76d..09ed55190ee2 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1162,10 +1162,6 @@ static void delayed_wbuf_sync(struct work_struct *work) struct jffs2_sb_info *c = work_to_sb(work); struct super_block *sb = OFNI_BS_2SFFJ(c); - spin_lock(&c->wbuf_dwork_lock); - c->wbuf_queued = 0; - spin_unlock(&c->wbuf_dwork_lock); - if (!(sb->s_flags & MS_RDONLY)) { jffs2_dbg(1, "%s()\n", __func__); jffs2_flush_wbuf_gc(c, 0); @@ -1180,14 +1176,9 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c) if (sb->s_flags & MS_RDONLY) return; - spin_lock(&c->wbuf_dwork_lock); - if (!c->wbuf_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + if (queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay)) jffs2_dbg(1, "%s()\n", __func__); - delay = msecs_to_jiffies(dirty_writeback_interval * 10); - queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay); - c->wbuf_queued = 1; - } - spin_unlock(&c->wbuf_dwork_lock); } int jffs2_nand_flash_setup(struct jffs2_sb_info *c) @@ -1211,7 +1202,6 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c) /* Initialise write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; @@ -1251,7 +1241,6 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->erasesize; @@ -1311,7 +1300,6 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; @@ -1346,7 +1334,6 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) { return 0; init_rwsem(&c->wbuf_sem); - spin_lock_init(&c->wbuf_dwork_lock); INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index adf8cb045b9e..93e897e588a8 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -550,7 +550,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) inode->i_ino = 0; inode->i_size = sb->s_bdev->bd_inode->i_size; inode->i_mapping->a_ops = &jfs_metapage_aops; - insert_inode_hash(inode); + hlist_add_fake(&inode->i_hash); mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); sbi->direct_inode = inode; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index a693f5b01ae6..1c771931bb60 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -463,21 +463,10 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) goto out_bad; mutex_unlock(&kernfs_mutex); -out_valid: return 1; out_bad: mutex_unlock(&kernfs_mutex); out_bad_unlocked: - /* - * @dentry doesn't match the underlying kernfs node, drop the - * dentry and force lookup. If we have submounts we must allow the - * vfs caches to lie about the state of the filesystem to prevent - * leaks and other nasty things, so use check_submounts_and_drop() - * instead of d_drop(). - */ - if (check_submounts_and_drop(dentry) != 0) - goto out_valid; - return 0; } diff --git a/fs/mount.h b/fs/mount.h index 6740a6215529..f82c62840905 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -21,6 +21,7 @@ struct mnt_pcp { struct mountpoint { struct hlist_node m_hash; struct dentry *m_dentry; + struct hlist_head m_list; int m_count; }; @@ -29,7 +30,10 @@ struct mount { struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; - struct rcu_head mnt_rcu; + union { + struct rcu_head mnt_rcu; + struct llist_node mnt_llist; + }; #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; #else @@ -48,6 +52,7 @@ struct mount { struct mount *mnt_master; /* slave is on master->mnt_slave_list */ struct mnt_namespace *mnt_ns; /* containing namespace */ struct mountpoint *mnt_mp; /* where is it mounted */ + struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ #ifdef CONFIG_FSNOTIFY struct hlist_head mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; @@ -82,6 +87,15 @@ extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); extern bool legitimize_mnt(struct vfsmount *, unsigned); +extern void __detach_mounts(struct dentry *dentry); + +static inline void detach_mounts(struct dentry *dentry) +{ + if (!d_mountpoint(dentry)) + return; + __detach_mounts(dentry); +} + static inline void get_mnt_ns(struct mnt_namespace *ns) { atomic_inc(&ns->count); @@ -112,3 +126,12 @@ struct proc_mounts { #define proc_mounts(p) (container_of((p), struct proc_mounts, m)) extern const struct seq_operations mounts_op; + +extern bool __is_local_mountpoint(struct dentry *dentry); +static inline bool is_local_mountpoint(struct dentry *dentry) +{ + if (!d_mountpoint(dentry)) + return false; + + return __is_local_mountpoint(dentry); +} diff --git a/fs/namei.c b/fs/namei.c index 3ddb044f3702..43927d14db67 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1306,7 +1306,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, if (error < 0) { dput(dentry); return ERR_PTR(error); - } else if (!d_invalidate(dentry)) { + } else { + d_invalidate(dentry); dput(dentry); dentry = NULL; } @@ -1435,10 +1436,9 @@ unlazy: dput(dentry); return status; } - if (!d_invalidate(dentry)) { - dput(dentry); - goto need_lookup; - } + d_invalidate(dentry); + dput(dentry); + goto need_lookup; } path->mnt = mnt; @@ -1950,7 +1950,7 @@ static int path_lookupat(int dfd, const char *name, err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base); if (unlikely(err)) - return err; + goto out; current->total_link_count = 0; err = link_path_walk(name, nd); @@ -1982,6 +1982,7 @@ static int path_lookupat(int dfd, const char *name, } } +out: if (base) fput(base); @@ -2301,7 +2302,7 @@ path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags err = path_init(dfd, name, flags | LOOKUP_PARENT, &nd, &base); if (unlikely(err)) - return err; + goto out; current->total_link_count = 0; err = link_path_walk(name, &nd); @@ -3565,7 +3566,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) mutex_lock(&dentry->d_inode->i_mutex); error = -EBUSY; - if (d_mountpoint(dentry)) + if (is_local_mountpoint(dentry)) goto out; error = security_inode_rmdir(dir, dentry); @@ -3579,6 +3580,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); + detach_mounts(dentry); out: mutex_unlock(&dentry->d_inode->i_mutex); @@ -3681,7 +3683,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate return -EPERM; mutex_lock(&target->i_mutex); - if (d_mountpoint(dentry)) + if (is_local_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_unlink(dir, dentry); @@ -3690,8 +3692,10 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate if (error) goto out; error = dir->i_op->unlink(dir, dentry); - if (!error) + if (!error) { dont_mount(dentry); + detach_mounts(dentry); + } } } out: @@ -4126,7 +4130,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, mutex_lock(&target->i_mutex); error = -EBUSY; - if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) + if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry)) goto out; if (max_links && new_dir != old_dir) { @@ -4164,6 +4168,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (is_dir) target->i_flags |= S_DEAD; dont_mount(new_dentry); + detach_mounts(new_dentry); } if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { if (!(flags & RENAME_EXCHANGE)) diff --git a/fs/namespace.c b/fs/namespace.c index ef42d9bee212..348562f14e93 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -23,6 +23,7 @@ #include <linux/proc_ns.h> #include <linux/magic.h> #include <linux/bootmem.h> +#include <linux/task_work.h> #include "pnode.h" #include "internal.h" @@ -224,6 +225,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_share); INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); + INIT_HLIST_NODE(&mnt->mnt_mp_list); #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif @@ -666,11 +668,45 @@ struct vfsmount *lookup_mnt(struct path *path) return m; } -static struct mountpoint *new_mountpoint(struct dentry *dentry) +/* + * __is_local_mountpoint - Test to see if dentry is a mountpoint in the + * current mount namespace. + * + * The common case is dentries are not mountpoints at all and that + * test is handled inline. For the slow case when we are actually + * dealing with a mountpoint of some kind, walk through all of the + * mounts in the current mount namespace and test to see if the dentry + * is a mountpoint. + * + * The mount_hashtable is not usable in the context because we + * need to identify all mounts that may be in the current mount + * namespace not just a mount that happens to have some specified + * parent mount. + */ +bool __is_local_mountpoint(struct dentry *dentry) +{ + struct mnt_namespace *ns = current->nsproxy->mnt_ns; + struct mount *mnt; + bool is_covered = false; + + if (!d_mountpoint(dentry)) + goto out; + + down_read(&namespace_sem); + list_for_each_entry(mnt, &ns->list, mnt_list) { + is_covered = (mnt->mnt_mountpoint == dentry); + if (is_covered) + break; + } + up_read(&namespace_sem); +out: + return is_covered; +} + +static struct mountpoint *lookup_mountpoint(struct dentry *dentry) { struct hlist_head *chain = mp_hash(dentry); struct mountpoint *mp; - int ret; hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { @@ -681,6 +717,14 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry) return mp; } } + return NULL; +} + +static struct mountpoint *new_mountpoint(struct dentry *dentry) +{ + struct hlist_head *chain = mp_hash(dentry); + struct mountpoint *mp; + int ret; mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); if (!mp) @@ -695,6 +739,7 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry) mp->m_dentry = dentry; mp->m_count = 1; hlist_add_head(&mp->m_hash, chain); + INIT_HLIST_HEAD(&mp->m_list); return mp; } @@ -702,6 +747,7 @@ static void put_mountpoint(struct mountpoint *mp) { if (!--mp->m_count) { struct dentry *dentry = mp->m_dentry; + BUG_ON(!hlist_empty(&mp->m_list)); spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_MOUNTED; spin_unlock(&dentry->d_lock); @@ -748,6 +794,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); hlist_del_init_rcu(&mnt->mnt_hash); + hlist_del_init(&mnt->mnt_mp_list); put_mountpoint(mnt->mnt_mp); mnt->mnt_mp = NULL; } @@ -764,6 +811,7 @@ void mnt_set_mountpoint(struct mount *mnt, child_mnt->mnt_mountpoint = dget(mp->m_dentry); child_mnt->mnt_parent = mnt; child_mnt->mnt_mp = mp; + hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); } /* @@ -957,6 +1005,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, return ERR_PTR(err); } +static void cleanup_mnt(struct mount *mnt) +{ + /* + * This probably indicates that somebody messed + * up a mnt_want/drop_write() pair. If this + * happens, the filesystem was probably unable + * to make r/w->r/o transitions. + */ + /* + * The locking used to deal with mnt_count decrement provides barriers, + * so mnt_get_writers() below is safe. + */ + WARN_ON(mnt_get_writers(mnt)); + if (unlikely(mnt->mnt_pins.first)) + mnt_pin_kill(mnt); + fsnotify_vfsmount_delete(&mnt->mnt); + dput(mnt->mnt.mnt_root); + deactivate_super(mnt->mnt.mnt_sb); + mnt_free_id(mnt); + call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); +} + +static void __cleanup_mnt(struct rcu_head *head) +{ + cleanup_mnt(container_of(head, struct mount, mnt_rcu)); +} + +static LLIST_HEAD(delayed_mntput_list); +static void delayed_mntput(struct work_struct *unused) +{ + struct llist_node *node = llist_del_all(&delayed_mntput_list); + struct llist_node *next; + + for (; node; node = next) { + next = llist_next(node); + cleanup_mnt(llist_entry(node, struct mount, mnt_llist)); + } +} +static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); + static void mntput_no_expire(struct mount *mnt) { rcu_read_lock(); @@ -982,24 +1070,18 @@ static void mntput_no_expire(struct mount *mnt) list_del(&mnt->mnt_instance); unlock_mount_hash(); - /* - * This probably indicates that somebody messed - * up a mnt_want/drop_write() pair. If this - * happens, the filesystem was probably unable - * to make r/w->r/o transitions. - */ - /* - * The locking used to deal with mnt_count decrement provides barriers, - * so mnt_get_writers() below is safe. - */ - WARN_ON(mnt_get_writers(mnt)); - if (unlikely(mnt->mnt_pins.first)) - mnt_pin_kill(mnt); - fsnotify_vfsmount_delete(&mnt->mnt); - dput(mnt->mnt.mnt_root); - deactivate_super(mnt->mnt.mnt_sb); - mnt_free_id(mnt); - call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); + if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { + struct task_struct *task = current; + if (likely(!(task->flags & PF_KTHREAD))) { + init_task_work(&mnt->mnt_rcu, __cleanup_mnt); + if (!task_work_add(task, &mnt->mnt_rcu, true)) + return; + } + if (llist_add(&mnt->mnt_llist, &delayed_mntput_list)) + schedule_delayed_work(&delayed_mntput_work, 1); + return; + } + cleanup_mnt(mnt); } void mntput(struct vfsmount *mnt) @@ -1272,6 +1354,7 @@ void umount_tree(struct mount *mnt, int how) if (how < 2) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; if (mnt_has_parent(p)) { + hlist_del_init(&p->mnt_mp_list); put_mountpoint(p->mnt_mp); mnt_add_count(p->mnt_parent, -1); /* move the reference to mountpoint into ->mnt_ex_mountpoint */ @@ -1385,6 +1468,37 @@ static int do_umount(struct mount *mnt, int flags) return retval; } +/* + * __detach_mounts - lazily unmount all mounts on the specified dentry + * + * During unlink, rmdir, and d_drop it is possible to loose the path + * to an existing mountpoint, and wind up leaking the mount. + * detach_mounts allows lazily unmounting those mounts instead of + * leaking them. + * + * The caller may hold dentry->d_inode->i_mutex. + */ +void __detach_mounts(struct dentry *dentry) +{ + struct mountpoint *mp; + struct mount *mnt; + + namespace_lock(); + mp = lookup_mountpoint(dentry); + if (!mp) + goto out_unlock; + + lock_mount_hash(); + while (!hlist_empty(&mp->m_list)) { + mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); + umount_tree(mnt, 2); + } + unlock_mount_hash(); + put_mountpoint(mp); +out_unlock: + namespace_unlock(); +} + /* * Is the caller allowed to modify his namespace? */ @@ -1742,7 +1856,9 @@ retry: namespace_lock(); mnt = lookup_mnt(path); if (likely(!mnt)) { - struct mountpoint *mp = new_mountpoint(dentry); + struct mountpoint *mp = lookup_mountpoint(dentry); + if (!mp) + mp = new_mountpoint(dentry); if (IS_ERR(mp)) { namespace_unlock(); mutex_unlock(&dentry->d_inode->i_mutex); @@ -2398,21 +2514,9 @@ int copy_mount_options(const void __user * data, unsigned long *where) return 0; } -int copy_mount_string(const void __user *data, char **where) +char *copy_mount_string(const void __user *data) { - char *tmp; - - if (!data) { - *where = NULL; - return 0; - } - - tmp = strndup_user(data, PAGE_SIZE); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - *where = tmp; - return 0; + return data ? strndup_user(data, PAGE_SIZE) : NULL; } /* @@ -2429,7 +2533,7 @@ int copy_mount_string(const void __user *data, char **where) * Therefore, if this magic number is present, it carries no information * and must be discarded. */ -long do_mount(const char *dev_name, const char *dir_name, +long do_mount(const char *dev_name, const char __user *dir_name, const char *type_page, unsigned long flags, void *data_page) { struct path path; @@ -2441,15 +2545,11 @@ long do_mount(const char *dev_name, const char *dir_name, flags &= ~MS_MGC_MSK; /* Basic sanity checks */ - - if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) - return -EINVAL; - if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; /* ... and get the mountpoint */ - retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); + retval = user_path(dir_name, &path); if (retval) return retval; @@ -2674,37 +2774,30 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, { int ret; char *kernel_type; - struct filename *kernel_dir; char *kernel_dev; unsigned long data_page; - ret = copy_mount_string(type, &kernel_type); - if (ret < 0) + kernel_type = copy_mount_string(type); + ret = PTR_ERR(kernel_type); + if (IS_ERR(kernel_type)) goto out_type; - kernel_dir = getname(dir_name); - if (IS_ERR(kernel_dir)) { - ret = PTR_ERR(kernel_dir); - goto out_dir; - } - - ret = copy_mount_string(dev_name, &kernel_dev); - if (ret < 0) + kernel_dev = copy_mount_string(dev_name); + ret = PTR_ERR(kernel_dev); + if (IS_ERR(kernel_dev)) goto out_dev; ret = copy_mount_options(data, &data_page); if (ret < 0) goto out_data; - ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags, + ret = do_mount(kernel_dev, dir_name, kernel_type, flags, (void *) data_page); free_page(data_page); out_data: kfree(kernel_dev); out_dev: - putname(kernel_dir); -out_dir: kfree(kernel_type); out_type: return ret; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 08b8ea8c353e..314e7add99b8 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -388,7 +388,6 @@ static struct dentry * ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) { struct dentry *dent = dentry; - struct list_head *next; if (d_validate(dent, parent)) { if (dent->d_name.len <= NCP_MAXPATHLEN && @@ -404,9 +403,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) /* If a pointer is invalid, we search the dentry. */ spin_lock(&parent->d_lock); - next = parent->d_subdirs.next; - while (next != &parent->d_subdirs) { - dent = list_entry(next, struct dentry, d_u.d_child); + list_for_each_entry(dent, &parent->d_subdirs, d_u.d_child) { if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) dget(dent); @@ -415,7 +412,6 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) spin_unlock(&parent->d_lock); goto out; } - next = next->next; } spin_unlock(&parent->d_lock); return NULL; diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 32c06587351a..52cb19d66ecb 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h @@ -188,20 +188,14 @@ static inline void ncp_renew_dentries(struct dentry *parent) { struct ncp_server *server = NCP_SERVER(parent->d_inode); - struct list_head *next; struct dentry *dentry; spin_lock(&parent->d_lock); - next = parent->d_subdirs.next; - while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_u.d_child); - + list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) { if (dentry->d_fsdata == NULL) ncp_age_dentry(server, dentry); else ncp_new_dentry(dentry); - - next = next->next; } spin_unlock(&parent->d_lock); } @@ -210,16 +204,12 @@ static inline void ncp_invalidate_dircache_entries(struct dentry *parent) { struct ncp_server *server = NCP_SERVER(parent->d_inode); - struct list_head *next; struct dentry *dentry; spin_lock(&parent->d_lock); - next = parent->d_subdirs.next; - while (next != &parent->d_subdirs) { - dentry = list_entry(next, struct dentry, d_u.d_child); + list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) { dentry->d_fsdata = NULL; ncp_age_dentry(server, dentry); - next = next->next; } spin_unlock(&parent->d_lock); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 36d921f0c602..06e8cfcbb670 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -486,8 +486,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label); goto out; } else { - if (d_invalidate(dentry) != 0) - goto out; + d_invalidate(dentry); dput(dentry); } } @@ -1211,10 +1210,6 @@ out_zap_parent: if (IS_ROOT(dentry)) goto out_valid; } - /* If we have submounts, don't unhash ! */ - if (check_submounts_and_drop(dentry) != 0) - goto out_valid; - dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", __func__, dentry); diff --git a/fs/proc/base.c b/fs/proc/base.c index 950100e326a1..772efa45a452 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1565,7 +1565,6 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) put_task_struct(task); return 1; } - d_drop(dentry); return 0; } @@ -1702,9 +1701,6 @@ out: put_task_struct(task); out_notask: - if (status <= 0) - d_drop(dentry); - return status; } @@ -2618,8 +2614,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) /* no ->d_hash() rejects on procfs */ dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - shrink_dcache_parent(dentry); - d_drop(dentry); + d_invalidate(dentry); dput(dentry); } @@ -2639,8 +2634,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { - shrink_dcache_parent(dentry); - d_drop(dentry); + d_invalidate(dentry); dput(dentry); } diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 955bb55fab8c..e11d7c590bb0 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -129,8 +129,6 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) } put_task_struct(task); } - - d_drop(dentry); return 0; } diff --git a/fs/read_write.c b/fs/read_write.c index 009d8542a889..7d9318c3d43c 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -513,6 +513,8 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t return ret; } +EXPORT_SYMBOL(__kernel_write); + ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { ssize_t ret; diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h index 857ec7e3016f..f620e9678dd5 100644 --- a/fs/reiserfs/xattr.h +++ b/fs/reiserfs/xattr.h @@ -7,7 +7,6 @@ struct inode; struct dentry; struct iattr; struct super_block; -struct nameidata; int reiserfs_xattr_register_handlers(void) __init; void reiserfs_xattr_unregister_handlers(void); diff --git a/fs/super.c b/fs/super.c index 1b836107acee..eae088f6aaae 100644 --- a/fs/super.c +++ b/fs/super.c @@ -80,6 +80,8 @@ static unsigned long super_cache_scan(struct shrinker *shrink, inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); total_objects = dentries + inodes + fs_objects + 1; + if (!total_objects) + total_objects = 1; /* proportion the scan between the caches */ dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); diff --git a/fs/xattr.c b/fs/xattr.c index c69e6d43a0d2..64e83efb742d 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -364,13 +364,12 @@ out: return error; } -SYSCALL_DEFINE5(setxattr, const char __user *, pathname, - const char __user *, name, const void __user *, value, - size_t, size, int, flags) +static int path_setxattr(const char __user *pathname, + const char __user *name, const void __user *value, + size_t size, int flags, unsigned int lookup_flags) { struct path path; int error; - unsigned int lookup_flags = LOOKUP_FOLLOW; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) @@ -388,28 +387,18 @@ retry: return error; } +SYSCALL_DEFINE5(setxattr, const char __user *, pathname, + const char __user *, name, const void __user *, value, + size_t, size, int, flags) +{ + return path_setxattr(pathname, name, value, size, flags, LOOKUP_FOLLOW); +} + SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, const char __user *, name, const void __user *, value, size_t, size, int, flags) { - struct path path; - int error; - unsigned int lookup_flags = 0; -retry: - error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); - if (error) - return error; - error = mnt_want_write(path.mnt); - if (!error) { - error = setxattr(path.dentry, name, value, size, flags); - mnt_drop_write(path.mnt); - } - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - return error; + return path_setxattr(pathname, name, value, size, flags, 0); } SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, @@ -481,12 +470,12 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, return error; } -SYSCALL_DEFINE4(getxattr, const char __user *, pathname, - const char __user *, name, void __user *, value, size_t, size) +static ssize_t path_getxattr(const char __user *pathname, + const char __user *name, void __user *value, + size_t size, unsigned int lookup_flags) { struct path path; ssize_t error; - unsigned int lookup_flags = LOOKUP_FOLLOW; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) @@ -500,23 +489,16 @@ retry: return error; } +SYSCALL_DEFINE4(getxattr, const char __user *, pathname, + const char __user *, name, void __user *, value, size_t, size) +{ + return path_getxattr(pathname, name, value, size, LOOKUP_FOLLOW); +} + SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, const char __user *, name, void __user *, value, size_t, size) { - struct path path; - ssize_t error; - unsigned int lookup_flags = 0; -retry: - error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); - if (error) - return error; - error = getxattr(path.dentry, name, value, size); - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - return error; + return path_getxattr(pathname, name, value, size, 0); } SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, @@ -571,12 +553,11 @@ listxattr(struct dentry *d, char __user *list, size_t size) return error; } -SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list, - size_t, size) +static ssize_t path_listxattr(const char __user *pathname, char __user *list, + size_t size, unsigned int lookup_flags) { struct path path; ssize_t error; - unsigned int lookup_flags = LOOKUP_FOLLOW; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) @@ -590,23 +571,16 @@ retry: return error; } +SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list, + size_t, size) +{ + return path_listxattr(pathname, list, size, LOOKUP_FOLLOW); +} + SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, size_t, size) { - struct path path; - ssize_t error; - unsigned int lookup_flags = 0; -retry: - error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); - if (error) - return error; - error = listxattr(path.dentry, list, size); - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - return error; + return path_listxattr(pathname, list, size, 0); } SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) @@ -640,12 +614,11 @@ removexattr(struct dentry *d, const char __user *name) return vfs_removexattr(d, kname); } -SYSCALL_DEFINE2(removexattr, const char __user *, pathname, - const char __user *, name) +static int path_removexattr(const char __user *pathname, + const char __user *name, unsigned int lookup_flags) { struct path path; int error; - unsigned int lookup_flags = LOOKUP_FOLLOW; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) @@ -663,27 +636,16 @@ retry: return error; } +SYSCALL_DEFINE2(removexattr, const char __user *, pathname, + const char __user *, name) +{ + return path_removexattr(pathname, name, LOOKUP_FOLLOW); +} + SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, const char __user *, name) { - struct path path; - int error; - unsigned int lookup_flags = 0; -retry: - error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); - if (error) - return error; - error = mnt_want_write(path.mnt); - if (!error) { - error = removexattr(path.dentry, name); - mnt_drop_write(path.mnt); - } - path_put(&path); - if (retry_estale(error, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - return error; + return path_removexattr(pathname, name, 0); } SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 75a227cc7ce2..b2a2a08523bf 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -11,7 +11,6 @@ #include <linux/rcupdate.h> #include <linux/lockref.h> -struct nameidata; struct path; struct vfsmount; @@ -226,11 +225,6 @@ struct dentry_operations { extern seqlock_t rename_lock; -static inline int dname_external(const struct dentry *dentry) -{ - return dentry->d_name.name != dentry->d_iname; -} - /* * These are the low-level FS interfaces to the dcache.. */ @@ -254,7 +248,7 @@ extern struct dentry * d_obtain_root(struct inode *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); extern void shrink_dcache_for_umount(struct super_block *); -extern int d_invalidate(struct dentry *); +extern void d_invalidate(struct dentry *); /* only used at mount-time */ extern struct dentry * d_make_root(struct inode *); @@ -269,7 +263,6 @@ extern void d_prune_aliases(struct inode *); /* test whether we have any submounts in a subdir tree */ extern int have_submounts(struct dentry *); -extern int check_submounts_and_drop(struct dentry *); /* * This adds the entry to the hash queues. diff --git a/include/linux/fs.h b/include/linux/fs.h index 2023306c620e..ab4f1a10da20 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1839,7 +1839,8 @@ extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); extern void kern_unmount(struct vfsmount *mnt); extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); -extern long do_mount(const char *, const char *, const char *, unsigned long, void *); +extern long do_mount(const char *, const char __user *, + const char *, unsigned long, void *); extern struct vfsmount *collect_mounts(struct path *); extern void drop_collected_mounts(struct vfsmount *); extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, @@ -1858,7 +1859,7 @@ extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); -static inline struct inode *file_inode(struct file *f) +static inline struct inode *file_inode(const struct file *f) { return f->f_inode; } diff --git a/include/linux/uio.h b/include/linux/uio.h index 290fbf0b6b8a..9b1581414cd4 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -80,6 +80,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); +size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i); +size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); +size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, size_t count); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1ce770687ea8..7a6e69441f75 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -519,14 +519,13 @@ struct devkmsg_user { char buf[8192]; }; -static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, - unsigned long count, loff_t pos) +static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) { char *buf, *line; int i; int level = default_message_loglevel; int facility = 1; /* LOG_USER */ - size_t len = iov_length(iv, count); + size_t len = iocb->ki_nbytes; ssize_t ret = len; if (len > LOG_LINE_MAX) @@ -535,13 +534,10 @@ static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, if (buf == NULL) return -ENOMEM; - line = buf; - for (i = 0; i < count; i++) { - if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) { - ret = -EFAULT; - goto out; - } - line += iv[i].iov_len; + buf[len] = '\0'; + if (copy_from_iter(buf, len, from) != len) { + kfree(buf); + return -EFAULT; } /* @@ -567,10 +563,8 @@ static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, line = endp; } } - line[len] = '\0'; printk_emit(facility, level, NULL, 0, "%s", line); -out: kfree(buf); return ret; } @@ -802,7 +796,7 @@ static int devkmsg_release(struct inode *inode, struct file *file) const struct file_operations kmsg_fops = { .open = devkmsg_open, .read = devkmsg_read, - .aio_write = devkmsg_writev, + .write_iter = devkmsg_write, .llseek = devkmsg_llseek, .poll = devkmsg_poll, .release = devkmsg_release, diff --git a/mm/iov_iter.c b/mm/iov_iter.c index 9a09f2034fcc..eafcf60f6b83 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -4,6 +4,96 @@ #include <linux/slab.h> #include <linux/vmalloc.h> +static size_t copy_to_iter_iovec(void *from, size_t bytes, struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + left = __copy_to_user(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + +static size_t copy_from_iter_iovec(void *to, size_t bytes, struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + left = __copy_from_user(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { @@ -166,6 +256,50 @@ done: return wanted - bytes; } +static size_t zero_iovec(size_t bytes, struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + left = __clear_user(buf, copy); + copy -= left; + skip += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __clear_user(buf, copy); + copy -= left; + skip = copy; + bytes -= copy; + } + + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { @@ -414,12 +548,17 @@ static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t kunmap_atomic(to); } -static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) +static void memzero_page(struct page *page, size_t offset, size_t len) +{ + char *addr = kmap_atomic(page); + memset(addr + offset, 0, len); + kunmap_atomic(addr); +} + +static size_t copy_to_iter_bvec(void *from, size_t bytes, struct iov_iter *i) { size_t skip, copy, wanted; const struct bio_vec *bvec; - void *kaddr, *from; if (unlikely(bytes > i->count)) bytes = i->count; @@ -432,8 +571,6 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by skip = i->iov_offset; copy = min_t(size_t, bytes, bvec->bv_len - skip); - kaddr = kmap_atomic(page); - from = kaddr + offset; memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy); skip += copy; from += copy; @@ -446,7 +583,6 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by from += copy; bytes -= copy; } - kunmap_atomic(kaddr); if (skip == bvec->bv_len) { bvec++; skip = 0; @@ -458,12 +594,10 @@ static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t by return wanted - bytes; } -static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) +static size_t copy_from_iter_bvec(void *to, size_t bytes, struct iov_iter *i) { size_t skip, copy, wanted; const struct bio_vec *bvec; - void *kaddr, *to; if (unlikely(bytes > i->count)) bytes = i->count; @@ -475,10 +609,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bvec = i->bvec; skip = i->iov_offset; - kaddr = kmap_atomic(page); - - to = kaddr + offset; - copy = min(bytes, bvec->bv_len - skip); memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy); @@ -495,7 +625,6 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t to += copy; bytes -= copy; } - kunmap_atomic(kaddr); if (skip == bvec->bv_len) { bvec++; skip = 0; @@ -507,6 +636,61 @@ static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t return wanted; } +static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, + size_t bytes, struct iov_iter *i) +{ + void *kaddr = kmap_atomic(page); + size_t wanted = copy_to_iter_bvec(kaddr + offset, bytes, i); + kunmap_atomic(kaddr); + return wanted; +} + +static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, + size_t bytes, struct iov_iter *i) +{ + void *kaddr = kmap_atomic(page); + size_t wanted = copy_from_iter_bvec(kaddr + offset, bytes, i); + kunmap_atomic(kaddr); + return wanted; +} + +static size_t zero_bvec(size_t bytes, struct iov_iter *i) +{ + size_t skip, copy, wanted; + const struct bio_vec *bvec; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + bvec = i->bvec; + skip = i->iov_offset; + copy = min_t(size_t, bytes, bvec->bv_len - skip); + + memzero_page(bvec->bv_page, skip + bvec->bv_offset, copy); + skip += copy; + bytes -= copy; + while (bytes) { + bvec++; + copy = min(bytes, (size_t)bvec->bv_len); + memzero_page(bvec->bv_page, bvec->bv_offset, copy); + skip = copy; + bytes -= copy; + } + if (skip == bvec->bv_len) { + bvec++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= bvec - i->bvec; + i->bvec = bvec; + i->iov_offset = skip; + return wanted - bytes; +} + static size_t copy_from_user_bvec(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes) { @@ -672,6 +856,34 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, } EXPORT_SYMBOL(copy_page_from_iter); +size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return copy_to_iter_bvec(addr, bytes, i); + else + return copy_to_iter_iovec(addr, bytes, i); +} +EXPORT_SYMBOL(copy_to_iter); + +size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return copy_from_iter_bvec(addr, bytes, i); + else + return copy_from_iter_iovec(addr, bytes, i); +} +EXPORT_SYMBOL(copy_from_iter); + +size_t iov_iter_zero(size_t bytes, struct iov_iter *i) +{ + if (i->type & ITER_BVEC) { + return zero_bvec(bytes, i); + } else { + return zero_iovec(bytes, i); + } +} +EXPORT_SYMBOL(iov_iter_zero); + size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c416725d28c4..7a186e74b1b3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -715,7 +715,7 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, * after validation, the socket and the ring may only be used by a * single process, otherwise we fall back to copying. */ - if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 || + if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 || atomic_read(&nlk->mapped) > 1) excl = false; diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 2553e5b55b89..d87767f76903 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -9,78 +9,48 @@ */ s64 perf_atoll(const char *str) { - unsigned int i; - s64 length = -1, unit = 1; + s64 length; + char *p; + char c; if (!isdigit(str[0])) goto out_err; - for (i = 1; i < strlen(str); i++) { - switch (str[i]) { - case 'B': - case 'b': - break; - case 'K': - if (str[i + 1] != 'B') - goto out_err; - else - goto kilo; - case 'k': - if (str[i + 1] != 'b') - goto out_err; -kilo: - unit = K; - break; - case 'M': - if (str[i + 1] != 'B') - goto out_err; - else - goto mega; - case 'm': - if (str[i + 1] != 'b') + length = strtoll(str, &p, 10); + switch (c = *p++) { + case 'b': case 'B': + if (*p) goto out_err; -mega: - unit = K * K; + case '\0': + return length; + default: + goto out_err; + /* two-letter suffices */ + case 'k': case 'K': + length <<= 10; break; - case 'G': - if (str[i + 1] != 'B') - goto out_err; - else - goto giga; - case 'g': - if (str[i + 1] != 'b') - goto out_err; -giga: - unit = K * K * K; + case 'm': case 'M': + length <<= 20; break; - case 'T': - if (str[i + 1] != 'B') - goto out_err; - else - goto tera; - case 't': - if (str[i + 1] != 'b') - goto out_err; -tera: - unit = K * K * K * K; + case 'g': case 'G': + length <<= 30; break; - case '\0': /* only specified figures */ - unit = 1; + case 't': case 'T': + length <<= 40; break; - default: - if (!isdigit(str[i])) - goto out_err; - break; - } } - - length = atoll(str) * unit; - goto out; + /* we want the cases to match */ + if (islower(c)) { + if (strcmp(p, "b") != 0) + goto out_err; + } else { + if (strcmp(p, "B") != 0) + goto out_err; + } + return length; out_err: - length = -1; -out: - return length; + return -1; } /* |