diff options
author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-07-05 13:13:03 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-07-05 13:13:03 -0400 |
commit | 5e66dd6d66ffe758b39b6dcadf2330753ee1159b (patch) | |
tree | a72cdcff4448e4af9425cc213ddf56ab23e697fe /fs | |
parent | 026477c1141b67e98e3bd8bdedb7d4b88a3ecd09 (diff) | |
parent | ca78f6baca863afe2e6a244a0fe94b3a70211d46 (diff) | |
download | lwn-5e66dd6d66ffe758b39b6dcadf2330753ee1159b.tar.gz lwn-5e66dd6d66ffe758b39b6dcadf2330753ee1159b.zip |
Merge branch 'master' of /home/trondmy/kernel/linux-2.6/
Diffstat (limited to 'fs')
-rw-r--r-- | fs/binfmt_elf.c | 15 | ||||
-rw-r--r-- | fs/block_dev.c | 102 | ||||
-rw-r--r-- | fs/dcache.c | 6 | ||||
-rw-r--r-- | fs/direct-io.c | 6 | ||||
-rw-r--r-- | fs/eventpoll.c | 4 | ||||
-rw-r--r-- | fs/ext2/super.c | 2 | ||||
-rw-r--r-- | fs/ext3/super.c | 2 | ||||
-rw-r--r-- | fs/jffs2/acl.c | 4 | ||||
-rw-r--r-- | fs/jffs2/acl.h | 4 | ||||
-rw-r--r-- | fs/jffs2/malloc.c | 2 | ||||
-rw-r--r-- | fs/jffs2/nodelist.h | 2 | ||||
-rw-r--r-- | fs/jffs2/readinode.c | 1 | ||||
-rw-r--r-- | fs/jffs2/scan.c | 4 | ||||
-rw-r--r-- | fs/jffs2/xattr.c | 45 | ||||
-rw-r--r-- | fs/namei.c | 20 | ||||
-rw-r--r-- | fs/ntfs/inode.c | 33 | ||||
-rw-r--r-- | fs/ntfs/super.c | 31 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 2 | ||||
-rw-r--r-- | fs/reiserfs/super.c | 2 | ||||
-rw-r--r-- | fs/super.c | 11 | ||||
-rw-r--r-- | fs/ufs/super.c | 2 |
21 files changed, 220 insertions, 80 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d0434406eaeb..f42e64210ee5 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = { .min_coredump = ELF_EXEC_PAGESIZE }; -#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE) +#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) static int set_brk(unsigned long start, unsigned long end) { @@ -394,7 +394,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, * <= p_memsize so it's only necessary to check p_memsz. */ k = load_addr + eppnt->p_vaddr; - if (k > TASK_SIZE || + if (BAD_ADDR(k) || eppnt->p_filesz > eppnt->p_memsz || eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) { @@ -887,7 +887,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) * allowed task size. Note that p_filesz must always be * <= p_memsz so it is only necessary to check p_memsz. */ - if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz || + if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz || elf_ppnt->p_memsz > TASK_SIZE || TASK_SIZE - elf_ppnt->p_memsz < k) { /* set_brk can never work. Avoid overflows. */ @@ -941,10 +941,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) interpreter, &interp_load_addr); if (BAD_ADDR(elf_entry)) { - printk(KERN_ERR "Unable to load interpreter %.128s\n", - elf_interpreter); force_sig(SIGSEGV, current); - retval = -ENOEXEC; /* Nobody gets to see this, but.. */ + retval = IS_ERR((void *)elf_entry) ? + (int)elf_entry : -EINVAL; goto out_free_dentry; } reloc_func_desc = interp_load_addr; @@ -955,8 +954,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } else { elf_entry = loc->elf_ex.e_entry; if (BAD_ADDR(elf_entry)) { - send_sig(SIGSEGV, current, 0); - retval = -ENOEXEC; /* Nobody gets to see this, but.. */ + force_sig(SIGSEGV, current); + retval = -EINVAL; goto out_free_dentry; } } diff --git a/fs/block_dev.c b/fs/block_dev.c index 9633a490dab0..37534573960b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -739,7 +739,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder, if (!bo) return -ENOMEM; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); res = bd_claim(bdev, holder); if (res || !add_bd_holder(bdev, bo)) free_bd_holder(bo); @@ -764,7 +764,7 @@ static void bd_release_from_kobject(struct block_device *bdev, if (!kobj) return; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION); bd_release(bdev); if ((bo = del_bd_holder(bdev, kobj))) free_bd_holder(bo); @@ -822,6 +822,22 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) EXPORT_SYMBOL(open_by_devnum); +static int +blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags); + +struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode) +{ + struct block_device *bdev = bdget(dev); + int err = -ENOMEM; + int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; + if (bdev) + err = blkdev_get_partition(bdev, mode, flags); + return err ? ERR_PTR(err) : bdev; +} + +EXPORT_SYMBOL(open_partition_by_devnum); + + /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This @@ -868,7 +884,11 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int do_open(struct block_device *bdev, struct file *file) +static int +blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags); + +static int +do_open(struct block_device *bdev, struct file *file, unsigned int subclass) { struct module *owner = NULL; struct gendisk *disk; @@ -885,7 +905,8 @@ static int do_open(struct block_device *bdev, struct file *file) } owner = disk->fops->owner; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, subclass); + if (!bdev->bd_openers) { bdev->bd_disk = disk; bdev->bd_contains = bdev; @@ -912,11 +933,11 @@ static int do_open(struct block_device *bdev, struct file *file) ret = -ENOMEM; if (!whole) goto out_first; - ret = blkdev_get(whole, file->f_mode, file->f_flags); + ret = blkdev_get_whole(whole, file->f_mode, file->f_flags); if (ret) goto out_first; bdev->bd_contains = whole; - mutex_lock(&whole->bd_mutex); + mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE); whole->bd_part_count++; p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = @@ -944,7 +965,8 @@ static int do_open(struct block_device *bdev, struct file *file) if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); } else { - mutex_lock(&bdev->bd_contains->bd_mutex); + mutex_lock_nested(&bdev->bd_contains->bd_mutex, + BD_MUTEX_PARTITION); bdev->bd_contains->bd_part_count++; mutex_unlock(&bdev->bd_contains->bd_mutex); } @@ -985,11 +1007,49 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) fake_file.f_dentry = &fake_dentry; fake_dentry.d_inode = bdev->bd_inode; - return do_open(bdev, &fake_file); + return do_open(bdev, &fake_file, BD_MUTEX_NORMAL); } EXPORT_SYMBOL(blkdev_get); +static int +blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags) +{ + /* + * This crockload is due to bad choice of ->open() type. + * It will go away. + * For now, block device ->open() routine must _not_ + * examine anything in 'inode' argument except ->i_rdev. + */ + struct file fake_file = {}; + struct dentry fake_dentry = {}; + fake_file.f_mode = mode; + fake_file.f_flags = flags; + fake_file.f_dentry = &fake_dentry; + fake_dentry.d_inode = bdev->bd_inode; + + return do_open(bdev, &fake_file, BD_MUTEX_WHOLE); +} + +static int +blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags) +{ + /* + * This crockload is due to bad choice of ->open() type. + * It will go away. + * For now, block device ->open() routine must _not_ + * examine anything in 'inode' argument except ->i_rdev. + */ + struct file fake_file = {}; + struct dentry fake_dentry = {}; + fake_file.f_mode = mode; + fake_file.f_flags = flags; + fake_file.f_dentry = &fake_dentry; + fake_dentry.d_inode = bdev->bd_inode; + + return do_open(bdev, &fake_file, BD_MUTEX_PARTITION); +} + static int blkdev_open(struct inode * inode, struct file * filp) { struct block_device *bdev; @@ -1005,7 +1065,7 @@ static int blkdev_open(struct inode * inode, struct file * filp) bdev = bd_acquire(inode); - res = do_open(bdev, filp); + res = do_open(bdev, filp, BD_MUTEX_NORMAL); if (res) return res; @@ -1019,13 +1079,13 @@ static int blkdev_open(struct inode * inode, struct file * filp) return res; } -int blkdev_put(struct block_device *bdev) +static int __blkdev_put(struct block_device *bdev, unsigned int subclass) { int ret = 0; struct inode *bd_inode = bdev->bd_inode; struct gendisk *disk = bdev->bd_disk; - mutex_lock(&bdev->bd_mutex); + mutex_lock_nested(&bdev->bd_mutex, subclass); lock_kernel(); if (!--bdev->bd_openers) { sync_blockdev(bdev); @@ -1035,7 +1095,8 @@ int blkdev_put(struct block_device *bdev) if (disk->fops->release) ret = disk->fops->release(bd_inode, NULL); } else { - mutex_lock(&bdev->bd_contains->bd_mutex); + mutex_lock_nested(&bdev->bd_contains->bd_mutex, + subclass + 1); bdev->bd_contains->bd_part_count--; mutex_unlock(&bdev->bd_contains->bd_mutex); } @@ -1051,9 +1112,8 @@ int blkdev_put(struct block_device *bdev) } bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; - if (bdev != bdev->bd_contains) { - blkdev_put(bdev->bd_contains); - } + if (bdev != bdev->bd_contains) + __blkdev_put(bdev->bd_contains, subclass + 1); bdev->bd_contains = NULL; } unlock_kernel(); @@ -1062,8 +1122,20 @@ int blkdev_put(struct block_device *bdev) return ret; } +int blkdev_put(struct block_device *bdev) +{ + return __blkdev_put(bdev, BD_MUTEX_NORMAL); +} + EXPORT_SYMBOL(blkdev_put); +int blkdev_put_partition(struct block_device *bdev) +{ + return __blkdev_put(bdev, BD_MUTEX_PARTITION); +} + +EXPORT_SYMBOL(blkdev_put_partition); + static int blkdev_close(struct inode * inode, struct file * filp) { struct block_device *bdev = I_BDEV(filp->f_mapping->host); diff --git a/fs/dcache.c b/fs/dcache.c index c6e3535be192..1b4a3a34ec57 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -38,7 +38,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); -static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED; +static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(dcache_lock); @@ -1339,10 +1339,10 @@ void d_move(struct dentry * dentry, struct dentry * target) */ if (target < dentry) { spin_lock(&target->d_lock); - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); } else { spin_lock(&dentry->d_lock); - spin_lock(&target->d_lock); + spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); } /* Move the dentry to the target hash queue, if on different bucket */ diff --git a/fs/direct-io.c b/fs/direct-io.c index 538fb0418fba..5981e17f46f0 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -220,7 +220,8 @@ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes) if (dio->end_io && dio->result) dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private); if (dio->lock_type == DIO_LOCKING) - up_read(&dio->inode->i_alloc_sem); + /* lockdep: non-owner release */ + up_read_non_owner(&dio->inode->i_alloc_sem); } /* @@ -1261,7 +1262,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } if (dio_lock_type == DIO_LOCKING) - down_read(&inode->i_alloc_sem); + /* lockdep: not the owner will release it */ + down_read_non_owner(&inode->i_alloc_sem); } /* diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9c677bbd0b08..19ffb043abbc 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -120,7 +120,7 @@ struct epoll_filefd { */ struct wake_task_node { struct list_head llink; - task_t *task; + struct task_struct *task; wait_queue_head_t *wq; }; @@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) { int wake_nests = 0; unsigned long flags; - task_t *this_task = current; + struct task_struct *this_task = current; struct list_head *lsthead = &psw->wake_task_list, *lnk; struct wake_task_node *tncur; struct wake_task_node tnode; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 9f43879d6d68..f2702cda9779 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1157,7 +1157,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, struct buffer_head tmp_bh; struct buffer_head *bh; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f2dd71336612..813d589cc6c0 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2614,7 +2614,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 9c2077e7e081..0ae3cd10702c 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -345,10 +345,8 @@ int jffs2_init_acl(struct inode *inode, struct inode *dir) return rc; } -void jffs2_clear_acl(struct inode *inode) +void jffs2_clear_acl(struct jffs2_inode_info *f) { - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); - if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) { posix_acl_release(f->i_acl_access); f->i_acl_access = JFFS2_ACL_NOT_CACHED; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 8893bd1a6ba7..fa327dbd3171 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -30,7 +30,7 @@ struct jffs2_acl_header { extern int jffs2_permission(struct inode *, int, struct nameidata *); extern int jffs2_acl_chmod(struct inode *); extern int jffs2_init_acl(struct inode *, struct inode *); -extern void jffs2_clear_acl(struct inode *); +extern void jffs2_clear_acl(struct jffs2_inode_info *); extern struct xattr_handler jffs2_acl_access_xattr_handler; extern struct xattr_handler jffs2_acl_default_xattr_handler; @@ -40,6 +40,6 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler; #define jffs2_permission NULL #define jffs2_acl_chmod(inode) (0) #define jffs2_init_acl(inode,dir) (0) -#define jffs2_clear_acl(inode) +#define jffs2_clear_acl(f) #endif /* CONFIG_JFFS2_FS_POSIX_ACL */ diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c index 8310c95478e9..33f291005012 100644 --- a/fs/jffs2/malloc.c +++ b/fs/jffs2/malloc.c @@ -190,7 +190,7 @@ void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *x) kmem_cache_free(tmp_dnode_info_slab, x); } -struct jffs2_raw_node_ref *jffs2_alloc_refblock(void) +static struct jffs2_raw_node_ref *jffs2_alloc_refblock(void) { struct jffs2_raw_node_ref *ret; diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index f752baa8d399..cae92c14116d 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -426,8 +426,6 @@ char *jffs2_getlink(struct jffs2_sb_info *c, struct jffs2_inode_info *f); /* scan.c */ int jffs2_scan_medium(struct jffs2_sb_info *c); void jffs2_rotate_lists(struct jffs2_sb_info *c); -int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf, - uint32_t ofs, uint32_t len); struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino); int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb); int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t size); diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index cc1899268c43..266423b2709d 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -968,6 +968,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f) struct jffs2_full_dirent *fd, *fds; int deleted; + jffs2_clear_acl(f); jffs2_xattr_delete_inode(c, f->inocache); down(&f->sem); deleted = f->inocache && !f->inocache->nlink; diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 2bfdc33752d3..e2413466ddd5 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -274,8 +274,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) return ret; } -int jffs2_fill_scan_buf (struct jffs2_sb_info *c, void *buf, - uint32_t ofs, uint32_t len) +static int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf, + uint32_t ofs, uint32_t len) { int ret; size_t retlen; diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 18e66dbf23b4..25bc1ae08648 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -50,9 +50,10 @@ * is used to write xdatum to medium. xd->version will be incremented. * create_xattr_datum(c, xprefix, xname, xvalue, xsize) * is used to create new xdatum and write to medium. - * delete_xattr_datum(c, xd) - * is used to delete a xdatum. It marks xd JFFS2_XFLAGS_DEAD, and allows - * GC to reclaim those physical nodes. + * unrefer_xattr_datum(c, xd) + * is used to delete a xdatum. When nobody refers this xdatum, JFFS2_XFLAGS_DEAD + * is set on xd->flags and chained xattr_dead_list or release it immediately. + * In the first case, the garbage collector release it later. * -------------------------------------------------- */ static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize) { @@ -394,22 +395,24 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c, return xd; } -static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) +static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd) { /* must be called under down_write(xattr_sem) */ - BUG_ON(atomic_read(&xd->refcnt)); + if (atomic_dec_and_lock(&xd->refcnt, &c->erase_completion_lock)) { + uint32_t xid = xd->xid, version = xd->version; - unload_xattr_datum(c, xd); - xd->flags |= JFFS2_XFLAGS_DEAD; - spin_lock(&c->erase_completion_lock); - if (xd->node == (void *)xd) { - BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID)); - jffs2_free_xattr_datum(xd); - } else { - list_add(&xd->xindex, &c->xattr_dead_list); + unload_xattr_datum(c, xd); + xd->flags |= JFFS2_XFLAGS_DEAD; + if (xd->node == (void *)xd) { + BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID)); + jffs2_free_xattr_datum(xd); + } else { + list_add(&xd->xindex, &c->xattr_dead_list); + } + spin_unlock(&c->erase_completion_lock); + + dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xid, version); } - spin_unlock(&c->erase_completion_lock); - dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xd->xid, xd->version); } /* -------- xref related functions ------------------ @@ -580,8 +583,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n", ref->ino, ref->xid, ref->xseqno); - if (atomic_dec_and_test(&xd->refcnt)) - delete_xattr_datum(c, xd); + unrefer_xattr_datum(c, xd); } void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) @@ -1119,8 +1121,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, ref->next = c->xref_dead_list; c->xref_dead_list = ref; spin_unlock(&c->erase_completion_lock); - if (atomic_dec_and_test(&xd->refcnt)) - delete_xattr_datum(c, xd); + unrefer_xattr_datum(c, xd); } else { ref->ic = ic; ref->xd = xd; @@ -1156,8 +1157,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, down_write(&c->xattr_sem); if (rc) { JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request); - if (atomic_dec_and_test(&xd->refcnt)) - delete_xattr_datum(c, xd); + unrefer_xattr_datum(c, xd); up_write(&c->xattr_sem); return rc; } @@ -1170,8 +1170,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, ic->xref = ref; } rc = PTR_ERR(newref); - if (atomic_dec_and_test(&xd->refcnt)) - delete_xattr_datum(c, xd); + unrefer_xattr_datum(c, xd); } else if (ref) { delete_xattr_ref(c, ref); } diff --git a/fs/namei.c b/fs/namei.c index c784e8bb57a3..c9750d755aff 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1423,7 +1423,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) struct dentry *p; if (p1 == p2) { - mutex_lock(&p1->d_inode->i_mutex); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); return NULL; } @@ -1431,22 +1431,22 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) for (p = p1; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p2) { - mutex_lock(&p2->d_inode->i_mutex); - mutex_lock(&p1->d_inode->i_mutex); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); return p; } } for (p = p2; p->d_parent != p; p = p->d_parent) { if (p->d_parent == p1) { - mutex_lock(&p1->d_inode->i_mutex); - mutex_lock(&p2->d_inode->i_mutex); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); return p; } } - mutex_lock(&p1->d_inode->i_mutex); - mutex_lock(&p2->d_inode->i_mutex); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); return NULL; } @@ -1751,7 +1751,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) { struct dentry *dentry = ERR_PTR(-EEXIST); - mutex_lock(&nd->dentry->d_inode->i_mutex); + mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT); /* * Yucky last component or no last component at all? * (foo/., foo/.., /////) @@ -2008,7 +2008,7 @@ static long do_rmdir(int dfd, const char __user *pathname) error = -EBUSY; goto exit1; } - mutex_lock(&nd.dentry->d_inode->i_mutex); + mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -2082,7 +2082,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; - mutex_lock(&nd.dentry->d_inode->i_mutex); + mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 4c86b7e1d1eb..d313f356e66a 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -367,6 +367,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni) kmem_cache_free(ntfs_inode_cache, ni); } +/* + * The attribute runlist lock has separate locking rules from the + * normal runlist lock, so split the two lock-classes: + */ +static struct lock_class_key attr_list_rl_lock_class; + /** * __ntfs_init_inode - initialize ntfs specific part of an inode * @sb: super block of mounted volume @@ -394,6 +400,8 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) ni->attr_list_size = 0; ni->attr_list = NULL; ntfs_init_runlist(&ni->attr_list_rl); + lockdep_set_class(&ni->attr_list_rl.lock, + &attr_list_rl_lock_class); ni->itype.index.bmp_ino = NULL; ni->itype.index.block_size = 0; ni->itype.index.vcn_size = 0; @@ -405,6 +413,13 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) ni->ext.base_ntfs_ino = NULL; } +/* + * Extent inodes get MFT-mapped in a nested way, while the base inode + * is still mapped. Teach this nesting to the lock validator by creating + * a separate class for nested inode's mrec_lock's: + */ +static struct lock_class_key extent_inode_mrec_lock_key; + inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, unsigned long mft_no) { @@ -413,6 +428,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, ntfs_debug("Entering."); if (likely(ni != NULL)) { __ntfs_init_inode(sb, ni); + lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key); ni->mft_no = mft_no; ni->type = AT_UNUSED; ni->name = NULL; @@ -1722,6 +1738,15 @@ err_out: return err; } +/* + * The MFT inode has special locking, so teach the lock validator + * about this by splitting off the locking rules of the MFT from + * the locking rules of other inodes. The MFT inode can never be + * accessed from the VFS side (or even internally), only by the + * map_mft functions. + */ +static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key; + /** * ntfs_read_inode_mount - special read_inode for mount time use only * @vi: inode to read @@ -2148,6 +2173,14 @@ int ntfs_read_inode_mount(struct inode *vi) ntfs_attr_put_search_ctx(ctx); ntfs_debug("Done."); ntfs_free(m); + + /* + * Split the locking rules of the MFT inode from the + * locking rules of other inodes: + */ + lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key); + lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key); + return 0; em_put_err_out: diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 0e14acea3f8b..74e0ee8fce72 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -1724,6 +1724,14 @@ upcase_failed: return FALSE; } +/* + * The lcn and mft bitmap inodes are NTFS-internal inodes with + * their own special locking rules: + */ +static struct lock_class_key + lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key, + mftbmp_runlist_lock_key, mftbmp_mrec_lock_key; + /** * load_system_files - open the system files using normal functions * @vol: ntfs super block describing device whose system files to load @@ -1780,6 +1788,10 @@ static BOOL load_system_files(ntfs_volume *vol) ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute."); goto iput_mirr_err_out; } + lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock, + &mftbmp_runlist_lock_key); + lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock, + &mftbmp_mrec_lock_key); /* Read upcase table and setup @vol->upcase and @vol->upcase_len. */ if (!load_and_init_upcase(vol)) goto iput_mftbmp_err_out; @@ -1802,6 +1814,11 @@ static BOOL load_system_files(ntfs_volume *vol) iput(vol->lcnbmp_ino); goto bitmap_failed; } + lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock, + &lcnbmp_runlist_lock_key); + lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock, + &lcnbmp_mrec_lock_key); + NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino)); if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) { iput(vol->lcnbmp_ino); @@ -2743,6 +2760,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) struct inode *tmp_ino; int blocksize, result; + /* + * We do a pretty difficult piece of bootstrap by reading the + * MFT (and other metadata) from disk into memory. We'll only + * release this metadata during umount, so the locking patterns + * observed during bootstrap do not count. So turn off the + * observation of locking patterns (strictly for this context + * only) while mounting NTFS. [The validator is still active + * otherwise, even for this context: it will for example record + * lock class registrations.] + */ + lockdep_off(); ntfs_debug("Entering."); #ifndef NTFS_RW sb->s_flags |= MS_RDONLY; @@ -2754,6 +2782,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) if (!silent) ntfs_error(sb, "Allocation of NTFS volume structure " "failed. Aborting mount..."); + lockdep_on(); return -ENOMEM; } /* Initialize ntfs_volume structure. */ @@ -2940,6 +2969,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) mutex_unlock(&ntfs_lock); sb->s_export_op = &ntfs_export_ops; lock_kernel(); + lockdep_on(); return 0; } ntfs_error(sb, "Failed to allocate root directory."); @@ -3059,6 +3089,7 @@ err_out_now: sb->s_fs_info = NULL; kfree(vol); ntfs_debug("Failed, returning -EINVAL."); + lockdep_on(); return -EINVAL; } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index af69f28277b6..4616ed50ffcd 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -107,7 +107,7 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount * { struct vm_list_struct *vml; struct vm_area_struct *vma; - struct task_struct *task = proc_task(inode); + struct task_struct *task = get_proc_task(inode); struct mm_struct *mm = get_task_mm(task); int result = -ENOENT; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 28eb3c886034..5567328f1041 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2203,7 +2203,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head tmp_bh, *bh; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; diff --git a/fs/super.c b/fs/super.c index 9b780c42d845..6d4e8174b6db 100644 --- a/fs/super.c +++ b/fs/super.c @@ -53,7 +53,7 @@ DEFINE_SPINLOCK(sb_lock); * Allocates and initializes a new &struct super_block. alloc_super() * returns a pointer new superblock or %NULL if allocation had failed. */ -static struct super_block *alloc_super(void) +static struct super_block *alloc_super(struct file_system_type *type) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static struct super_operations default_op; @@ -72,6 +72,13 @@ static struct super_block *alloc_super(void) INIT_LIST_HEAD(&s->s_inodes); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); + lockdep_set_class(&s->s_umount, &type->s_umount_key); + /* + * The locking rules for s_lock are up to the + * filesystem. For example ext3fs has different + * lock ordering than usbfs: + */ + lockdep_set_class(&s->s_lock, &type->s_lock_key); down_write(&s->s_umount); s->s_count = S_BIAS; atomic_set(&s->s_active, 1); @@ -295,7 +302,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(); + s = alloc_super(type); if (!s) return ERR_PTR(-ENOMEM); goto retry; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 19a99726e58d..992ee0b87cc3 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1326,7 +1326,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head *bh; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; |