Merge branch 'master' of /home/trondmy/kernel/linux-2.6/

author: Trond Myklebust <Trond.Myklebust@netapp.com> 2006-07-05 13:13:03 -0400
committer: Trond Myklebust <Trond.Myklebust@netapp.com> 2006-07-05 13:13:03 -0400
commit: 5e66dd6d66ffe758b39b6dcadf2330753ee1159b (patch)
tree: a72cdcff4448e4af9425cc213ddf56ab23e697fe /fs
parent: 026477c1141b67e98e3bd8bdedb7d4b88a3ecd09 (diff)
parent: ca78f6baca863afe2e6a244a0fe94b3a70211d46 (diff)
download: lwn-5e66dd6d66ffe758b39b6dcadf2330753ee1159b.tar.gz
lwn-5e66dd6d66ffe758b39b6dcadf2330753ee1159b.zip
21 files changed, 220 insertions, 80 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d0434406eaeb..f42e64210ee5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = {
 		.min_coredump	= ELF_EXEC_PAGESIZE
 };
 
-#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE)
+#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -394,7 +394,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			 * <= p_memsize so it's only necessary to check p_memsz.
 			 */
 			k = load_addr + eppnt->p_vaddr;
-			if (k > TASK_SIZE ||
+			if (BAD_ADDR(k) ||
 			    eppnt->p_filesz > eppnt->p_memsz ||
 			    eppnt->p_memsz > TASK_SIZE ||
 			    TASK_SIZE - eppnt->p_memsz < k) {
@@ -887,7 +887,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		 * allowed task size. Note that p_filesz must always be
 		 * <= p_memsz so it is only necessary to check p_memsz.
 		 */
-		if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
+		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
 		    elf_ppnt->p_memsz > TASK_SIZE ||
 		    TASK_SIZE - elf_ppnt->p_memsz < k) {
 			/* set_brk can never work. Avoid overflows. */
@@ -941,10 +941,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 						    interpreter,
 						    &interp_load_addr);
 		if (BAD_ADDR(elf_entry)) {
-			printk(KERN_ERR "Unable to load interpreter %.128s\n",
-				elf_interpreter);
 			force_sig(SIGSEGV, current);
-			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+			retval = IS_ERR((void *)elf_entry) ?
+					(int)elf_entry : -EINVAL;
 			goto out_free_dentry;
 		}
 		reloc_func_desc = interp_load_addr;
@@ -955,8 +954,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	} else {
 		elf_entry = loc->elf_ex.e_entry;
 		if (BAD_ADDR(elf_entry)) {
-			send_sig(SIGSEGV, current, 0);
-			retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+			force_sig(SIGSEGV, current);
+			retval = -EINVAL;
 			goto out_free_dentry;
 		}
 	}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9633a490dab0..37534573960b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -739,7 +739,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
 	if (!bo)
 		return -ENOMEM;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	res = bd_claim(bdev, holder);
 	if (res || !add_bd_holder(bdev, bo))
 		free_bd_holder(bo);
@@ -764,7 +764,7 @@ static void bd_release_from_kobject(struct block_device *bdev,
 	if (!kobj)
 		return;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
 	bd_release(bdev);
 	if ((bo = del_bd_holder(bdev, kobj)))
 		free_bd_holder(bo);
@@ -822,6 +822,22 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)
 
 EXPORT_SYMBOL(open_by_devnum);
 
+static int
+blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags);
+
+struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode)
+{
+	struct block_device *bdev = bdget(dev);
+	int err = -ENOMEM;
+	int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
+	if (bdev)
+		err = blkdev_get_partition(bdev, mode, flags);
+	return err ? ERR_PTR(err) : bdev;
+}
+
+EXPORT_SYMBOL(open_partition_by_devnum);
+
+
 /*
  * This routine checks whether a removable media has been changed,
  * and invalidates all buffer-cache-entries in that case. This
@@ -868,7 +884,11 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
 
-static int do_open(struct block_device *bdev, struct file *file)
+static int
+blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
+
+static int
+do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
 {
 	struct module *owner = NULL;
 	struct gendisk *disk;
@@ -885,7 +905,8 @@ static int do_open(struct block_device *bdev, struct file *file)
 	}
 	owner = disk->fops->owner;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, subclass);
+
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
 		bdev->bd_contains = bdev;
@@ -912,11 +933,11 @@ static int do_open(struct block_device *bdev, struct file *file)
 			ret = -ENOMEM;
 			if (!whole)
 				goto out_first;
-			ret = blkdev_get(whole, file->f_mode, file->f_flags);
+			ret = blkdev_get_whole(whole, file->f_mode, file->f_flags);
 			if (ret)
 				goto out_first;
 			bdev->bd_contains = whole;
-			mutex_lock(&whole->bd_mutex);
+			mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE);
 			whole->bd_part_count++;
 			p = disk->part[part - 1];
 			bdev->bd_inode->i_data.backing_dev_info =
@@ -944,7 +965,8 @@ static int do_open(struct block_device *bdev, struct file *file)
 			if (bdev->bd_invalidated)
 				rescan_partitions(bdev->bd_disk, bdev);
 		} else {
-			mutex_lock(&bdev->bd_contains->bd_mutex);
+			mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+					  BD_MUTEX_PARTITION);
 			bdev->bd_contains->bd_part_count++;
 			mutex_unlock(&bdev->bd_contains->bd_mutex);
 		}
@@ -985,11 +1007,49 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
 	fake_file.f_dentry = &fake_dentry;
 	fake_dentry.d_inode = bdev->bd_inode;
 
-	return do_open(bdev, &fake_file);
+	return do_open(bdev, &fake_file, BD_MUTEX_NORMAL);
 }
 
 EXPORT_SYMBOL(blkdev_get);
 
+static int
+blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+	/*
+	 * This crockload is due to bad choice of ->open() type.
+	 * It will go away.
+	 * For now, block device ->open() routine must _not_
+	 * examine anything in 'inode' argument except ->i_rdev.
+	 */
+	struct file fake_file = {};
+	struct dentry fake_dentry = {};
+	fake_file.f_mode = mode;
+	fake_file.f_flags = flags;
+	fake_file.f_dentry = &fake_dentry;
+	fake_dentry.d_inode = bdev->bd_inode;
+
+	return do_open(bdev, &fake_file, BD_MUTEX_WHOLE);
+}
+
+static int
+blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+	/*
+	 * This crockload is due to bad choice of ->open() type.
+	 * It will go away.
+	 * For now, block device ->open() routine must _not_
+	 * examine anything in 'inode' argument except ->i_rdev.
+	 */
+	struct file fake_file = {};
+	struct dentry fake_dentry = {};
+	fake_file.f_mode = mode;
+	fake_file.f_flags = flags;
+	fake_file.f_dentry = &fake_dentry;
+	fake_dentry.d_inode = bdev->bd_inode;
+
+	return do_open(bdev, &fake_file, BD_MUTEX_PARTITION);
+}
+
 static int blkdev_open(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev;
@@ -1005,7 +1065,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 
 	bdev = bd_acquire(inode);
 
-	res = do_open(bdev, filp);
+	res = do_open(bdev, filp, BD_MUTEX_NORMAL);
 	if (res)
 		return res;
 
@@ -1019,13 +1079,13 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return res;
 }
 
-int blkdev_put(struct block_device *bdev)
+static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
 {
 	int ret = 0;
 	struct inode *bd_inode = bdev->bd_inode;
 	struct gendisk *disk = bdev->bd_disk;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock_nested(&bdev->bd_mutex, subclass);
 	lock_kernel();
 	if (!--bdev->bd_openers) {
 		sync_blockdev(bdev);
@@ -1035,7 +1095,8 @@ int blkdev_put(struct block_device *bdev)
 		if (disk->fops->release)
 			ret = disk->fops->release(bd_inode, NULL);
 	} else {
-		mutex_lock(&bdev->bd_contains->bd_mutex);
+		mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+				  subclass + 1);
 		bdev->bd_contains->bd_part_count--;
 		mutex_unlock(&bdev->bd_contains->bd_mutex);
 	}
@@ -1051,9 +1112,8 @@ int blkdev_put(struct block_device *bdev)
 		}
 		bdev->bd_disk = NULL;
 		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-		if (bdev != bdev->bd_contains) {
-			blkdev_put(bdev->bd_contains);
-		}
+		if (bdev != bdev->bd_contains)
+			__blkdev_put(bdev->bd_contains, subclass + 1);
 		bdev->bd_contains = NULL;
 	}
 	unlock_kernel();
@@ -1062,8 +1122,20 @@ int blkdev_put(struct block_device *bdev)
 	return ret;
 }
 
+int blkdev_put(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_NORMAL);
+}
+
 EXPORT_SYMBOL(blkdev_put);
 
+int blkdev_put_partition(struct block_device *bdev)
+{
+	return __blkdev_put(bdev, BD_MUTEX_PARTITION);
+}
+
+EXPORT_SYMBOL(blkdev_put_partition);
+
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
 	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
diff --git a/fs/dcache.c b/fs/dcache.c
index c6e3535be192..1b4a3a34ec57 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -38,7 +38,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
  __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
-static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(dcache_lock);
 
@@ -1339,10 +1339,10 @@ void d_move(struct dentry * dentry, struct dentry * target)
 	 */
 	if (target < dentry) {
 		spin_lock(&target->d_lock);
-		spin_lock(&dentry->d_lock);
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 	} else {
 		spin_lock(&dentry->d_lock);
-		spin_lock(&target->d_lock);
+		spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
 	}
 
 	/* Move the dentry to the target hash queue, if on different bucket */
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 538fb0418fba..5981e17f46f0 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -220,7 +220,8 @@ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
 	if (dio->end_io && dio->result)
 		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
 	if (dio->lock_type == DIO_LOCKING)
-		up_read(&dio->inode->i_alloc_sem);
+		/* lockdep: non-owner release */
+		up_read_non_owner(&dio->inode->i_alloc_sem);
 }
 
 /*
@@ -1261,7 +1262,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 		}
 
 		if (dio_lock_type == DIO_LOCKING)
-			down_read(&inode->i_alloc_sem);
+			/* lockdep: not the owner will release it */
+			down_read_non_owner(&inode->i_alloc_sem);
 	}
 
 	/*
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9c677bbd0b08..19ffb043abbc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -120,7 +120,7 @@ struct epoll_filefd {
  */
 struct wake_task_node {
 	struct list_head llink;
-	task_t *task;
+	struct task_struct *task;
 	wait_queue_head_t *wq;
 };
 
@@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
 {
 	int wake_nests = 0;
 	unsigned long flags;
-	task_t *this_task = current;
+	struct task_struct *this_task = current;
 	struct list_head *lsthead = &psw->wake_task_list, *lnk;
 	struct wake_task_node *tncur;
 	struct wake_task_node tnode;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9f43879d6d68..f2702cda9779 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1157,7 +1157,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
 	struct buffer_head tmp_bh;
 	struct buffer_head *bh;
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f2dd71336612..813d589cc6c0 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2614,7 +2614,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 9c2077e7e081..0ae3cd10702c 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -345,10 +345,8 @@ int jffs2_init_acl(struct inode *inode, struct inode *dir)
 	return rc;
 }
 
-void jffs2_clear_acl(struct inode *inode)
+void jffs2_clear_acl(struct jffs2_inode_info *f)
 {
-	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
-
 	if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {
 		posix_acl_release(f->i_acl_access);
 		f->i_acl_access = JFFS2_ACL_NOT_CACHED;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 8893bd1a6ba7..fa327dbd3171 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -30,7 +30,7 @@ struct jffs2_acl_header {
 extern int jffs2_permission(struct inode *, int, struct nameidata *);
 extern int jffs2_acl_chmod(struct inode *);
 extern int jffs2_init_acl(struct inode *, struct inode *);
-extern void jffs2_clear_acl(struct inode *);
+extern void jffs2_clear_acl(struct jffs2_inode_info *);
 
 extern struct xattr_handler jffs2_acl_access_xattr_handler;
 extern struct xattr_handler jffs2_acl_default_xattr_handler;
@@ -40,6 +40,6 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
 #define jffs2_permission NULL
 #define jffs2_acl_chmod(inode)		(0)
 #define jffs2_init_acl(inode,dir)	(0)
-#define jffs2_clear_acl(inode)
+#define jffs2_clear_acl(f)
 
 #endif	/* CONFIG_JFFS2_FS_POSIX_ACL */
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index 8310c95478e9..33f291005012 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -190,7 +190,7 @@ void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *x)
 	kmem_cache_free(tmp_dnode_info_slab, x);
 }
 
-struct jffs2_raw_node_ref *jffs2_alloc_refblock(void)
+static struct jffs2_raw_node_ref *jffs2_alloc_refblock(void)
 {
 	struct jffs2_raw_node_ref *ret;
 
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index f752baa8d399..cae92c14116d 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -426,8 +426,6 @@ char *jffs2_getlink(struct jffs2_sb_info *c, struct jffs2_inode_info *f);
 /* scan.c */
 int jffs2_scan_medium(struct jffs2_sb_info *c);
 void jffs2_rotate_lists(struct jffs2_sb_info *c);
-int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf,
-				uint32_t ofs, uint32_t len);
 struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino);
 int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
 int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t size);
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index cc1899268c43..266423b2709d 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -968,6 +968,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
 	struct jffs2_full_dirent *fd, *fds;
 	int deleted;
 
+	jffs2_clear_acl(f);
 	jffs2_xattr_delete_inode(c, f->inocache);
 	down(&f->sem);
 	deleted = f->inocache && !f->inocache->nlink;
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 2bfdc33752d3..e2413466ddd5 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -274,8 +274,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
 	return ret;
 }
 
-int jffs2_fill_scan_buf (struct jffs2_sb_info *c, void *buf,
-				uint32_t ofs, uint32_t len)
+static int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf,
+			       uint32_t ofs, uint32_t len)
 {
 	int ret;
 	size_t retlen;
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 18e66dbf23b4..25bc1ae08648 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -50,9 +50,10 @@
  *   is used to write xdatum to medium. xd->version will be incremented.
  * create_xattr_datum(c, xprefix, xname, xvalue, xsize)
  *   is used to create new xdatum and write to medium.
- * delete_xattr_datum(c, xd)
- *   is used to delete a xdatum. It marks xd JFFS2_XFLAGS_DEAD, and allows
- *   GC to reclaim those physical nodes.
+ * unrefer_xattr_datum(c, xd)
+ *   is used to delete a xdatum. When nobody refers this xdatum, JFFS2_XFLAGS_DEAD
+ *   is set on xd->flags and chained xattr_dead_list or release it immediately.
+ *   In the first case, the garbage collector release it later.
  * -------------------------------------------------- */
 static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize)
 {
@@ -394,22 +395,24 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
 	return xd;
 }
 
-static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
+static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
 {
 	/* must be called under down_write(xattr_sem) */
-	BUG_ON(atomic_read(&xd->refcnt));
+	if (atomic_dec_and_lock(&xd->refcnt, &c->erase_completion_lock)) {
+		uint32_t xid = xd->xid, version = xd->version;
 
-	unload_xattr_datum(c, xd);
-	xd->flags |= JFFS2_XFLAGS_DEAD;
-	spin_lock(&c->erase_completion_lock);
-	if (xd->node == (void *)xd) {
-		BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID));
-		jffs2_free_xattr_datum(xd);
-	} else {
-		list_add(&xd->xindex, &c->xattr_dead_list);
+		unload_xattr_datum(c, xd);
+		xd->flags |= JFFS2_XFLAGS_DEAD;
+		if (xd->node == (void *)xd) {
+			BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID));
+			jffs2_free_xattr_datum(xd);
+		} else {
+			list_add(&xd->xindex, &c->xattr_dead_list);
+		}
+		spin_unlock(&c->erase_completion_lock);
+
+		dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xid, version);
 	}
-	spin_unlock(&c->erase_completion_lock);
-	dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xd->xid, xd->version);
 }
 
 /* -------- xref related functions ------------------
@@ -580,8 +583,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re
 	dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n",
 		  ref->ino, ref->xid, ref->xseqno);
 
-	if (atomic_dec_and_test(&xd->refcnt))
-		delete_xattr_datum(c, xd);
+	unrefer_xattr_datum(c, xd);
 }
 
 void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
@@ -1119,8 +1121,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 					ref->next = c->xref_dead_list;
 					c->xref_dead_list = ref;
 					spin_unlock(&c->erase_completion_lock);
-					if (atomic_dec_and_test(&xd->refcnt))
-						delete_xattr_datum(c, xd);
+					unrefer_xattr_datum(c, xd);
 				} else {
 					ref->ic = ic;
 					ref->xd = xd;
@@ -1156,8 +1157,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 	down_write(&c->xattr_sem);
 	if (rc) {
 		JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request);
-		if (atomic_dec_and_test(&xd->refcnt))
-			delete_xattr_datum(c, xd);
+		unrefer_xattr_datum(c, xd);
 		up_write(&c->xattr_sem);
 		return rc;
 	}
@@ -1170,8 +1170,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
 			ic->xref = ref;
 		}
 		rc = PTR_ERR(newref);
-		if (atomic_dec_and_test(&xd->refcnt))
-			delete_xattr_datum(c, xd);
+		unrefer_xattr_datum(c, xd);
 	} else if (ref) {
 		delete_xattr_ref(c, ref);
 	}
diff --git a/fs/namei.c b/fs/namei.c
index c784e8bb57a3..c9750d755aff 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1423,7 +1423,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 	struct dentry *p;
 
 	if (p1 == p2) {
-		mutex_lock(&p1->d_inode->i_mutex);
+		mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
 		return NULL;
 	}
 
@@ -1431,22 +1431,22 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 
 	for (p = p1; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p2) {
-			mutex_lock(&p2->d_inode->i_mutex);
-			mutex_lock(&p1->d_inode->i_mutex);
+			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
+			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
 			return p;
 		}
 	}
 
 	for (p = p2; p->d_parent != p; p = p->d_parent) {
 		if (p->d_parent == p1) {
-			mutex_lock(&p1->d_inode->i_mutex);
-			mutex_lock(&p2->d_inode->i_mutex);
+			mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+			mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
 			return p;
 		}
 	}
 
-	mutex_lock(&p1->d_inode->i_mutex);
-	mutex_lock(&p2->d_inode->i_mutex);
+	mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
 	return NULL;
 }
 
@@ -1751,7 +1751,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
 	struct dentry *dentry = ERR_PTR(-EEXIST);
 
-	mutex_lock(&nd->dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	/*
 	 * Yucky last component or no last component at all?
 	 * (foo/., foo/.., /////)
@@ -2008,7 +2008,7 @@ static long do_rmdir(int dfd, const char __user *pathname)
 			error = -EBUSY;
 			goto exit1;
 	}
-	mutex_lock(&nd.dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
@@ -2082,7 +2082,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 	error = -EISDIR;
 	if (nd.last_type != LAST_NORM)
 		goto exit1;
-	mutex_lock(&nd.dentry->d_inode->i_mutex);
+	mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
 	dentry = lookup_hash(&nd);
 	error = PTR_ERR(dentry);
 	if (!IS_ERR(dentry)) {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 4c86b7e1d1eb..d313f356e66a 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -367,6 +367,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni)
 	kmem_cache_free(ntfs_inode_cache, ni);
 }
 
+/*
+ * The attribute runlist lock has separate locking rules from the
+ * normal runlist lock, so split the two lock-classes:
+ */
+static struct lock_class_key attr_list_rl_lock_class;
+
 /**
  * __ntfs_init_inode - initialize ntfs specific part of an inode
  * @sb:		super block of mounted volume
@@ -394,6 +400,8 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
 	ni->attr_list_size = 0;
 	ni->attr_list = NULL;
 	ntfs_init_runlist(&ni->attr_list_rl);
+	lockdep_set_class(&ni->attr_list_rl.lock,
+				&attr_list_rl_lock_class);
 	ni->itype.index.bmp_ino = NULL;
 	ni->itype.index.block_size = 0;
 	ni->itype.index.vcn_size = 0;
@@ -405,6 +413,13 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
 	ni->ext.base_ntfs_ino = NULL;
 }
 
+/*
+ * Extent inodes get MFT-mapped in a nested way, while the base inode
+ * is still mapped. Teach this nesting to the lock validator by creating
+ * a separate class for nested inode's mrec_lock's:
+ */
+static struct lock_class_key extent_inode_mrec_lock_key;
+
 inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
 		unsigned long mft_no)
 {
@@ -413,6 +428,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
 	ntfs_debug("Entering.");
 	if (likely(ni != NULL)) {
 		__ntfs_init_inode(sb, ni);
+		lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
 		ni->mft_no = mft_no;
 		ni->type = AT_UNUSED;
 		ni->name = NULL;
@@ -1722,6 +1738,15 @@ err_out:
 	return err;
 }
 
+/*
+ * The MFT inode has special locking, so teach the lock validator
+ * about this by splitting off the locking rules of the MFT from
+ * the locking rules of other inodes. The MFT inode can never be
+ * accessed from the VFS side (or even internally), only by the
+ * map_mft functions.
+ */
+static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
+
 /**
  * ntfs_read_inode_mount - special read_inode for mount time use only
  * @vi:		inode to read
@@ -2148,6 +2173,14 @@ int ntfs_read_inode_mount(struct inode *vi)
 	ntfs_attr_put_search_ctx(ctx);
 	ntfs_debug("Done.");
 	ntfs_free(m);
+
+	/*
+	 * Split the locking rules of the MFT inode from the
+	 * locking rules of other inodes:
+	 */
+	lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
+	lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
+
 	return 0;
 
 em_put_err_out:
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 0e14acea3f8b..74e0ee8fce72 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1724,6 +1724,14 @@ upcase_failed:
 	return FALSE;
 }
 
+/*
+ * The lcn and mft bitmap inodes are NTFS-internal inodes with
+ * their own special locking rules:
+ */
+static struct lock_class_key
+	lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key,
+	mftbmp_runlist_lock_key, mftbmp_mrec_lock_key;
+
 /**
  * load_system_files - open the system files using normal functions
  * @vol:	ntfs super block describing device whose system files to load
@@ -1780,6 +1788,10 @@ static BOOL load_system_files(ntfs_volume *vol)
 		ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute.");
 		goto iput_mirr_err_out;
 	}
+	lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock,
+			   &mftbmp_runlist_lock_key);
+	lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock,
+			   &mftbmp_mrec_lock_key);
 	/* Read upcase table and setup @vol->upcase and @vol->upcase_len. */
 	if (!load_and_init_upcase(vol))
 		goto iput_mftbmp_err_out;
@@ -1802,6 +1814,11 @@ static BOOL load_system_files(ntfs_volume *vol)
 			iput(vol->lcnbmp_ino);
 		goto bitmap_failed;
 	}
+	lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock,
+			   &lcnbmp_runlist_lock_key);
+	lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock,
+			   &lcnbmp_mrec_lock_key);
+
 	NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
 	if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
 		iput(vol->lcnbmp_ino);
@@ -2743,6 +2760,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 	struct inode *tmp_ino;
 	int blocksize, result;
 
+	/*
+	 * We do a pretty difficult piece of bootstrap by reading the
+	 * MFT (and other metadata) from disk into memory. We'll only
+	 * release this metadata during umount, so the locking patterns
+	 * observed during bootstrap do not count. So turn off the
+	 * observation of locking patterns (strictly for this context
+	 * only) while mounting NTFS. [The validator is still active
+	 * otherwise, even for this context: it will for example record
+	 * lock class registrations.]
+	 */
+	lockdep_off();
 	ntfs_debug("Entering.");
 #ifndef NTFS_RW
 	sb->s_flags |= MS_RDONLY;
@@ -2754,6 +2782,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		if (!silent)
 			ntfs_error(sb, "Allocation of NTFS volume structure "
 					"failed. Aborting mount...");
+		lockdep_on();
 		return -ENOMEM;
 	}
 	/* Initialize ntfs_volume structure. */
@@ -2940,6 +2969,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		mutex_unlock(&ntfs_lock);
 		sb->s_export_op = &ntfs_export_ops;
 		lock_kernel();
+		lockdep_on();
 		return 0;
 	}
 	ntfs_error(sb, "Failed to allocate root directory.");
@@ -3059,6 +3089,7 @@ err_out_now:
 	sb->s_fs_info = NULL;
 	kfree(vol);
 	ntfs_debug("Failed, returning -EINVAL.");
+	lockdep_on();
 	return -EINVAL;
 }
 
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index af69f28277b6..4616ed50ffcd 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -107,7 +107,7 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
 {
 	struct vm_list_struct *vml;
 	struct vm_area_struct *vma;
-	struct task_struct *task = proc_task(inode);
+	struct task_struct *task = get_proc_task(inode);
 	struct mm_struct *mm = get_task_mm(task);
 	int result = -ENOENT;
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 28eb3c886034..5567328f1041 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2203,7 +2203,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
 	size_t towrite = len;
 	struct buffer_head tmp_bh, *bh;
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 		    sb->s_blocksize - offset : towrite;
diff --git a/fs/super.c b/fs/super.c
index 9b780c42d845..6d4e8174b6db 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -53,7 +53,7 @@ DEFINE_SPINLOCK(sb_lock);
  *	Allocates and initializes a new &struct super_block.  alloc_super()
  *	returns a pointer new superblock or %NULL if allocation had failed.
  */
-static struct super_block *alloc_super(void)
+static struct super_block *alloc_super(struct file_system_type *type)
 {
 	struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
 	static struct super_operations default_op;
@@ -72,6 +72,13 @@ static struct super_block *alloc_super(void)
 		INIT_LIST_HEAD(&s->s_inodes);
 		init_rwsem(&s->s_umount);
 		mutex_init(&s->s_lock);
+		lockdep_set_class(&s->s_umount, &type->s_umount_key);
+		/*
+		 * The locking rules for s_lock are up to the
+		 * filesystem. For example ext3fs has different
+		 * lock ordering than usbfs:
+		 */
+		lockdep_set_class(&s->s_lock, &type->s_lock_key);
 		down_write(&s->s_umount);
 		s->s_count = S_BIAS;
 		atomic_set(&s->s_active, 1);
@@ -295,7 +302,7 @@ retry:
 	}
 	if (!s) {
 		spin_unlock(&sb_lock);
-		s = alloc_super();
+		s = alloc_super(type);
 		if (!s)
 			return ERR_PTR(-ENOMEM);
 		goto retry;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 19a99726e58d..992ee0b87cc3 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1326,7 +1326,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
 	size_t towrite = len;
 	struct buffer_head *bh;
 
-	mutex_lock(&inode->i_mutex);
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
 	while (towrite > 0) {
 		tocopy = sb->s_blocksize - offset < towrite ?
 				sb->s_blocksize - offset : towrite;
author	Trond Myklebust <Trond.Myklebust@netapp.com>	2006-07-05 13:13:03 -0400
committer	Trond Myklebust <Trond.Myklebust@netapp.com>	2006-07-05 13:13:03 -0400
commit	5e66dd6d66ffe758b39b6dcadf2330753ee1159b (patch)
tree	a72cdcff4448e4af9425cc213ddf56ab23e697fe /fs
parent	026477c1141b67e98e3bd8bdedb7d4b88a3ecd09 (diff)
parent	ca78f6baca863afe2e6a244a0fe94b3a70211d46 (diff)
download	lwn-5e66dd6d66ffe758b39b6dcadf2330753ee1159b.tar.gz lwn-5e66dd6d66ffe758b39b6dcadf2330753ee1159b.zip