summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorTrond Myklebust <Trond.Myklebust@netapp.com>2006-07-05 13:13:03 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2006-07-05 13:13:03 -0400
commit5e66dd6d66ffe758b39b6dcadf2330753ee1159b (patch)
treea72cdcff4448e4af9425cc213ddf56ab23e697fe /fs
parent026477c1141b67e98e3bd8bdedb7d4b88a3ecd09 (diff)
parentca78f6baca863afe2e6a244a0fe94b3a70211d46 (diff)
downloadlwn-5e66dd6d66ffe758b39b6dcadf2330753ee1159b.tar.gz
lwn-5e66dd6d66ffe758b39b6dcadf2330753ee1159b.zip
Merge branch 'master' of /home/trondmy/kernel/linux-2.6/
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_elf.c15
-rw-r--r--fs/block_dev.c102
-rw-r--r--fs/dcache.c6
-rw-r--r--fs/direct-io.c6
-rw-r--r--fs/eventpoll.c4
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/jffs2/acl.c4
-rw-r--r--fs/jffs2/acl.h4
-rw-r--r--fs/jffs2/malloc.c2
-rw-r--r--fs/jffs2/nodelist.h2
-rw-r--r--fs/jffs2/readinode.c1
-rw-r--r--fs/jffs2/scan.c4
-rw-r--r--fs/jffs2/xattr.c45
-rw-r--r--fs/namei.c20
-rw-r--r--fs/ntfs/inode.c33
-rw-r--r--fs/ntfs/super.c31
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/super.c11
-rw-r--r--fs/ufs/super.c2
21 files changed, 220 insertions, 80 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d0434406eaeb..f42e64210ee5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -84,7 +84,7 @@ static struct linux_binfmt elf_format = {
.min_coredump = ELF_EXEC_PAGESIZE
};
-#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE)
+#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
static int set_brk(unsigned long start, unsigned long end)
{
@@ -394,7 +394,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
* <= p_memsize so it's only necessary to check p_memsz.
*/
k = load_addr + eppnt->p_vaddr;
- if (k > TASK_SIZE ||
+ if (BAD_ADDR(k) ||
eppnt->p_filesz > eppnt->p_memsz ||
eppnt->p_memsz > TASK_SIZE ||
TASK_SIZE - eppnt->p_memsz < k) {
@@ -887,7 +887,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
* allowed task size. Note that p_filesz must always be
* <= p_memsz so it is only necessary to check p_memsz.
*/
- if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
+ if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
elf_ppnt->p_memsz > TASK_SIZE ||
TASK_SIZE - elf_ppnt->p_memsz < k) {
/* set_brk can never work. Avoid overflows. */
@@ -941,10 +941,9 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
interpreter,
&interp_load_addr);
if (BAD_ADDR(elf_entry)) {
- printk(KERN_ERR "Unable to load interpreter %.128s\n",
- elf_interpreter);
force_sig(SIGSEGV, current);
- retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+ retval = IS_ERR((void *)elf_entry) ?
+ (int)elf_entry : -EINVAL;
goto out_free_dentry;
}
reloc_func_desc = interp_load_addr;
@@ -955,8 +954,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
} else {
elf_entry = loc->elf_ex.e_entry;
if (BAD_ADDR(elf_entry)) {
- send_sig(SIGSEGV, current, 0);
- retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+ force_sig(SIGSEGV, current);
+ retval = -EINVAL;
goto out_free_dentry;
}
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9633a490dab0..37534573960b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -739,7 +739,7 @@ static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
if (!bo)
return -ENOMEM;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
res = bd_claim(bdev, holder);
if (res || !add_bd_holder(bdev, bo))
free_bd_holder(bo);
@@ -764,7 +764,7 @@ static void bd_release_from_kobject(struct block_device *bdev,
if (!kobj)
return;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock_nested(&bdev->bd_mutex, BD_MUTEX_PARTITION);
bd_release(bdev);
if ((bo = del_bd_holder(bdev, kobj)))
free_bd_holder(bo);
@@ -822,6 +822,22 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)
EXPORT_SYMBOL(open_by_devnum);
+static int
+blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags);
+
+struct block_device *open_partition_by_devnum(dev_t dev, unsigned mode)
+{
+ struct block_device *bdev = bdget(dev);
+ int err = -ENOMEM;
+ int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
+ if (bdev)
+ err = blkdev_get_partition(bdev, mode, flags);
+ return err ? ERR_PTR(err) : bdev;
+}
+
+EXPORT_SYMBOL(open_partition_by_devnum);
+
+
/*
* This routine checks whether a removable media has been changed,
* and invalidates all buffer-cache-entries in that case. This
@@ -868,7 +884,11 @@ void bd_set_size(struct block_device *bdev, loff_t size)
}
EXPORT_SYMBOL(bd_set_size);
-static int do_open(struct block_device *bdev, struct file *file)
+static int
+blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
+
+static int
+do_open(struct block_device *bdev, struct file *file, unsigned int subclass)
{
struct module *owner = NULL;
struct gendisk *disk;
@@ -885,7 +905,8 @@ static int do_open(struct block_device *bdev, struct file *file)
}
owner = disk->fops->owner;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock_nested(&bdev->bd_mutex, subclass);
+
if (!bdev->bd_openers) {
bdev->bd_disk = disk;
bdev->bd_contains = bdev;
@@ -912,11 +933,11 @@ static int do_open(struct block_device *bdev, struct file *file)
ret = -ENOMEM;
if (!whole)
goto out_first;
- ret = blkdev_get(whole, file->f_mode, file->f_flags);
+ ret = blkdev_get_whole(whole, file->f_mode, file->f_flags);
if (ret)
goto out_first;
bdev->bd_contains = whole;
- mutex_lock(&whole->bd_mutex);
+ mutex_lock_nested(&whole->bd_mutex, BD_MUTEX_WHOLE);
whole->bd_part_count++;
p = disk->part[part - 1];
bdev->bd_inode->i_data.backing_dev_info =
@@ -944,7 +965,8 @@ static int do_open(struct block_device *bdev, struct file *file)
if (bdev->bd_invalidated)
rescan_partitions(bdev->bd_disk, bdev);
} else {
- mutex_lock(&bdev->bd_contains->bd_mutex);
+ mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+ BD_MUTEX_PARTITION);
bdev->bd_contains->bd_part_count++;
mutex_unlock(&bdev->bd_contains->bd_mutex);
}
@@ -985,11 +1007,49 @@ int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
fake_file.f_dentry = &fake_dentry;
fake_dentry.d_inode = bdev->bd_inode;
- return do_open(bdev, &fake_file);
+ return do_open(bdev, &fake_file, BD_MUTEX_NORMAL);
}
EXPORT_SYMBOL(blkdev_get);
+static int
+blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+ /*
+ * This crockload is due to bad choice of ->open() type.
+ * It will go away.
+ * For now, block device ->open() routine must _not_
+ * examine anything in 'inode' argument except ->i_rdev.
+ */
+ struct file fake_file = {};
+ struct dentry fake_dentry = {};
+ fake_file.f_mode = mode;
+ fake_file.f_flags = flags;
+ fake_file.f_dentry = &fake_dentry;
+ fake_dentry.d_inode = bdev->bd_inode;
+
+ return do_open(bdev, &fake_file, BD_MUTEX_WHOLE);
+}
+
+static int
+blkdev_get_partition(struct block_device *bdev, mode_t mode, unsigned flags)
+{
+ /*
+ * This crockload is due to bad choice of ->open() type.
+ * It will go away.
+ * For now, block device ->open() routine must _not_
+ * examine anything in 'inode' argument except ->i_rdev.
+ */
+ struct file fake_file = {};
+ struct dentry fake_dentry = {};
+ fake_file.f_mode = mode;
+ fake_file.f_flags = flags;
+ fake_file.f_dentry = &fake_dentry;
+ fake_dentry.d_inode = bdev->bd_inode;
+
+ return do_open(bdev, &fake_file, BD_MUTEX_PARTITION);
+}
+
static int blkdev_open(struct inode * inode, struct file * filp)
{
struct block_device *bdev;
@@ -1005,7 +1065,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
bdev = bd_acquire(inode);
- res = do_open(bdev, filp);
+ res = do_open(bdev, filp, BD_MUTEX_NORMAL);
if (res)
return res;
@@ -1019,13 +1079,13 @@ static int blkdev_open(struct inode * inode, struct file * filp)
return res;
}
-int blkdev_put(struct block_device *bdev)
+static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
{
int ret = 0;
struct inode *bd_inode = bdev->bd_inode;
struct gendisk *disk = bdev->bd_disk;
- mutex_lock(&bdev->bd_mutex);
+ mutex_lock_nested(&bdev->bd_mutex, subclass);
lock_kernel();
if (!--bdev->bd_openers) {
sync_blockdev(bdev);
@@ -1035,7 +1095,8 @@ int blkdev_put(struct block_device *bdev)
if (disk->fops->release)
ret = disk->fops->release(bd_inode, NULL);
} else {
- mutex_lock(&bdev->bd_contains->bd_mutex);
+ mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+ subclass + 1);
bdev->bd_contains->bd_part_count--;
mutex_unlock(&bdev->bd_contains->bd_mutex);
}
@@ -1051,9 +1112,8 @@ int blkdev_put(struct block_device *bdev)
}
bdev->bd_disk = NULL;
bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
- if (bdev != bdev->bd_contains) {
- blkdev_put(bdev->bd_contains);
- }
+ if (bdev != bdev->bd_contains)
+ __blkdev_put(bdev->bd_contains, subclass + 1);
bdev->bd_contains = NULL;
}
unlock_kernel();
@@ -1062,8 +1122,20 @@ int blkdev_put(struct block_device *bdev)
return ret;
}
+int blkdev_put(struct block_device *bdev)
+{
+ return __blkdev_put(bdev, BD_MUTEX_NORMAL);
+}
+
EXPORT_SYMBOL(blkdev_put);
+int blkdev_put_partition(struct block_device *bdev)
+{
+ return __blkdev_put(bdev, BD_MUTEX_PARTITION);
+}
+
+EXPORT_SYMBOL(blkdev_put_partition);
+
static int blkdev_close(struct inode * inode, struct file * filp)
{
struct block_device *bdev = I_BDEV(filp->f_mapping->host);
diff --git a/fs/dcache.c b/fs/dcache.c
index c6e3535be192..1b4a3a34ec57 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -38,7 +38,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
__cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
-static seqlock_t rename_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+static __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
EXPORT_SYMBOL(dcache_lock);
@@ -1339,10 +1339,10 @@ void d_move(struct dentry * dentry, struct dentry * target)
*/
if (target < dentry) {
spin_lock(&target->d_lock);
- spin_lock(&dentry->d_lock);
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
} else {
spin_lock(&dentry->d_lock);
- spin_lock(&target->d_lock);
+ spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
}
/* Move the dentry to the target hash queue, if on different bucket */
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 538fb0418fba..5981e17f46f0 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -220,7 +220,8 @@ static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
if (dio->end_io && dio->result)
dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
if (dio->lock_type == DIO_LOCKING)
- up_read(&dio->inode->i_alloc_sem);
+ /* lockdep: non-owner release */
+ up_read_non_owner(&dio->inode->i_alloc_sem);
}
/*
@@ -1261,7 +1262,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
}
if (dio_lock_type == DIO_LOCKING)
- down_read(&inode->i_alloc_sem);
+ /* lockdep: not the owner will release it */
+ down_read_non_owner(&inode->i_alloc_sem);
}
/*
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9c677bbd0b08..19ffb043abbc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -120,7 +120,7 @@ struct epoll_filefd {
*/
struct wake_task_node {
struct list_head llink;
- task_t *task;
+ struct task_struct *task;
wait_queue_head_t *wq;
};
@@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
{
int wake_nests = 0;
unsigned long flags;
- task_t *this_task = current;
+ struct task_struct *this_task = current;
struct list_head *lsthead = &psw->wake_task_list, *lnk;
struct wake_task_node *tncur;
struct wake_task_node tnode;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9f43879d6d68..f2702cda9779 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1157,7 +1157,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
struct buffer_head tmp_bh;
struct buffer_head *bh;
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index f2dd71336612..813d589cc6c0 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2614,7 +2614,7 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 9c2077e7e081..0ae3cd10702c 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -345,10 +345,8 @@ int jffs2_init_acl(struct inode *inode, struct inode *dir)
return rc;
}
-void jffs2_clear_acl(struct inode *inode)
+void jffs2_clear_acl(struct jffs2_inode_info *f)
{
- struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
-
if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) {
posix_acl_release(f->i_acl_access);
f->i_acl_access = JFFS2_ACL_NOT_CACHED;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 8893bd1a6ba7..fa327dbd3171 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -30,7 +30,7 @@ struct jffs2_acl_header {
extern int jffs2_permission(struct inode *, int, struct nameidata *);
extern int jffs2_acl_chmod(struct inode *);
extern int jffs2_init_acl(struct inode *, struct inode *);
-extern void jffs2_clear_acl(struct inode *);
+extern void jffs2_clear_acl(struct jffs2_inode_info *);
extern struct xattr_handler jffs2_acl_access_xattr_handler;
extern struct xattr_handler jffs2_acl_default_xattr_handler;
@@ -40,6 +40,6 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
#define jffs2_permission NULL
#define jffs2_acl_chmod(inode) (0)
#define jffs2_init_acl(inode,dir) (0)
-#define jffs2_clear_acl(inode)
+#define jffs2_clear_acl(f)
#endif /* CONFIG_JFFS2_FS_POSIX_ACL */
diff --git a/fs/jffs2/malloc.c b/fs/jffs2/malloc.c
index 8310c95478e9..33f291005012 100644
--- a/fs/jffs2/malloc.c
+++ b/fs/jffs2/malloc.c
@@ -190,7 +190,7 @@ void jffs2_free_tmp_dnode_info(struct jffs2_tmp_dnode_info *x)
kmem_cache_free(tmp_dnode_info_slab, x);
}
-struct jffs2_raw_node_ref *jffs2_alloc_refblock(void)
+static struct jffs2_raw_node_ref *jffs2_alloc_refblock(void)
{
struct jffs2_raw_node_ref *ret;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index f752baa8d399..cae92c14116d 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -426,8 +426,6 @@ char *jffs2_getlink(struct jffs2_sb_info *c, struct jffs2_inode_info *f);
/* scan.c */
int jffs2_scan_medium(struct jffs2_sb_info *c);
void jffs2_rotate_lists(struct jffs2_sb_info *c);
-int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf,
- uint32_t ofs, uint32_t len);
struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uint32_t ino);
int jffs2_scan_classify_jeb(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb);
int jffs2_scan_dirty_space(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t size);
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index cc1899268c43..266423b2709d 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -968,6 +968,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
struct jffs2_full_dirent *fd, *fds;
int deleted;
+ jffs2_clear_acl(f);
jffs2_xattr_delete_inode(c, f->inocache);
down(&f->sem);
deleted = f->inocache && !f->inocache->nlink;
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 2bfdc33752d3..e2413466ddd5 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -274,8 +274,8 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
return ret;
}
-int jffs2_fill_scan_buf (struct jffs2_sb_info *c, void *buf,
- uint32_t ofs, uint32_t len)
+static int jffs2_fill_scan_buf(struct jffs2_sb_info *c, void *buf,
+ uint32_t ofs, uint32_t len)
{
int ret;
size_t retlen;
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 18e66dbf23b4..25bc1ae08648 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -50,9 +50,10 @@
* is used to write xdatum to medium. xd->version will be incremented.
* create_xattr_datum(c, xprefix, xname, xvalue, xsize)
* is used to create new xdatum and write to medium.
- * delete_xattr_datum(c, xd)
- * is used to delete a xdatum. It marks xd JFFS2_XFLAGS_DEAD, and allows
- * GC to reclaim those physical nodes.
+ * unrefer_xattr_datum(c, xd)
+ * is used to delete a xdatum. When nobody refers this xdatum, JFFS2_XFLAGS_DEAD
+ * is set on xd->flags and chained xattr_dead_list or release it immediately.
+ * In the first case, the garbage collector release it later.
* -------------------------------------------------- */
static uint32_t xattr_datum_hashkey(int xprefix, const char *xname, const char *xvalue, int xsize)
{
@@ -394,22 +395,24 @@ static struct jffs2_xattr_datum *create_xattr_datum(struct jffs2_sb_info *c,
return xd;
}
-static void delete_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
+static void unrefer_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
{
/* must be called under down_write(xattr_sem) */
- BUG_ON(atomic_read(&xd->refcnt));
+ if (atomic_dec_and_lock(&xd->refcnt, &c->erase_completion_lock)) {
+ uint32_t xid = xd->xid, version = xd->version;
- unload_xattr_datum(c, xd);
- xd->flags |= JFFS2_XFLAGS_DEAD;
- spin_lock(&c->erase_completion_lock);
- if (xd->node == (void *)xd) {
- BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID));
- jffs2_free_xattr_datum(xd);
- } else {
- list_add(&xd->xindex, &c->xattr_dead_list);
+ unload_xattr_datum(c, xd);
+ xd->flags |= JFFS2_XFLAGS_DEAD;
+ if (xd->node == (void *)xd) {
+ BUG_ON(!(xd->flags & JFFS2_XFLAGS_INVALID));
+ jffs2_free_xattr_datum(xd);
+ } else {
+ list_add(&xd->xindex, &c->xattr_dead_list);
+ }
+ spin_unlock(&c->erase_completion_lock);
+
+ dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xid, version);
}
- spin_unlock(&c->erase_completion_lock);
- dbg_xattr("xdatum(xid=%u, version=%u) was removed.\n", xd->xid, xd->version);
}
/* -------- xref related functions ------------------
@@ -580,8 +583,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re
dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) was removed.\n",
ref->ino, ref->xid, ref->xseqno);
- if (atomic_dec_and_test(&xd->refcnt))
- delete_xattr_datum(c, xd);
+ unrefer_xattr_datum(c, xd);
}
void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
@@ -1119,8 +1121,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
ref->next = c->xref_dead_list;
c->xref_dead_list = ref;
spin_unlock(&c->erase_completion_lock);
- if (atomic_dec_and_test(&xd->refcnt))
- delete_xattr_datum(c, xd);
+ unrefer_xattr_datum(c, xd);
} else {
ref->ic = ic;
ref->xd = xd;
@@ -1156,8 +1157,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
down_write(&c->xattr_sem);
if (rc) {
JFFS2_WARNING("jffs2_reserve_space()=%d, request=%u\n", rc, request);
- if (atomic_dec_and_test(&xd->refcnt))
- delete_xattr_datum(c, xd);
+ unrefer_xattr_datum(c, xd);
up_write(&c->xattr_sem);
return rc;
}
@@ -1170,8 +1170,7 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname,
ic->xref = ref;
}
rc = PTR_ERR(newref);
- if (atomic_dec_and_test(&xd->refcnt))
- delete_xattr_datum(c, xd);
+ unrefer_xattr_datum(c, xd);
} else if (ref) {
delete_xattr_ref(c, ref);
}
diff --git a/fs/namei.c b/fs/namei.c
index c784e8bb57a3..c9750d755aff 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1423,7 +1423,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
struct dentry *p;
if (p1 == p2) {
- mutex_lock(&p1->d_inode->i_mutex);
+ mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
return NULL;
}
@@ -1431,22 +1431,22 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
for (p = p1; p->d_parent != p; p = p->d_parent) {
if (p->d_parent == p2) {
- mutex_lock(&p2->d_inode->i_mutex);
- mutex_lock(&p1->d_inode->i_mutex);
+ mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
return p;
}
}
for (p = p2; p->d_parent != p; p = p->d_parent) {
if (p->d_parent == p1) {
- mutex_lock(&p1->d_inode->i_mutex);
- mutex_lock(&p2->d_inode->i_mutex);
+ mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
return p;
}
}
- mutex_lock(&p1->d_inode->i_mutex);
- mutex_lock(&p2->d_inode->i_mutex);
+ mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
return NULL;
}
@@ -1751,7 +1751,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
- mutex_lock(&nd->dentry->d_inode->i_mutex);
+ mutex_lock_nested(&nd->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
/*
* Yucky last component or no last component at all?
* (foo/., foo/.., /////)
@@ -2008,7 +2008,7 @@ static long do_rmdir(int dfd, const char __user *pathname)
error = -EBUSY;
goto exit1;
}
- mutex_lock(&nd.dentry->d_inode->i_mutex);
+ mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
@@ -2082,7 +2082,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
error = -EISDIR;
if (nd.last_type != LAST_NORM)
goto exit1;
- mutex_lock(&nd.dentry->d_inode->i_mutex);
+ mutex_lock_nested(&nd.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 4c86b7e1d1eb..d313f356e66a 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -367,6 +367,12 @@ static void ntfs_destroy_extent_inode(ntfs_inode *ni)
kmem_cache_free(ntfs_inode_cache, ni);
}
+/*
+ * The attribute runlist lock has separate locking rules from the
+ * normal runlist lock, so split the two lock-classes:
+ */
+static struct lock_class_key attr_list_rl_lock_class;
+
/**
* __ntfs_init_inode - initialize ntfs specific part of an inode
* @sb: super block of mounted volume
@@ -394,6 +400,8 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
ni->attr_list_size = 0;
ni->attr_list = NULL;
ntfs_init_runlist(&ni->attr_list_rl);
+ lockdep_set_class(&ni->attr_list_rl.lock,
+ &attr_list_rl_lock_class);
ni->itype.index.bmp_ino = NULL;
ni->itype.index.block_size = 0;
ni->itype.index.vcn_size = 0;
@@ -405,6 +413,13 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
ni->ext.base_ntfs_ino = NULL;
}
+/*
+ * Extent inodes get MFT-mapped in a nested way, while the base inode
+ * is still mapped. Teach this nesting to the lock validator by creating
+ * a separate class for nested inode's mrec_lock's:
+ */
+static struct lock_class_key extent_inode_mrec_lock_key;
+
inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
unsigned long mft_no)
{
@@ -413,6 +428,7 @@ inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
ntfs_debug("Entering.");
if (likely(ni != NULL)) {
__ntfs_init_inode(sb, ni);
+ lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
ni->mft_no = mft_no;
ni->type = AT_UNUSED;
ni->name = NULL;
@@ -1722,6 +1738,15 @@ err_out:
return err;
}
+/*
+ * The MFT inode has special locking, so teach the lock validator
+ * about this by splitting off the locking rules of the MFT from
+ * the locking rules of other inodes. The MFT inode can never be
+ * accessed from the VFS side (or even internally), only by the
+ * map_mft functions.
+ */
+static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
+
/**
* ntfs_read_inode_mount - special read_inode for mount time use only
* @vi: inode to read
@@ -2148,6 +2173,14 @@ int ntfs_read_inode_mount(struct inode *vi)
ntfs_attr_put_search_ctx(ctx);
ntfs_debug("Done.");
ntfs_free(m);
+
+ /*
+ * Split the locking rules of the MFT inode from the
+ * locking rules of other inodes:
+ */
+ lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
+ lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
+
return 0;
em_put_err_out:
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 0e14acea3f8b..74e0ee8fce72 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1724,6 +1724,14 @@ upcase_failed:
return FALSE;
}
+/*
+ * The lcn and mft bitmap inodes are NTFS-internal inodes with
+ * their own special locking rules:
+ */
+static struct lock_class_key
+ lcnbmp_runlist_lock_key, lcnbmp_mrec_lock_key,
+ mftbmp_runlist_lock_key, mftbmp_mrec_lock_key;
+
/**
* load_system_files - open the system files using normal functions
* @vol: ntfs super block describing device whose system files to load
@@ -1780,6 +1788,10 @@ static BOOL load_system_files(ntfs_volume *vol)
ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute.");
goto iput_mirr_err_out;
}
+ lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->runlist.lock,
+ &mftbmp_runlist_lock_key);
+ lockdep_set_class(&NTFS_I(vol->mftbmp_ino)->mrec_lock,
+ &mftbmp_mrec_lock_key);
/* Read upcase table and setup @vol->upcase and @vol->upcase_len. */
if (!load_and_init_upcase(vol))
goto iput_mftbmp_err_out;
@@ -1802,6 +1814,11 @@ static BOOL load_system_files(ntfs_volume *vol)
iput(vol->lcnbmp_ino);
goto bitmap_failed;
}
+ lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->runlist.lock,
+ &lcnbmp_runlist_lock_key);
+ lockdep_set_class(&NTFS_I(vol->lcnbmp_ino)->mrec_lock,
+ &lcnbmp_mrec_lock_key);
+
NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino));
if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) {
iput(vol->lcnbmp_ino);
@@ -2743,6 +2760,17 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
struct inode *tmp_ino;
int blocksize, result;
+ /*
+ * We do a pretty difficult piece of bootstrap by reading the
+ * MFT (and other metadata) from disk into memory. We'll only
+ * release this metadata during umount, so the locking patterns
+ * observed during bootstrap do not count. So turn off the
+ * observation of locking patterns (strictly for this context
+ * only) while mounting NTFS. [The validator is still active
+ * otherwise, even for this context: it will for example record
+ * lock class registrations.]
+ */
+ lockdep_off();
ntfs_debug("Entering.");
#ifndef NTFS_RW
sb->s_flags |= MS_RDONLY;
@@ -2754,6 +2782,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
if (!silent)
ntfs_error(sb, "Allocation of NTFS volume structure "
"failed. Aborting mount...");
+ lockdep_on();
return -ENOMEM;
}
/* Initialize ntfs_volume structure. */
@@ -2940,6 +2969,7 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
mutex_unlock(&ntfs_lock);
sb->s_export_op = &ntfs_export_ops;
lock_kernel();
+ lockdep_on();
return 0;
}
ntfs_error(sb, "Failed to allocate root directory.");
@@ -3059,6 +3089,7 @@ err_out_now:
sb->s_fs_info = NULL;
kfree(vol);
ntfs_debug("Failed, returning -EINVAL.");
+ lockdep_on();
return -EINVAL;
}
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index af69f28277b6..4616ed50ffcd 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -107,7 +107,7 @@ int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount *
{
struct vm_list_struct *vml;
struct vm_area_struct *vma;
- struct task_struct *task = proc_task(inode);
+ struct task_struct *task = get_proc_task(inode);
struct mm_struct *mm = get_task_mm(task);
int result = -ENOENT;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 28eb3c886034..5567328f1041 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2203,7 +2203,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
size_t towrite = len;
struct buffer_head tmp_bh, *bh;
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
diff --git a/fs/super.c b/fs/super.c
index 9b780c42d845..6d4e8174b6db 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -53,7 +53,7 @@ DEFINE_SPINLOCK(sb_lock);
* Allocates and initializes a new &struct super_block. alloc_super()
* returns a pointer new superblock or %NULL if allocation had failed.
*/
-static struct super_block *alloc_super(void)
+static struct super_block *alloc_super(struct file_system_type *type)
{
struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
static struct super_operations default_op;
@@ -72,6 +72,13 @@ static struct super_block *alloc_super(void)
INIT_LIST_HEAD(&s->s_inodes);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
+ lockdep_set_class(&s->s_umount, &type->s_umount_key);
+ /*
+ * The locking rules for s_lock are up to the
+ * filesystem. For example ext3fs has different
+ * lock ordering than usbfs:
+ */
+ lockdep_set_class(&s->s_lock, &type->s_lock_key);
down_write(&s->s_umount);
s->s_count = S_BIAS;
atomic_set(&s->s_active, 1);
@@ -295,7 +302,7 @@ retry:
}
if (!s) {
spin_unlock(&sb_lock);
- s = alloc_super();
+ s = alloc_super(type);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 19a99726e58d..992ee0b87cc3 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1326,7 +1326,7 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
size_t towrite = len;
struct buffer_head *bh;
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;