summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-10-11 12:39:35 -0700
committerDavid S. Miller <davem@davemloft.net>2008-10-11 12:39:35 -0700
commit56c5d900dbb8e042bfad035d18433476931d8f93 (patch)
tree00b793965beeef10db03e0ff021d2d965c410759 /fs
parent4dd95b63ae25c5cad6986829b5e8788e9faa0330 (diff)
parentead9d23d803ea3a73766c3cb27bf7563ac8d7266 (diff)
downloadlwn-56c5d900dbb8e042bfad035d18433476931d8f93.tar.gz
lwn-56c5d900dbb8e042bfad035d18433476931d8f93.zip
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: sound/core/memalloc.c
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c3
-rw-r--r--fs/bio-integrity.c29
-rw-r--r--fs/bio.c297
-rw-r--r--fs/block_dev.c182
-rw-r--r--fs/cifs/cifs_spnego.c35
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/cifsproto.h4
-rw-r--r--fs/cifs/cifssmb.c84
-rw-r--r--fs/cifs/dns_resolve.c74
-rw-r--r--fs/cifs/file.c130
-rw-r--r--fs/cifs/inode.c644
-rw-r--r--fs/cifs/misc.c6
-rw-r--r--fs/cifs/readdir.c128
-rw-r--r--fs/cifs/sess.c4
-rw-r--r--fs/cifs/transport.c3
-rw-r--r--fs/dcache.c10
-rw-r--r--fs/dlm/config.c77
-rw-r--r--fs/dlm/dlm_internal.h7
-rw-r--r--fs/dlm/lockspace.c158
-rw-r--r--fs/dlm/lockspace.h1
-rw-r--r--fs/dlm/user.c124
-rw-r--r--fs/dlm/user.h4
-rw-r--r--fs/exec.c2
-rw-r--r--fs/fat/fatent.c14
-rw-r--r--fs/gfs2/glock.c15
-rw-r--r--fs/gfs2/glock.h1
-rw-r--r--fs/gfs2/incore.h38
-rw-r--r--fs/gfs2/inode.c159
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/locking/dlm/mount.c3
-rw-r--r--fs/gfs2/log.c21
-rw-r--r--fs/gfs2/mount.c7
-rw-r--r--fs/gfs2/ops_address.c18
-rw-r--r--fs/gfs2/ops_file.c16
-rw-r--r--fs/gfs2/ops_fstype.c578
-rw-r--r--fs/gfs2/ops_inode.c127
-rw-r--r--fs/gfs2/ops_super.c108
-rw-r--r--fs/gfs2/super.c340
-rw-r--r--fs/gfs2/super.h6
-rw-r--r--fs/gfs2/sys.c11
-rw-r--r--fs/inotify_user.c27
-rw-r--r--fs/partitions/check.c268
-rw-r--r--fs/partitions/check.h4
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/splice.c3
-rw-r--r--fs/ubifs/debug.c2
-rw-r--r--fs/ubifs/dir.c2
-rw-r--r--fs/ubifs/find.c1
-rw-r--r--fs/ubifs/gc.c14
-rw-r--r--fs/ubifs/super.c3
-rw-r--r--fs/ubifs/tnc.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c20
-rw-r--r--fs/xfs/xfs_buf_item.c44
-rw-r--r--fs/xfs/xfs_dfrag.c9
-rw-r--r--fs/xfs/xfs_inode.c94
-rw-r--r--fs/xfs/xfs_log.c67
-rw-r--r--fs/xfs/xfs_log_priv.h1
-rw-r--r--fs/xfs/xfs_vnodeops.c26
62 files changed, 2241 insertions, 1836 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c95295c65045..e83aa5ebe861 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -626,8 +626,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
return NULL;
error:
- if (fid)
- p9_client_clunk(fid);
+ p9_client_clunk(fid);
return ERR_PTR(result);
}
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c3e174b35fe6..19caf7c962ac 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -107,7 +107,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
BUG_ON(bip == NULL);
/* A cloned bio doesn't own the integrity metadata */
- if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL)
+ if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
+ && bip->bip_buf != NULL)
kfree(bip->bip_buf);
mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
@@ -150,6 +151,24 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_integrity_add_page);
+static int bdev_integrity_enabled(struct block_device *bdev, int rw)
+{
+ struct blk_integrity *bi = bdev_get_integrity(bdev);
+
+ if (bi == NULL)
+ return 0;
+
+ if (rw == READ && bi->verify_fn != NULL &&
+ (bi->flags & INTEGRITY_FLAG_READ))
+ return 1;
+
+ if (rw == WRITE && bi->generate_fn != NULL &&
+ (bi->flags & INTEGRITY_FLAG_WRITE))
+ return 1;
+
+ return 0;
+}
+
/**
* bio_integrity_enabled - Check whether integrity can be passed
* @bio: bio to check
@@ -313,6 +332,14 @@ static void bio_integrity_generate(struct bio *bio)
}
}
+static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
+{
+ if (bi)
+ return bi->tuple_size;
+
+ return 0;
+}
+
/**
* bio_integrity_prep - Prepare bio for integrity I/O
* @bio: bio to prepare
diff --git a/fs/bio.c b/fs/bio.c
index 3cba7ae34d75..77a55bcceedb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,7 +30,7 @@
static struct kmem_cache *bio_slab __read_mostly;
-mempool_t *bio_split_pool __read_mostly;
+static mempool_t *bio_split_pool __read_mostly;
/*
* if you change this list, also change bvec_alloc or things will
@@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
struct bio_vec *bvl;
/*
- * see comment near bvec_array define!
+ * If 'bs' is given, lookup the pool and do the mempool alloc.
+ * If not, this is a bio_kmalloc() allocation and just do a
+ * kzalloc() for the exact number of vecs right away.
*/
- switch (nr) {
- case 1 : *idx = 0; break;
- case 2 ... 4: *idx = 1; break;
- case 5 ... 16: *idx = 2; break;
- case 17 ... 64: *idx = 3; break;
- case 65 ... 128: *idx = 4; break;
- case 129 ... BIO_MAX_PAGES: *idx = 5; break;
+ if (bs) {
+ /*
+ * see comment near bvec_array define!
+ */
+ switch (nr) {
+ case 1:
+ *idx = 0;
+ break;
+ case 2 ... 4:
+ *idx = 1;
+ break;
+ case 5 ... 16:
+ *idx = 2;
+ break;
+ case 17 ... 64:
+ *idx = 3;
+ break;
+ case 65 ... 128:
+ *idx = 4;
+ break;
+ case 129 ... BIO_MAX_PAGES:
+ *idx = 5;
+ break;
default:
return NULL;
- }
- /*
- * idx now points to the pool we want to allocate from
- */
+ }
- bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
- if (bvl)
- memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
+ /*
+ * idx now points to the pool we want to allocate from
+ */
+ bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
+ if (bvl)
+ memset(bvl, 0,
+ bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
+ } else
+ bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
return bvl;
}
@@ -107,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio)
bio_free(bio, fs_bio_set);
}
+static void bio_kmalloc_destructor(struct bio *bio)
+{
+ kfree(bio->bi_io_vec);
+ kfree(bio);
+}
+
void bio_init(struct bio *bio)
{
memset(bio, 0, sizeof(*bio));
bio->bi_flags = 1 << BIO_UPTODATE;
+ bio->bi_comp_cpu = -1;
atomic_set(&bio->bi_cnt, 1);
}
@@ -118,19 +146,25 @@ void bio_init(struct bio *bio)
* bio_alloc_bioset - allocate a bio for I/O
* @gfp_mask: the GFP_ mask given to the slab allocator
* @nr_iovecs: number of iovecs to pre-allocate
- * @bs: the bio_set to allocate from
+ * @bs: the bio_set to allocate from. If %NULL, just use kmalloc
*
* Description:
- * bio_alloc_bioset will first try it's on mempool to satisfy the allocation.
+ * bio_alloc_bioset will first try its own mempool to satisfy the allocation.
* If %__GFP_WAIT is set then we will block on the internal pool waiting
- * for a &struct bio to become free.
+ * for a &struct bio to become free. If a %NULL @bs is passed in, we will
+ * fall back to just using @kmalloc to allocate the required memory.
*
* allocate bio and iovecs from the memory pools specified by the
- * bio_set structure.
+ * bio_set structure, or @kmalloc if none given.
**/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
- struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask);
+ struct bio *bio;
+
+ if (bs)
+ bio = mempool_alloc(bs->bio_pool, gfp_mask);
+ else
+ bio = kmalloc(sizeof(*bio), gfp_mask);
if (likely(bio)) {
struct bio_vec *bvl = NULL;
@@ -141,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
if (unlikely(!bvl)) {
- mempool_free(bio, bs->bio_pool);
+ if (bs)
+ mempool_free(bio, bs->bio_pool);
+ else
+ kfree(bio);
bio = NULL;
goto out;
}
@@ -164,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
return bio;
}
+/*
+ * Like bio_alloc(), but doesn't use a mempool backing. This means that
+ * it CAN fail, but while bio_alloc() can only be used for allocations
+ * that have a short (finite) life span, bio_kmalloc() should be used
+ * for more permanent bio allocations (like allocating some bio's for
+ * initalization or setup purposes).
+ */
+struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
+{
+ struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+
+ if (bio)
+ bio->bi_destructor = bio_kmalloc_destructor;
+
+ return bio;
+}
+
void zero_fill_bio(struct bio *bio)
{
unsigned long flags;
@@ -208,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
return bio->bi_phys_segments;
}
-inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
-{
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
- blk_recount_segments(q, bio);
-
- return bio->bi_hw_segments;
-}
-
/**
* __bio_clone - clone a bio
* @bio: destination bio
@@ -350,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
*/
while (bio->bi_phys_segments >= q->max_phys_segments
- || bio->bi_hw_segments >= q->max_hw_segments
- || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
+ || bio->bi_phys_segments >= q->max_hw_segments) {
if (retried_segments)
return 0;
@@ -395,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
}
/* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
- BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
+ if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
bio->bi_flags &= ~(1 << BIO_SEG_VALID);
bio->bi_vcnt++;
bio->bi_phys_segments++;
- bio->bi_hw_segments++;
done:
bio->bi_size += len;
return len;
@@ -449,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
struct bio_map_data {
struct bio_vec *iovecs;
- int nr_sgvecs;
struct sg_iovec *sgvecs;
+ int nr_sgvecs;
+ int is_our_pages;
};
static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
- struct sg_iovec *iov, int iov_count)
+ struct sg_iovec *iov, int iov_count,
+ int is_our_pages)
{
memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
bmd->nr_sgvecs = iov_count;
+ bmd->is_our_pages = is_our_pages;
bio->bi_private = bmd;
}
@@ -493,7 +539,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
}
static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
- struct sg_iovec *iov, int iov_count, int uncopy)
+ struct sg_iovec *iov, int iov_count, int uncopy,
+ int do_free_page)
{
int ret = 0, i;
struct bio_vec *bvec;
@@ -536,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
}
}
- if (uncopy)
+ if (do_free_page)
__free_page(bvec->bv_page);
}
@@ -553,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
int bio_uncopy_user(struct bio *bio)
{
struct bio_map_data *bmd = bio->bi_private;
- int ret;
-
- ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
+ int ret = 0;
+ if (!bio_flagged(bio, BIO_NULL_MAPPED))
+ ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
+ bmd->nr_sgvecs, 1, bmd->is_our_pages);
bio_free_map_data(bmd);
bio_put(bio);
return ret;
@@ -565,16 +613,20 @@ int bio_uncopy_user(struct bio *bio)
/**
* bio_copy_user_iov - copy user data to bio
* @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
* @iov: the iovec.
* @iov_count: number of elements in the iovec
* @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
*
* Prepares and returns a bio for indirect user io, bouncing data
* to/from kernel pages as necessary. Must be paired with
* call bio_uncopy_user() on io completion.
*/
-struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
- int iov_count, int write_to_vm)
+struct bio *bio_copy_user_iov(struct request_queue *q,
+ struct rq_map_data *map_data,
+ struct sg_iovec *iov, int iov_count,
+ int write_to_vm, gfp_t gfp_mask)
{
struct bio_map_data *bmd;
struct bio_vec *bvec;
@@ -597,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
len += iov[i].iov_len;
}
- bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL);
+ bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
if (!bmd)
return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
- bio = bio_alloc(GFP_KERNEL, nr_pages);
+ bio = bio_alloc(gfp_mask, nr_pages);
if (!bio)
goto out_bmd;
bio->bi_rw |= (!write_to_vm << BIO_RW);
ret = 0;
+ i = 0;
while (len) {
- unsigned int bytes = PAGE_SIZE;
+ unsigned int bytes;
+
+ if (map_data)
+ bytes = 1U << (PAGE_SHIFT + map_data->page_order);
+ else
+ bytes = PAGE_SIZE;
if (bytes > len)
bytes = len;
- page = alloc_page(q->bounce_gfp | GFP_KERNEL);
+ if (map_data) {
+ if (i == map_data->nr_entries) {
+ ret = -ENOMEM;
+ break;
+ }
+ page = map_data->pages[i++];
+ } else
+ page = alloc_page(q->bounce_gfp | gfp_mask);
if (!page) {
ret = -ENOMEM;
break;
@@ -634,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
* success
*/
if (!write_to_vm) {
- ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0);
+ ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0);
if (ret)
goto cleanup;
}
- bio_set_map_data(bmd, bio, iov, iov_count);
+ bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
return bio;
cleanup:
- bio_for_each_segment(bvec, bio, i)
- __free_page(bvec->bv_page);
+ if (!map_data)
+ bio_for_each_segment(bvec, bio, i)
+ __free_page(bvec->bv_page);
bio_put(bio);
out_bmd:
@@ -654,29 +720,32 @@ out_bmd:
/**
* bio_copy_user - copy user data to bio
* @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
* @uaddr: start of user address
* @len: length in bytes
* @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
*
* Prepares and returns a bio for indirect user io, bouncing data
* to/from kernel pages as necessary. Must be paired with
* call bio_uncopy_user() on io completion.
*/
-struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr,
- unsigned int len, int write_to_vm)
+struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
+ unsigned long uaddr, unsigned int len,
+ int write_to_vm, gfp_t gfp_mask)
{
struct sg_iovec iov;
iov.iov_base = (void __user *)uaddr;
iov.iov_len = len;
- return bio_copy_user_iov(q, &iov, 1, write_to_vm);
+ return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
}
static struct bio *__bio_map_user_iov(struct request_queue *q,
struct block_device *bdev,
struct sg_iovec *iov, int iov_count,
- int write_to_vm)
+ int write_to_vm, gfp_t gfp_mask)
{
int i, j;
int nr_pages = 0;
@@ -702,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
if (!nr_pages)
return ERR_PTR(-EINVAL);
- bio = bio_alloc(GFP_KERNEL, nr_pages);
+ bio = bio_alloc(gfp_mask, nr_pages);
if (!bio)
return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
- pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+ pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
if (!pages)
goto out;
@@ -786,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
* @uaddr: start of user address
* @len: length in bytes
* @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
*
* Map the user space address into a bio suitable for io to a block
* device. Returns an error pointer in case of error.
*/
struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
- unsigned long uaddr, unsigned int len, int write_to_vm)
+ unsigned long uaddr, unsigned int len, int write_to_vm,
+ gfp_t gfp_mask)
{
struct sg_iovec iov;
iov.iov_base = (void __user *)uaddr;
iov.iov_len = len;
- return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
+ return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
}
/**
@@ -808,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
* @iov: the iovec.
* @iov_count: number of elements in the iovec
* @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
*
* Map the user space address into a bio suitable for io to a block
* device. Returns an error pointer in case of error.
*/
struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
struct sg_iovec *iov, int iov_count,
- int write_to_vm)
+ int write_to_vm, gfp_t gfp_mask)
{
struct bio *bio;
- bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
-
+ bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
+ gfp_mask);
if (IS_ERR(bio))
return bio;
@@ -976,48 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
gfp_t gfp_mask, int reading)
{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
struct bio *bio;
struct bio_vec *bvec;
- struct bio_map_data *bmd;
- int i, ret;
- struct sg_iovec iov;
-
- iov.iov_base = data;
- iov.iov_len = len;
-
- bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
- if (!bmd)
- return ERR_PTR(-ENOMEM);
-
- ret = -ENOMEM;
- bio = bio_alloc(gfp_mask, nr_pages);
- if (!bio)
- goto out_bmd;
-
- while (len) {
- struct page *page;
- unsigned int bytes = PAGE_SIZE;
-
- if (bytes > len)
- bytes = len;
-
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page) {
- ret = -ENOMEM;
- goto cleanup;
- }
-
- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
- ret = -EINVAL;
- goto cleanup;
- }
+ int i;
- len -= bytes;
- }
+ bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
+ if (IS_ERR(bio))
+ return bio;
if (!reading) {
void *p = data;
@@ -1030,20 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
}
}
- bio->bi_private = bmd;
bio->bi_end_io = bio_copy_kern_endio;
- bio_set_map_data(bmd, bio, &iov, 1);
return bio;
-cleanup:
- bio_for_each_segment(bvec, bio, i)
- __free_page(bvec->bv_page);
-
- bio_put(bio);
-out_bmd:
- bio_free_map_data(bmd);
-
- return ERR_PTR(ret);
}
/*
@@ -1230,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err)
* split a bio - only worry about a bio with a single page
* in it's iovec
*/
-struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
+struct bio_pair *bio_split(struct bio *bi, int first_sectors)
{
- struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO);
+ struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
if (!bp)
return bp;
@@ -1266,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
bp->bio2.bi_end_io = bio_pair_end_2;
bp->bio1.bi_private = bi;
- bp->bio2.bi_private = pool;
+ bp->bio2.bi_private = bio_split_pool;
if (bio_integrity(bi))
bio_integrity_split(bi, bp, first_sectors);
@@ -1274,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
return bp;
}
+/**
+ * bio_sector_offset - Find hardware sector offset in bio
+ * @bio: bio to inspect
+ * @index: bio_vec index
+ * @offset: offset in bv_page
+ *
+ * Return the number of hardware sectors between beginning of bio
+ * and an end point indicated by a bio_vec index and an offset
+ * within that vector's page.
+ */
+sector_t bio_sector_offset(struct bio *bio, unsigned short index,
+ unsigned int offset)
+{
+ unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
+ struct bio_vec *bv;
+ sector_t sectors;
+ int i;
+
+ sectors = 0;
+
+ if (index >= bio->bi_idx)
+ index = bio->bi_vcnt - 1;
+
+ __bio_for_each_segment(bv, bio, i, 0) {
+ if (i == index) {
+ if (offset > bv->bv_offset)
+ sectors += (offset - bv->bv_offset) / sector_sz;
+ break;
+ }
+
+ sectors += bv->bv_len / sector_sz;
+ }
+
+ return sectors;
+}
+EXPORT_SYMBOL(bio_sector_offset);
/*
* create memory pools for biovec's in a bio_set.
@@ -1376,6 +1438,7 @@ static int __init init_bio(void)
subsys_initcall(init_bio);
EXPORT_SYMBOL(bio_alloc);
+EXPORT_SYMBOL(bio_kmalloc);
EXPORT_SYMBOL(bio_put);
EXPORT_SYMBOL(bio_free);
EXPORT_SYMBOL(bio_endio);
@@ -1383,7 +1446,6 @@ EXPORT_SYMBOL(bio_init);
EXPORT_SYMBOL(__bio_clone);
EXPORT_SYMBOL(bio_clone);
EXPORT_SYMBOL(bio_phys_segments);
-EXPORT_SYMBOL(bio_hw_segments);
EXPORT_SYMBOL(bio_add_page);
EXPORT_SYMBOL(bio_add_pc_page);
EXPORT_SYMBOL(bio_get_nr_vecs);
@@ -1393,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern);
EXPORT_SYMBOL(bio_copy_kern);
EXPORT_SYMBOL(bio_pair_release);
EXPORT_SYMBOL(bio_split);
-EXPORT_SYMBOL(bio_split_pool);
EXPORT_SYMBOL(bio_copy_user);
EXPORT_SYMBOL(bio_uncopy_user);
EXPORT_SYMBOL(bioset_create);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index aff54219e049..d84f0469a016 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -540,22 +540,6 @@ EXPORT_SYMBOL(bd_release);
* /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
*/
-static struct kobject *bdev_get_kobj(struct block_device *bdev)
-{
- if (bdev->bd_contains != bdev)
- return kobject_get(&bdev->bd_part->dev.kobj);
- else
- return kobject_get(&bdev->bd_disk->dev.kobj);
-}
-
-static struct kobject *bdev_get_holder(struct block_device *bdev)
-{
- if (bdev->bd_contains != bdev)
- return kobject_get(bdev->bd_part->holder_dir);
- else
- return kobject_get(bdev->bd_disk->holder_dir);
-}
-
static int add_symlink(struct kobject *from, struct kobject *to)
{
if (!from || !to)
@@ -604,11 +588,11 @@ static int bd_holder_grab_dirs(struct block_device *bdev,
if (!bo->hdev)
goto fail_put_sdir;
- bo->sdev = bdev_get_kobj(bdev);
+ bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
if (!bo->sdev)
goto fail_put_hdev;
- bo->hdir = bdev_get_holder(bdev);
+ bo->hdir = kobject_get(bdev->bd_part->holder_dir);
if (!bo->hdir)
goto fail_put_sdev;
@@ -868,6 +852,87 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)
EXPORT_SYMBOL(open_by_devnum);
+/**
+ * flush_disk - invalidates all buffer-cache entries on a disk
+ *
+ * @bdev: struct block device to be flushed
+ *
+ * Invalidates all buffer-cache entries on a disk. It should be called
+ * when a disk has been changed -- either by a media change or online
+ * resize.
+ */
+static void flush_disk(struct block_device *bdev)
+{
+ if (__invalidate_device(bdev)) {
+ char name[BDEVNAME_SIZE] = "";
+
+ if (bdev->bd_disk)
+ disk_name(bdev->bd_disk, 0, name);
+ printk(KERN_WARNING "VFS: busy inodes on changed media or "
+ "resized disk %s\n", name);
+ }
+
+ if (!bdev->bd_disk)
+ return;
+ if (disk_partitionable(bdev->bd_disk))
+ bdev->bd_invalidated = 1;
+}
+
+/**
+ * check_disk_size_change - checks for disk size change and adjusts bdev size.
+ * @disk: struct gendisk to check
+ * @bdev: struct bdev to adjust.
+ *
+ * This routine checks to see if the bdev size does not match the disk size
+ * and adjusts it if it differs.
+ */
+void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
+{
+ loff_t disk_size, bdev_size;
+
+ disk_size = (loff_t)get_capacity(disk) << 9;
+ bdev_size = i_size_read(bdev->bd_inode);
+ if (disk_size != bdev_size) {
+ char name[BDEVNAME_SIZE];
+
+ disk_name(disk, 0, name);
+ printk(KERN_INFO
+ "%s: detected capacity change from %lld to %lld\n",
+ name, bdev_size, disk_size);
+ i_size_write(bdev->bd_inode, disk_size);
+ flush_disk(bdev);
+ }
+}
+EXPORT_SYMBOL(check_disk_size_change);
+
+/**
+ * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
+ * @disk: struct gendisk to be revalidated
+ *
+ * This routine is a wrapper for lower-level driver's revalidate_disk
+ * call-backs. It is used to do common pre and post operations needed
+ * for all revalidate_disk operations.
+ */
+int revalidate_disk(struct gendisk *disk)
+{
+ struct block_device *bdev;
+ int ret = 0;
+
+ if (disk->fops->revalidate_disk)
+ ret = disk->fops->revalidate_disk(disk);
+
+ bdev = bdget_disk(disk, 0);
+ if (!bdev)
+ return ret;
+
+ mutex_lock(&bdev->bd_mutex);
+ check_disk_size_change(disk, bdev);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ return ret;
+}
+EXPORT_SYMBOL(revalidate_disk);
+
/*
* This routine checks whether a removable media has been changed,
* and invalidates all buffer-cache-entries in that case. This
@@ -887,13 +952,9 @@ int check_disk_change(struct block_device *bdev)
if (!bdops->media_changed(bdev->bd_disk))
return 0;
- if (__invalidate_device(bdev))
- printk("VFS: busy inodes on changed media.\n");
-
+ flush_disk(bdev);
if (bdops->revalidate_disk)
bdops->revalidate_disk(bdev->bd_disk);
- if (bdev->bd_disk->minors > 1)
- bdev->bd_invalidated = 1;
return 1;
}
@@ -927,10 +988,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part);
static int do_open(struct block_device *bdev, struct file *file, int for_part)
{
- struct module *owner = NULL;
struct gendisk *disk;
+ struct hd_struct *part = NULL;
int ret;
- int part;
+ int partno;
int perm = 0;
if (file->f_mode & FMODE_READ)
@@ -948,25 +1009,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
ret = -ENXIO;
file->f_mapping = bdev->bd_inode->i_mapping;
+
lock_kernel();
- disk = get_gendisk(bdev->bd_dev, &part);
- if (!disk) {
- unlock_kernel();
- bdput(bdev);
- return ret;
- }
- owner = disk->fops->owner;
+
+ disk = get_gendisk(bdev->bd_dev, &partno);
+ if (!disk)
+ goto out_unlock_kernel;
+ part = disk_get_part(disk, partno);
+ if (!part)
+ goto out_unlock_kernel;
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) {
bdev->bd_disk = disk;
+ bdev->bd_part = part;
bdev->bd_contains = bdev;
- if (!part) {
+ if (!partno) {
struct backing_dev_info *bdi;
if (disk->fops->open) {
ret = disk->fops->open(bdev->bd_inode, file);
if (ret)
- goto out_first;
+ goto out_clear;
}
if (!bdev->bd_openers) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
@@ -978,36 +1041,36 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
if (bdev->bd_invalidated)
rescan_partitions(disk, bdev);
} else {
- struct hd_struct *p;
struct block_device *whole;
whole = bdget_disk(disk, 0);
ret = -ENOMEM;
if (!whole)
- goto out_first;
+ goto out_clear;
BUG_ON(for_part);
ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1);
if (ret)
- goto out_first;
+ goto out_clear;
bdev->bd_contains = whole;
- p = disk->part[part - 1];
bdev->bd_inode->i_data.backing_dev_info =
whole->bd_inode->i_data.backing_dev_info;
- if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
+ if (!(disk->flags & GENHD_FL_UP) ||
+ !part || !part->nr_sects) {
ret = -ENXIO;
- goto out_first;
+ goto out_clear;
}
- kobject_get(&p->dev.kobj);
- bdev->bd_part = p;
- bd_set_size(bdev, (loff_t) p->nr_sects << 9);
+ bd_set_size(bdev, (loff_t)part->nr_sects << 9);
}
} else {
+ disk_put_part(part);
put_disk(disk);
- module_put(owner);
+ module_put(disk->fops->owner);
+ part = NULL;
+ disk = NULL;
if (bdev->bd_contains == bdev) {
if (bdev->bd_disk->fops->open) {
ret = bdev->bd_disk->fops->open(bdev->bd_inode, file);
if (ret)
- goto out;
+ goto out_unlock_bdev;
}
if (bdev->bd_invalidated)
rescan_partitions(bdev->bd_disk, bdev);
@@ -1020,19 +1083,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
unlock_kernel();
return 0;
-out_first:
+ out_clear:
bdev->bd_disk = NULL;
+ bdev->bd_part = NULL;
bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
if (bdev != bdev->bd_contains)
__blkdev_put(bdev->bd_contains, 1);
bdev->bd_contains = NULL;
- put_disk(disk);
- module_put(owner);
-out:
+ out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
+ out_unlock_kernel:
unlock_kernel();
- if (ret)
- bdput(bdev);
+
+ disk_put_part(part);
+ if (disk)
+ module_put(disk->fops->owner);
+ put_disk(disk);
+ bdput(bdev);
+
return ret;
}
@@ -1117,11 +1185,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
put_disk(disk);
module_put(owner);
-
- if (bdev->bd_contains != bdev) {
- kobject_put(&bdev->bd_part->dev.kobj);
- bdev->bd_part = NULL;
- }
+ disk_put_part(bdev->bd_part);
+ bdev->bd_part = NULL;
bdev->bd_disk = NULL;
bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
if (bdev != bdev->bd_contains)
@@ -1197,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev);
/**
* lookup_bdev - lookup a struct block_device by name
+ * @pathname: special file representing the block device
*
- * @path: special file representing the block device
- *
- * Get a reference to the blockdevice at @path in the current
+ * Get a reference to the blockdevice at @pathname in the current
* namespace if possible and return it. Return ERR_PTR(error)
* otherwise.
*/
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 117ef4bba68e..fcee9298b620 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -66,11 +66,28 @@ struct key_type cifs_spnego_key_type = {
.describe = user_describe,
};
-#define MAX_VER_STR_LEN 8 /* length of longest version string e.g.
- strlen("ver=0xFF") */
-#define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg
- in future could have strlen(";sec=ntlmsspi") */
-#define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */
+/* length of longest version string e.g. strlen("ver=0xFF") */
+#define MAX_VER_STR_LEN 8
+
+/* length of longest security mechanism name, eg in future could have
+ * strlen(";sec=ntlmsspi") */
+#define MAX_MECH_STR_LEN 13
+
+/* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */
+#define MAX_IPV6_ADDR_LEN 42
+
+/* strlen of "host=" */
+#define HOST_KEY_LEN 5
+
+/* strlen of ";ip4=" or ";ip6=" */
+#define IP_KEY_LEN 5
+
+/* strlen of ";uid=0x" */
+#define UID_KEY_LEN 7
+
+/* strlen of ";user=" */
+#define USER_KEY_LEN 6
+
/* get a key struct with a SPNEGO security blob, suitable for session setup */
struct key *
cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
@@ -84,11 +101,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
/* length of fields (with semicolons): ver=0xyz ip4=ipaddress
host=hostname sec=mechanism uid=0xFF user=username */
desc_len = MAX_VER_STR_LEN +
- 6 /* len of "host=" */ + strlen(hostname) +
- 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN +
+ HOST_KEY_LEN + strlen(hostname) +
+ IP_KEY_LEN + MAX_IPV6_ADDR_LEN +
MAX_MECH_STR_LEN +
- 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) +
- 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1;
+ UID_KEY_LEN + (sizeof(uid_t) * 2) +
+ USER_KEY_LEN + strlen(sesInfo->userName) + 1;
spnego_key = ERR_PTR(-ENOMEM);
description = kzalloc(desc_len, GFP_KERNEL);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 135c965c4137..f7b4a5cd837b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -41,7 +41,7 @@ extern int cifs_create(struct inode *, struct dentry *, int,
struct nameidata *);
extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
struct nameidata *);
-extern int cifs_unlink(struct inode *, struct dentry *);
+extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t);
extern int cifs_mkdir(struct inode *, struct dentry *, int);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8dfd6f24d488..0d22479d99b7 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -309,6 +309,7 @@ struct cifs_search_info {
__u32 resume_key;
char *ntwrk_buf_start;
char *srch_entries_start;
+ char *last_entry;
char *presume_name;
unsigned int resume_name_len;
bool endOfSearch:1;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index a729d083e6f4..0cff7fe986e8 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -179,6 +179,8 @@ extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
const FILE_BASIC_INFO *data, __u16 fid,
__u32 pid_of_opener);
+extern int CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
+ bool delete_file, __u16 fid, __u32 pid_of_opener);
#if 0
extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
char *fileName, __u16 dos_attributes,
@@ -229,7 +231,7 @@ extern int CIFSSMBRename(const int xid, struct cifsTconInfo *tcon,
const struct nls_table *nls_codepage,
int remap_special_chars);
extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
- int netfid, char *target_name,
+ int netfid, const char *target_name,
const struct nls_table *nls_codepage,
int remap_special_chars);
extern int CIFSCreateHardLink(const int xid,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 994de7c90474..6f4ffe15d68d 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2017,7 +2017,7 @@ renameRetry:
}
int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
- int netfid, char *target_name,
+ int netfid, const char *target_name,
const struct nls_table *nls_codepage, int remap)
{
struct smb_com_transaction2_sfi_req *pSMB = NULL;
@@ -2071,7 +2071,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
remap);
}
rename_info->target_name_len = cpu_to_le32(2 * len_of_str);
- count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str) + 2;
+ count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str);
byte_count += count;
pSMB->DataCount = cpu_to_le16(count);
pSMB->TotalDataCount = pSMB->DataCount;
@@ -3614,6 +3614,8 @@ findFirstRetry:
/* BB remember to free buffer if error BB */
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
if (rc == 0) {
+ unsigned int lnoff;
+
if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
psrch_inf->unicode = true;
else
@@ -3636,6 +3638,17 @@ findFirstRetry:
le16_to_cpu(parms->SearchCount);
psrch_inf->index_of_last_entry = 2 /* skip . and .. */ +
psrch_inf->entries_in_buffer;
+ lnoff = le16_to_cpu(parms->LastNameOffset);
+ if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE <
+ lnoff) {
+ cERROR(1, ("ignoring corrupt resume name"));
+ psrch_inf->last_entry = NULL;
+ return rc;
+ }
+
+ psrch_inf->last_entry = psrch_inf->srch_entries_start +
+ lnoff;
+
*pnetfid = parms->SearchHandle;
} else {
cifs_buf_release(pSMB);
@@ -3725,6 +3738,8 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
rc = validate_t2((struct smb_t2_rsp *)pSMBr);
if (rc == 0) {
+ unsigned int lnoff;
+
/* BB fixme add lock for file (srch_info) struct here */
if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
psrch_inf->unicode = true;
@@ -3751,6 +3766,16 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
le16_to_cpu(parms->SearchCount);
psrch_inf->index_of_last_entry +=
psrch_inf->entries_in_buffer;
+ lnoff = le16_to_cpu(parms->LastNameOffset);
+ if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE <
+ lnoff) {
+ cERROR(1, ("ignoring corrupt resume name"));
+ psrch_inf->last_entry = NULL;
+ return rc;
+ } else
+ psrch_inf->last_entry =
+ psrch_inf->srch_entries_start + lnoff;
+
/* cFYI(1,("fnxt2 entries in buf %d index_of_last %d",
psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */
@@ -4876,6 +4901,61 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
return rc;
}
+int
+CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
+ bool delete_file, __u16 fid, __u32 pid_of_opener)
+{
+ struct smb_com_transaction2_sfi_req *pSMB = NULL;
+ char *data_offset;
+ int rc = 0;
+ __u16 params, param_offset, offset, byte_count, count;
+
+ cFYI(1, ("Set File Disposition (via SetFileInfo)"));
+ rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB);
+
+ if (rc)
+ return rc;
+
+ pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
+ pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
+
+ params = 6;
+ pSMB->MaxSetupCount = 0;
+ pSMB->Reserved = 0;
+ pSMB->Flags = 0;
+ pSMB->Timeout = 0;
+ pSMB->Reserved2 = 0;
+ param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4;
+ offset = param_offset + params;
+
+ data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
+
+ count = 1;
+ pSMB->MaxParameterCount = cpu_to_le16(2);
+ /* BB find max SMB PDU from sess */
+ pSMB->MaxDataCount = cpu_to_le16(1000);
+ pSMB->SetupCount = 1;
+ pSMB->Reserved3 = 0;
+ pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION);
+ byte_count = 3 /* pad */ + params + count;
+ pSMB->DataCount = cpu_to_le16(count);
+ pSMB->ParameterCount = cpu_to_le16(params);
+ pSMB->TotalDataCount = pSMB->DataCount;
+ pSMB->TotalParameterCount = pSMB->ParameterCount;
+ pSMB->ParameterOffset = cpu_to_le16(param_offset);
+ pSMB->DataOffset = cpu_to_le16(offset);
+ pSMB->Fid = fid;
+ pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO);
+ pSMB->Reserved4 = 0;
+ pSMB->hdr.smb_buf_length += byte_count;
+ pSMB->ByteCount = cpu_to_le16(byte_count);
+ *data_offset = delete_file ? 1 : 0;
+ rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
+ if (rc)
+ cFYI(1, ("Send error in SetFileDisposition = %d", rc));
+
+ return rc;
+}
int
CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index a2e0673e1b08..1e0c1bd8f2e4 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -29,19 +29,55 @@
#include "cifsproto.h"
#include "cifs_debug.h"
-static int dns_resolver_instantiate(struct key *key, const void *data,
+/* Checks if supplied name is IP address
+ * returns:
+ * 1 - name is IP
+ * 0 - name is not IP
+ */
+static int
+is_ip(const char *name)
+{
+ int rc;
+ struct sockaddr_in sin_server;
+ struct sockaddr_in6 sin_server6;
+
+ rc = cifs_inet_pton(AF_INET, name,
+ &sin_server.sin_addr.s_addr);
+
+ if (rc <= 0) {
+ /* not ipv4 address, try ipv6 */
+ rc = cifs_inet_pton(AF_INET6, name,
+ &sin_server6.sin6_addr.in6_u);
+ if (rc > 0)
+ return 1;
+ } else {
+ return 1;
+ }
+ /* we failed translating address */
+ return 0;
+}
+
+static int
+dns_resolver_instantiate(struct key *key, const void *data,
size_t datalen)
{
int rc = 0;
char *ip;
- ip = kmalloc(datalen+1, GFP_KERNEL);
+ ip = kmalloc(datalen + 1, GFP_KERNEL);
if (!ip)
return -ENOMEM;
memcpy(ip, data, datalen);
ip[datalen] = '\0';
+ /* make sure this looks like an address */
+ if (!is_ip((const char *) ip)) {
+ kfree(ip);
+ return -EINVAL;
+ }
+
+ key->type_data.x[0] = datalen;
rcu_assign_pointer(key->payload.data, ip);
return rc;
@@ -62,33 +98,6 @@ struct key_type key_type_dns_resolver = {
.match = user_match,
};
-/* Checks if supplied name is IP address
- * returns:
- * 1 - name is IP
- * 0 - name is not IP
- */
-static int is_ip(const char *name)
-{
- int rc;
- struct sockaddr_in sin_server;
- struct sockaddr_in6 sin_server6;
-
- rc = cifs_inet_pton(AF_INET, name,
- &sin_server.sin_addr.s_addr);
-
- if (rc <= 0) {
- /* not ipv4 address, try ipv6 */
- rc = cifs_inet_pton(AF_INET6, name,
- &sin_server6.sin6_addr.in6_u);
- if (rc > 0)
- return 1;
- } else {
- return 1;
- }
- /* we failed translating address */
- return 0;
-}
-
/* Resolves server name to ip address.
* input:
* unc - server UNC
@@ -140,6 +149,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
rkey = request_key(&key_type_dns_resolver, name, "");
if (!IS_ERR(rkey)) {
+ len = rkey->type_data.x[0];
data = rkey->payload.data;
} else {
cERROR(1, ("%s: unable to resolve: %s", __func__, name));
@@ -148,11 +158,9 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
skip_upcall:
if (data) {
- len = strlen(data);
- *ip_addr = kmalloc(len+1, GFP_KERNEL);
+ *ip_addr = kmalloc(len + 1, GFP_KERNEL);
if (*ip_addr) {
- memcpy(*ip_addr, data, len);
- (*ip_addr)[len] = '\0';
+ memcpy(*ip_addr, data, len + 1);
if (!IS_ERR(rkey))
cFYI(1, ("%s: resolved: %s to %s", __func__,
name,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index cbefe1f1f9fe..c4a8a0605125 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -107,7 +107,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
/* want handles we can use to read with first
in the list so we do not have to walk the
- list to search for one in prepare_write */
+ list to search for one in write_begin */
if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
list_add_tail(&pCifsFile->flist,
&pCifsInode->openFileList);
@@ -915,7 +915,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
}
static ssize_t cifs_write(struct file *file, const char *write_data,
- size_t write_size, loff_t *poffset)
+ size_t write_size, loff_t *poffset)
{
int rc = 0;
unsigned int bytes_written = 0;
@@ -1065,6 +1065,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
{
struct cifsFileInfo *open_file;
+ bool any_available = false;
int rc;
/* Having a null inode here (because mapping->host was set to zero by
@@ -1080,8 +1081,10 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
read_lock(&GlobalSMBSeslock);
refind_writable:
list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
- if (open_file->closePend)
+ if (open_file->closePend ||
+ (!any_available && open_file->pid != current->tgid))
continue;
+
if (open_file->pfile &&
((open_file->pfile->f_flags & O_RDWR) ||
(open_file->pfile->f_flags & O_WRONLY))) {
@@ -1131,6 +1134,11 @@ refind_writable:
of the loop here. */
}
}
+ /* couldn't find useable FH with same pid, try any available */
+ if (!any_available) {
+ any_available = true;
+ goto refind_writable;
+ }
read_unlock(&GlobalSMBSeslock);
return NULL;
}
@@ -1447,49 +1455,52 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
return rc;
}
-static int cifs_commit_write(struct file *file, struct page *page,
- unsigned offset, unsigned to)
+static int cifs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
{
- int xid;
- int rc = 0;
- struct inode *inode = page->mapping->host;
- loff_t position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
- char *page_data;
+ int rc;
+ struct inode *inode = mapping->host;
- xid = GetXid();
- cFYI(1, ("commit write for page %p up to position %lld for %d",
- page, position, to));
- spin_lock(&inode->i_lock);
- if (position > inode->i_size)
- i_size_write(inode, position);
+ cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
+ page, pos, copied));
+
+ if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
+ SetPageUptodate(page);
- spin_unlock(&inode->i_lock);
if (!PageUptodate(page)) {
- position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset;
- /* can not rely on (or let) writepage write this data */
- if (to < offset) {
- cFYI(1, ("Illegal offsets, can not copy from %d to %d",
- offset, to));
- FreeXid(xid);
- return rc;
- }
+ char *page_data;
+ unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ int xid;
+
+ xid = GetXid();
/* this is probably better than directly calling
partialpage_write since in this function the file handle is
known which we might as well leverage */
/* BB check if anything else missing out of ppw
such as updating last write time */
page_data = kmap(page);
- rc = cifs_write(file, page_data + offset, to-offset,
- &position);
- if (rc > 0)
- rc = 0;
- /* else if (rc < 0) should we set writebehind rc? */
+ rc = cifs_write(file, page_data + offset, copied, &pos);
+ /* if (rc < 0) should we set writebehind rc? */
kunmap(page);
+
+ FreeXid(xid);
} else {
+ rc = copied;
+ pos += copied;
set_page_dirty(page);
}
- FreeXid(xid);
+ if (rc > 0) {
+ spin_lock(&inode->i_lock);
+ if (pos > inode->i_size)
+ i_size_write(inode, pos);
+ spin_unlock(&inode->i_lock);
+ }
+
+ unlock_page(page);
+ page_cache_release(page);
+
return rc;
}
@@ -2035,49 +2046,44 @@ bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
return true;
}
-static int cifs_prepare_write(struct file *file, struct page *page,
- unsigned from, unsigned to)
+static int cifs_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata)
{
- int rc = 0;
- loff_t i_size;
- loff_t offset;
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
- cFYI(1, ("prepare write for page %p from %d to %d", page, from, to));
- if (PageUptodate(page))
+ cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
+
+ *pagep = __grab_cache_page(mapping, index);
+ if (!*pagep)
+ return -ENOMEM;
+
+ if (PageUptodate(*pagep))
return 0;
/* If we are writing a full page it will be up to date,
no need to read from the server */
- if ((to == PAGE_CACHE_SIZE) && (from == 0)) {
- SetPageUptodate(page);
+ if (len == PAGE_CACHE_SIZE && flags & AOP_FLAG_UNINTERRUPTIBLE)
return 0;
- }
- offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
- i_size = i_size_read(page->mapping->host);
+ if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
+ int rc;
- if ((offset >= i_size) ||
- ((from == 0) && (offset + to) >= i_size)) {
- /*
- * We don't need to read data beyond the end of the file.
- * zero it, and set the page uptodate
- */
- simple_prepare_write(file, page, from, to);
- SetPageUptodate(page);
- } else if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
/* might as well read a page, it is fast enough */
- rc = cifs_readpage_worker(file, page, &offset);
+ rc = cifs_readpage_worker(file, *pagep, &offset);
+
+ /* we do not need to pass errors back
+ e.g. if we do not have read access to the file
+ because cifs_write_end will attempt synchronous writes
+ -- shaggy */
} else {
/* we could try using another file handle if there is one -
but how would we lock it to prevent close of that handle
racing with this read? In any case
- this will be written out by commit_write so is fine */
+ this will be written out by write_end so is fine */
}
- /* we do not need to pass errors back
- e.g. if we do not have read access to the file
- because cifs_commit_write will do the right thing. -- shaggy */
-
return 0;
}
@@ -2086,8 +2092,8 @@ const struct address_space_operations cifs_addr_ops = {
.readpages = cifs_readpages,
.writepage = cifs_writepage,
.writepages = cifs_writepages,
- .prepare_write = cifs_prepare_write,
- .commit_write = cifs_commit_write,
+ .write_begin = cifs_write_begin,
+ .write_end = cifs_write_end,
.set_page_dirty = __set_page_dirty_nobuffers,
/* .sync_page = cifs_sync_page, */
/* .direct_IO = */
@@ -2102,8 +2108,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
.readpage = cifs_readpage,
.writepage = cifs_writepage,
.writepages = cifs_writepages,
- .prepare_write = cifs_prepare_write,
- .commit_write = cifs_commit_write,
+ .write_begin = cifs_write_begin,
+ .write_end = cifs_write_end,
.set_page_dirty = __set_page_dirty_nobuffers,
/* .sync_page = cifs_sync_page, */
/* .direct_IO = */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9c548f110102..a8c833345fc9 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -665,40 +665,201 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino)
return inode;
}
-int cifs_unlink(struct inode *inode, struct dentry *direntry)
+static int
+cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
+ char *full_path, __u32 dosattr)
+{
+ int rc;
+ int oplock = 0;
+ __u16 netfid;
+ __u32 netpid;
+ bool set_time = false;
+ struct cifsFileInfo *open_file;
+ struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsTconInfo *pTcon = cifs_sb->tcon;
+ FILE_BASIC_INFO info_buf;
+
+ if (attrs->ia_valid & ATTR_ATIME) {
+ set_time = true;
+ info_buf.LastAccessTime =
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
+ } else
+ info_buf.LastAccessTime = 0;
+
+ if (attrs->ia_valid & ATTR_MTIME) {
+ set_time = true;
+ info_buf.LastWriteTime =
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
+ } else
+ info_buf.LastWriteTime = 0;
+
+ /*
+ * Samba throws this field away, but windows may actually use it.
+ * Do not set ctime unless other time stamps are changed explicitly
+ * (i.e. by utimes()) since we would then have a mix of client and
+ * server times.
+ */
+ if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
+ cFYI(1, ("CIFS - CTIME changed"));
+ info_buf.ChangeTime =
+ cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
+ } else
+ info_buf.ChangeTime = 0;
+
+ info_buf.CreationTime = 0; /* don't change */
+ info_buf.Attributes = cpu_to_le32(dosattr);
+
+ /*
+ * If the file is already open for write, just use that fileid
+ */
+ open_file = find_writable_file(cifsInode);
+ if (open_file) {
+ netfid = open_file->netfid;
+ netpid = open_file->pid;
+ goto set_via_filehandle;
+ }
+
+ /*
+ * NT4 apparently returns success on this call, but it doesn't
+ * really work.
+ */
+ if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
+ rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
+ &info_buf, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (rc == 0) {
+ cifsInode->cifsAttrs = dosattr;
+ goto out;
+ } else if (rc != -EOPNOTSUPP && rc != -EINVAL)
+ goto out;
+ }
+
+ cFYI(1, ("calling SetFileInfo since SetPathInfo for "
+ "times not supported by this server"));
+ rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
+ SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
+ CREATE_NOT_DIR, &netfid, &oplock,
+ NULL, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ if (rc != 0) {
+ if (rc == -EIO)
+ rc = -EINVAL;
+ goto out;
+ }
+
+ netpid = current->tgid;
+
+set_via_filehandle:
+ rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
+ if (!rc)
+ cifsInode->cifsAttrs = dosattr;
+
+ if (open_file == NULL)
+ CIFSSMBClose(xid, pTcon, netfid);
+ else
+ atomic_dec(&open_file->wrtPending);
+out:
+ return rc;
+}
+
+/*
+ * open the given file (if it isn't already), set the DELETE_ON_CLOSE bit
+ * and rename it to a random name that hopefully won't conflict with
+ * anything else.
+ */
+static int
+cifs_rename_pending_delete(char *full_path, struct inode *inode, int xid)
+{
+ int oplock = 0;
+ int rc;
+ __u16 netfid;
+ struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifsTconInfo *tcon = cifs_sb->tcon;
+ __u32 dosattr;
+ FILE_BASIC_INFO *info_buf;
+
+ rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
+ DELETE|FILE_WRITE_ATTRIBUTES,
+ CREATE_NOT_DIR|CREATE_DELETE_ON_CLOSE,
+ &netfid, &oplock, NULL, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (rc != 0)
+ goto out;
+
+ /* set ATTR_HIDDEN and clear ATTR_READONLY */
+ cifsInode = CIFS_I(inode);
+ dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY;
+ if (dosattr == 0)
+ dosattr |= ATTR_NORMAL;
+ dosattr |= ATTR_HIDDEN;
+
+ info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL);
+ if (info_buf == NULL) {
+ rc = -ENOMEM;
+ goto out_close;
+ }
+ info_buf->Attributes = cpu_to_le32(dosattr);
+ rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, current->tgid);
+ kfree(info_buf);
+ if (rc != 0)
+ goto out_close;
+ cifsInode->cifsAttrs = dosattr;
+
+ /* silly-rename the file */
+ CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ /* set DELETE_ON_CLOSE */
+ rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, current->tgid);
+
+ /*
+ * some samba versions return -ENOENT when we try to set the file
+ * disposition here. Likely a samba bug, but work around it for now
+ */
+ if (rc == -ENOENT)
+ rc = 0;
+
+out_close:
+ CIFSSMBClose(xid, tcon, netfid);
+out:
+ return rc;
+}
+
+int cifs_unlink(struct inode *dir, struct dentry *dentry)
{
int rc = 0;
int xid;
- struct cifs_sb_info *cifs_sb;
- struct cifsTconInfo *pTcon;
char *full_path = NULL;
- struct cifsInodeInfo *cifsInode;
- FILE_BASIC_INFO *pinfo_buf;
+ struct inode *inode = dentry->d_inode;
+ struct cifsInodeInfo *cifsInode = CIFS_I(inode);
+ struct super_block *sb = dir->i_sb;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+ struct cifsTconInfo *tcon = cifs_sb->tcon;
+ struct iattr *attrs = NULL;
+ __u32 dosattr = 0, origattr = 0;
- cFYI(1, ("cifs_unlink, inode = 0x%p", inode));
+ cFYI(1, ("cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry));
xid = GetXid();
- if (inode)
- cifs_sb = CIFS_SB(inode->i_sb);
- else
- cifs_sb = CIFS_SB(direntry->d_sb);
- pTcon = cifs_sb->tcon;
-
- /* Unlink can be called from rename so we can not grab the sem here
- since we deadlock otherwise */
-/* mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);*/
- full_path = build_path_from_dentry(direntry);
-/* mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);*/
+ /* Unlink can be called from rename so we can not take the
+ * sb->s_vfs_rename_mutex here */
+ full_path = build_path_from_dentry(dentry);
if (full_path == NULL) {
FreeXid(xid);
return -ENOMEM;
}
- if ((pTcon->ses->capabilities & CAP_UNIX) &&
+ if ((tcon->ses->capabilities & CAP_UNIX) &&
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
- le64_to_cpu(pTcon->fsUnixInfo.Capability))) {
- rc = CIFSPOSIXDelFile(xid, pTcon, full_path,
+ le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+ rc = CIFSPOSIXDelFile(xid, tcon, full_path,
SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
cFYI(1, ("posix del rc %d", rc));
@@ -706,125 +867,60 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry)
goto psx_del_no_retry;
}
- rc = CIFSSMBDelFile(xid, pTcon, full_path, cifs_sb->local_nls,
+retry_std_delete:
+ rc = CIFSSMBDelFile(xid, tcon, full_path, cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+
psx_del_no_retry:
if (!rc) {
- if (direntry->d_inode)
- drop_nlink(direntry->d_inode);
+ if (inode)
+ drop_nlink(inode);
} else if (rc == -ENOENT) {
- d_drop(direntry);
+ d_drop(dentry);
} else if (rc == -ETXTBSY) {
- int oplock = 0;
- __u16 netfid;
-
- rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE,
- CREATE_NOT_DIR | CREATE_DELETE_ON_CLOSE,
- &netfid, &oplock, NULL, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc == 0) {
- CIFSSMBRenameOpenFile(xid, pTcon, netfid, NULL,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- CIFSSMBClose(xid, pTcon, netfid);
- if (direntry->d_inode)
- drop_nlink(direntry->d_inode);
+ rc = cifs_rename_pending_delete(full_path, inode, xid);
+ if (rc == 0)
+ drop_nlink(inode);
+ } else if (rc == -EACCES && dosattr == 0) {
+ attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
+ if (attrs == NULL) {
+ rc = -ENOMEM;
+ goto out_reval;
}
- } else if (rc == -EACCES) {
- /* try only if r/o attribute set in local lookup data? */
- pinfo_buf = kzalloc(sizeof(FILE_BASIC_INFO), GFP_KERNEL);
- if (pinfo_buf) {
- /* ATTRS set to normal clears r/o bit */
- pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL);
- if (!(pTcon->ses->flags & CIFS_SES_NT4))
- rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
- pinfo_buf,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- else
- rc = -EOPNOTSUPP;
- if (rc == -EOPNOTSUPP) {
- int oplock = 0;
- __u16 netfid;
- /* rc = CIFSSMBSetAttrLegacy(xid, pTcon,
- full_path,
- (__u16)ATTR_NORMAL,
- cifs_sb->local_nls);
- For some strange reason it seems that NT4 eats the
- old setattr call without actually setting the
- attributes so on to the third attempted workaround
- */
-
- /* BB could scan to see if we already have it open
- and pass in pid of opener to function */
- rc = CIFSSMBOpen(xid, pTcon, full_path,
- FILE_OPEN, SYNCHRONIZE |
- FILE_WRITE_ATTRIBUTES, 0,
- &netfid, &oplock, NULL,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc == 0) {
- rc = CIFSSMBSetFileInfo(xid, pTcon,
- pinfo_buf,
- netfid,
- current->tgid);
- CIFSSMBClose(xid, pTcon, netfid);
- }
- }
- kfree(pinfo_buf);
- }
- if (rc == 0) {
- rc = CIFSSMBDelFile(xid, pTcon, full_path,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (!rc) {
- if (direntry->d_inode)
- drop_nlink(direntry->d_inode);
- } else if (rc == -ETXTBSY) {
- int oplock = 0;
- __u16 netfid;
-
- rc = CIFSSMBOpen(xid, pTcon, full_path,
- FILE_OPEN, DELETE,
- CREATE_NOT_DIR |
- CREATE_DELETE_ON_CLOSE,
- &netfid, &oplock, NULL,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc == 0) {
- CIFSSMBRenameOpenFile(xid, pTcon,
- netfid, NULL,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- CIFSSMBClose(xid, pTcon, netfid);
- if (direntry->d_inode)
- drop_nlink(direntry->d_inode);
- }
- /* BB if rc = -ETXTBUSY goto the rename logic BB */
- }
- }
- }
- if (direntry->d_inode) {
- cifsInode = CIFS_I(direntry->d_inode);
- cifsInode->time = 0; /* will force revalidate to get info
- when needed */
- direntry->d_inode->i_ctime = current_fs_time(inode->i_sb);
+ /* try to reset dos attributes */
+ origattr = cifsInode->cifsAttrs;
+ if (origattr == 0)
+ origattr |= ATTR_NORMAL;
+ dosattr = origattr & ~ATTR_READONLY;
+ if (dosattr == 0)
+ dosattr |= ATTR_NORMAL;
+ dosattr |= ATTR_HIDDEN;
+
+ rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr);
+ if (rc != 0)
+ goto out_reval;
+
+ goto retry_std_delete;
}
+
+ /* undo the setattr if we errored out and it's needed */
+ if (rc != 0 && dosattr != 0)
+ cifs_set_file_info(inode, attrs, xid, full_path, origattr);
+
+out_reval:
if (inode) {
- inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
cifsInode = CIFS_I(inode);
- cifsInode->time = 0; /* force revalidate of dir as well */
+ cifsInode->time = 0; /* will force revalidate to get info
+ when needed */
+ inode->i_ctime = current_fs_time(sb);
}
+ dir->i_ctime = dir->i_mtime = current_fs_time(sb);
+ cifsInode = CIFS_I(dir);
+ CIFS_I(dir)->time = 0; /* force revalidate of dir as well */
kfree(full_path);
+ kfree(attrs);
FreeXid(xid);
return rc;
}
@@ -869,7 +965,7 @@ static void posix_fill_in_inode(struct inode *tmp_inode,
int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
{
- int rc = 0;
+ int rc = 0, tmprc;
int xid;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
@@ -931,6 +1027,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
kfree(pInfo);
goto mkdir_get_info;
}
+
/* Is an i_ino of zero legal? */
/* Are there sanity checks we can use to ensure that
the server is really filling in that field? */
@@ -1019,12 +1116,20 @@ mkdir_get_info:
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
(mode & S_IWUGO) == 0) {
FILE_BASIC_INFO pInfo;
+ struct cifsInodeInfo *cifsInode;
+ u32 dosattrs;
+
memset(&pInfo, 0, sizeof(pInfo));
- pInfo.Attributes = cpu_to_le32(ATTR_READONLY);
- CIFSSMBSetPathInfo(xid, pTcon, full_path,
- &pInfo, cifs_sb->local_nls,
+ cifsInode = CIFS_I(newinode);
+ dosattrs = cifsInode->cifsAttrs|ATTR_READONLY;
+ pInfo.Attributes = cpu_to_le32(dosattrs);
+ tmprc = CIFSSMBSetPathInfo(xid, pTcon,
+ full_path, &pInfo,
+ cifs_sb->local_nls,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (tmprc == 0)
+ cifsInode->cifsAttrs = dosattrs;
}
if (direntry->d_inode) {
if (cifs_sb->mnt_cifs_flags &
@@ -1096,117 +1201,141 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
return rc;
}
+static int
+cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
+ struct dentry *to_dentry, const char *toPath)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb);
+ struct cifsTconInfo *pTcon = cifs_sb->tcon;
+ __u16 srcfid;
+ int oplock, rc;
+
+ /* try path-based rename first */
+ rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ /*
+ * don't bother with rename by filehandle unless file is busy and
+ * source Note that cross directory moves do not work with
+ * rename by filehandle to various Windows servers.
+ */
+ if (rc == 0 || rc != -ETXTBSY)
+ return rc;
+
+ /* open the file to be renamed -- we need DELETE perms */
+ rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
+ CREATE_NOT_DIR, &srcfid, &oplock, NULL,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ if (rc == 0) {
+ rc = CIFSSMBRenameOpenFile(xid, pTcon, srcfid,
+ (const char *) to_dentry->d_name.name,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+ CIFSSMBClose(xid, pTcon, srcfid);
+ }
+
+ return rc;
+}
+
int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
struct inode *target_inode, struct dentry *target_direntry)
{
- char *fromName;
- char *toName;
+ char *fromName = NULL;
+ char *toName = NULL;
struct cifs_sb_info *cifs_sb_source;
struct cifs_sb_info *cifs_sb_target;
struct cifsTconInfo *pTcon;
+ FILE_UNIX_BASIC_INFO *info_buf_source = NULL;
+ FILE_UNIX_BASIC_INFO *info_buf_target;
int xid;
- int rc = 0;
-
- xid = GetXid();
+ int rc;
cifs_sb_target = CIFS_SB(target_inode->i_sb);
cifs_sb_source = CIFS_SB(source_inode->i_sb);
pTcon = cifs_sb_source->tcon;
+ xid = GetXid();
+
+ /*
+ * BB: this might be allowed if same server, but different share.
+ * Consider adding support for this
+ */
if (pTcon != cifs_sb_target->tcon) {
- FreeXid(xid);
- return -EXDEV; /* BB actually could be allowed if same server,
- but different share.
- Might eventually add support for this */
+ rc = -EXDEV;
+ goto cifs_rename_exit;
}
- /* we already have the rename sem so we do not need to grab it again
- here to protect the path integrity */
+ /*
+ * we already have the rename sem so we do not need to
+ * grab it again here to protect the path integrity
+ */
fromName = build_path_from_dentry(source_direntry);
+ if (fromName == NULL) {
+ rc = -ENOMEM;
+ goto cifs_rename_exit;
+ }
+
toName = build_path_from_dentry(target_direntry);
- if ((fromName == NULL) || (toName == NULL)) {
+ if (toName == NULL) {
rc = -ENOMEM;
goto cifs_rename_exit;
}
- rc = CIFSSMBRename(xid, pTcon, fromName, toName,
- cifs_sb_source->local_nls,
- cifs_sb_source->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ rc = cifs_do_rename(xid, source_direntry, fromName,
+ target_direntry, toName);
+
if (rc == -EEXIST) {
- /* check if they are the same file because rename of hardlinked
- files is a noop */
- FILE_UNIX_BASIC_INFO *info_buf_source;
- FILE_UNIX_BASIC_INFO *info_buf_target;
-
- info_buf_source =
- kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
- if (info_buf_source != NULL) {
+ if (pTcon->unix_ext) {
+ /*
+ * Are src and dst hardlinks of same inode? We can
+ * only tell with unix extensions enabled
+ */
+ info_buf_source =
+ kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO),
+ GFP_KERNEL);
+ if (info_buf_source == NULL)
+ goto unlink_target;
+
info_buf_target = info_buf_source + 1;
- if (pTcon->unix_ext)
- rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName,
- info_buf_source,
- cifs_sb_source->local_nls,
- cifs_sb_source->mnt_cifs_flags &
+ rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName,
+ info_buf_source,
+ cifs_sb_source->local_nls,
+ cifs_sb_source->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
- /* else rc is still EEXIST so will fall through to
- unlink the target and retry rename */
- if (rc == 0) {
- rc = CIFSSMBUnixQPathInfo(xid, pTcon, toName,
- info_buf_target,
+ if (rc != 0)
+ goto unlink_target;
+
+ rc = CIFSSMBUnixQPathInfo(xid, pTcon,
+ toName, info_buf_target,
cifs_sb_target->local_nls,
/* remap based on source sb */
cifs_sb_source->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- }
- if ((rc == 0) &&
- (info_buf_source->UniqueId ==
- info_buf_target->UniqueId)) {
- /* do not rename since the files are hardlinked which
- is a noop */
- } else {
- /* we either can not tell the files are hardlinked
- (as with Windows servers) or files are not
- hardlinked so delete the target manually before
- renaming to follow POSIX rather than Windows
- semantics */
- cifs_unlink(target_inode, target_direntry);
- rc = CIFSSMBRename(xid, pTcon, fromName,
- toName,
- cifs_sb_source->local_nls,
- cifs_sb_source->mnt_cifs_flags
- & CIFS_MOUNT_MAP_SPECIAL_CHR);
- }
- kfree(info_buf_source);
- } /* if we can not get memory just leave rc as EEXIST */
- }
-
- if (rc)
- cFYI(1, ("rename rc %d", rc));
-
- if ((rc == -EIO) || (rc == -EEXIST)) {
- int oplock = 0;
- __u16 netfid;
-
- /* BB FIXME Is Generic Read correct for rename? */
- /* if renaming directory - we should not say CREATE_NOT_DIR,
- need to test renaming open directory, also GENERIC_READ
- might not right be right access to request */
- rc = CIFSSMBOpen(xid, pTcon, fromName, FILE_OPEN, GENERIC_READ,
- CREATE_NOT_DIR, &netfid, &oplock, NULL,
- cifs_sb_source->local_nls,
- cifs_sb_source->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc == 0) {
- rc = CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName,
- cifs_sb_source->local_nls,
- cifs_sb_source->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
- CIFSSMBClose(xid, pTcon, netfid);
- }
+
+ if (rc == 0 && (info_buf_source->UniqueId ==
+ info_buf_target->UniqueId))
+ /* same file, POSIX says that this is a noop */
+ goto cifs_rename_exit;
+ } /* else ... BB we could add the same check for Windows by
+ checking the UniqueId via FILE_INTERNAL_INFO */
+unlink_target:
+ /*
+ * we either can not tell the files are hardlinked (as with
+ * Windows servers) or files are not hardlinked. Delete the
+ * target manually before renaming to follow POSIX rather than
+ * Windows semantics
+ */
+ cifs_unlink(target_inode, target_direntry);
+ rc = cifs_do_rename(xid, source_direntry, fromName,
+ target_direntry, toName);
}
cifs_rename_exit:
+ kfree(info_buf_source);
kfree(fromName);
kfree(toName);
FreeXid(xid);
@@ -1507,101 +1636,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
}
static int
-cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
- char *full_path, __u32 dosattr)
-{
- int rc;
- int oplock = 0;
- __u16 netfid;
- __u32 netpid;
- bool set_time = false;
- struct cifsFileInfo *open_file;
- struct cifsInodeInfo *cifsInode = CIFS_I(inode);
- struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
- struct cifsTconInfo *pTcon = cifs_sb->tcon;
- FILE_BASIC_INFO info_buf;
-
- if (attrs->ia_valid & ATTR_ATIME) {
- set_time = true;
- info_buf.LastAccessTime =
- cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
- } else
- info_buf.LastAccessTime = 0;
-
- if (attrs->ia_valid & ATTR_MTIME) {
- set_time = true;
- info_buf.LastWriteTime =
- cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
- } else
- info_buf.LastWriteTime = 0;
-
- /*
- * Samba throws this field away, but windows may actually use it.
- * Do not set ctime unless other time stamps are changed explicitly
- * (i.e. by utimes()) since we would then have a mix of client and
- * server times.
- */
- if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
- cFYI(1, ("CIFS - CTIME changed"));
- info_buf.ChangeTime =
- cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
- } else
- info_buf.ChangeTime = 0;
-
- info_buf.CreationTime = 0; /* don't change */
- info_buf.Attributes = cpu_to_le32(dosattr);
-
- /*
- * If the file is already open for write, just use that fileid
- */
- open_file = find_writable_file(cifsInode);
- if (open_file) {
- netfid = open_file->netfid;
- netpid = open_file->pid;
- goto set_via_filehandle;
- }
-
- /*
- * NT4 apparently returns success on this call, but it doesn't
- * really work.
- */
- if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
- rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
- &info_buf, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc != -EOPNOTSUPP && rc != -EINVAL)
- goto out;
- }
-
- cFYI(1, ("calling SetFileInfo since SetPathInfo for "
- "times not supported by this server"));
- rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
- SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
- CREATE_NOT_DIR, &netfid, &oplock,
- NULL, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
-
- if (rc != 0) {
- if (rc == -EIO)
- rc = -EINVAL;
- goto out;
- }
-
- netpid = current->tgid;
-
-set_via_filehandle:
- rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
- if (open_file == NULL)
- CIFSSMBClose(xid, pTcon, netfid);
- else
- atomic_dec(&open_file->wrtPending);
-out:
- return rc;
-}
-
-static int
cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
{
int rc;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 4b17f8fe3157..654d972a88f4 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -150,8 +150,7 @@ cifs_buf_get(void)
but it may be more efficient to always alloc same size
albeit slightly larger than necessary and maxbuffersize
defaults to this and can not be bigger */
- ret_buf = (struct smb_hdr *) mempool_alloc(cifs_req_poolp,
- GFP_KERNEL | GFP_NOFS);
+ ret_buf = mempool_alloc(cifs_req_poolp, GFP_NOFS);
/* clear the first few header bytes */
/* for most paths, more is cleared in header_assemble */
@@ -188,8 +187,7 @@ cifs_small_buf_get(void)
but it may be more efficient to always alloc same size
albeit slightly larger than necessary and maxbuffersize
defaults to this and can not be bigger */
- ret_buf = (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp,
- GFP_KERNEL | GFP_NOFS);
+ ret_buf = mempool_alloc(cifs_sm_req_poolp, GFP_NOFS);
if (ret_buf) {
/* No need to clear memory here, cleared in header assemble */
/* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 5f40ed3473f5..765adf12d54f 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -640,6 +640,70 @@ static int is_dir_changed(struct file *file)
}
+static int cifs_save_resume_key(const char *current_entry,
+ struct cifsFileInfo *cifsFile)
+{
+ int rc = 0;
+ unsigned int len = 0;
+ __u16 level;
+ char *filename;
+
+ if ((cifsFile == NULL) || (current_entry == NULL))
+ return -EINVAL;
+
+ level = cifsFile->srch_inf.info_level;
+
+ if (level == SMB_FIND_FILE_UNIX) {
+ FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry;
+
+ filename = &pFindData->FileName[0];
+ if (cifsFile->srch_inf.unicode) {
+ len = cifs_unicode_bytelen(filename);
+ } else {
+ /* BB should we make this strnlen of PATH_MAX? */
+ len = strnlen(filename, PATH_MAX);
+ }
+ cifsFile->srch_inf.resume_key = pFindData->ResumeKey;
+ } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
+ FILE_DIRECTORY_INFO *pFindData =
+ (FILE_DIRECTORY_INFO *)current_entry;
+ filename = &pFindData->FileName[0];
+ len = le32_to_cpu(pFindData->FileNameLength);
+ cifsFile->srch_inf.resume_key = pFindData->FileIndex;
+ } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
+ FILE_FULL_DIRECTORY_INFO *pFindData =
+ (FILE_FULL_DIRECTORY_INFO *)current_entry;
+ filename = &pFindData->FileName[0];
+ len = le32_to_cpu(pFindData->FileNameLength);
+ cifsFile->srch_inf.resume_key = pFindData->FileIndex;
+ } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) {
+ SEARCH_ID_FULL_DIR_INFO *pFindData =
+ (SEARCH_ID_FULL_DIR_INFO *)current_entry;
+ filename = &pFindData->FileName[0];
+ len = le32_to_cpu(pFindData->FileNameLength);
+ cifsFile->srch_inf.resume_key = pFindData->FileIndex;
+ } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
+ FILE_BOTH_DIRECTORY_INFO *pFindData =
+ (FILE_BOTH_DIRECTORY_INFO *)current_entry;
+ filename = &pFindData->FileName[0];
+ len = le32_to_cpu(pFindData->FileNameLength);
+ cifsFile->srch_inf.resume_key = pFindData->FileIndex;
+ } else if (level == SMB_FIND_FILE_INFO_STANDARD) {
+ FIND_FILE_STANDARD_INFO *pFindData =
+ (FIND_FILE_STANDARD_INFO *)current_entry;
+ filename = &pFindData->FileName[0];
+ /* one byte length, no name conversion */
+ len = (unsigned int)pFindData->FileNameLength;
+ cifsFile->srch_inf.resume_key = pFindData->ResumeKey;
+ } else {
+ cFYI(1, ("Unknown findfirst level %d", level));
+ return -EINVAL;
+ }
+ cifsFile->srch_inf.resume_name_len = len;
+ cifsFile->srch_inf.presume_name = filename;
+ return rc;
+}
+
/* find the corresponding entry in the search */
/* Note that the SMB server returns search entries for . and .. which
complicates logic here if we choose to parse for them and we do not
@@ -703,6 +767,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
(rc == 0) && !cifsFile->srch_inf.endOfSearch) {
cFYI(1, ("calling findnext2"));
+ cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
rc = CIFSFindNext(xid, pTcon, cifsFile->netfid,
&cifsFile->srch_inf);
if (rc)
@@ -919,69 +984,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file,
return rc;
}
-static int cifs_save_resume_key(const char *current_entry,
- struct cifsFileInfo *cifsFile)
-{
- int rc = 0;
- unsigned int len = 0;
- __u16 level;
- char *filename;
-
- if ((cifsFile == NULL) || (current_entry == NULL))
- return -EINVAL;
-
- level = cifsFile->srch_inf.info_level;
-
- if (level == SMB_FIND_FILE_UNIX) {
- FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry;
-
- filename = &pFindData->FileName[0];
- if (cifsFile->srch_inf.unicode) {
- len = cifs_unicode_bytelen(filename);
- } else {
- /* BB should we make this strnlen of PATH_MAX? */
- len = strnlen(filename, PATH_MAX);
- }
- cifsFile->srch_inf.resume_key = pFindData->ResumeKey;
- } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
- FILE_DIRECTORY_INFO *pFindData =
- (FILE_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- cifsFile->srch_inf.resume_key = pFindData->FileIndex;
- } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) {
- FILE_FULL_DIRECTORY_INFO *pFindData =
- (FILE_FULL_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- cifsFile->srch_inf.resume_key = pFindData->FileIndex;
- } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) {
- SEARCH_ID_FULL_DIR_INFO *pFindData =
- (SEARCH_ID_FULL_DIR_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- cifsFile->srch_inf.resume_key = pFindData->FileIndex;
- } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
- FILE_BOTH_DIRECTORY_INFO *pFindData =
- (FILE_BOTH_DIRECTORY_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- len = le32_to_cpu(pFindData->FileNameLength);
- cifsFile->srch_inf.resume_key = pFindData->FileIndex;
- } else if (level == SMB_FIND_FILE_INFO_STANDARD) {
- FIND_FILE_STANDARD_INFO *pFindData =
- (FIND_FILE_STANDARD_INFO *)current_entry;
- filename = &pFindData->FileName[0];
- /* one byte length, no name conversion */
- len = (unsigned int)pFindData->FileNameLength;
- cifsFile->srch_inf.resume_key = pFindData->ResumeKey;
- } else {
- cFYI(1, ("Unknown findfirst level %d", level));
- return -EINVAL;
- }
- cifsFile->srch_inf.resume_name_len = len;
- cifsFile->srch_inf.presume_name = filename;
- return rc;
-}
int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
{
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 252fdc0567f1..2851d5da0c8c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -624,8 +624,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
ses, nls_cp);
ssetup_exit:
- if (spnego_key)
+ if (spnego_key) {
+ key_revoke(spnego_key);
key_put(spnego_key);
+ }
kfree(str_area);
if (resp_buf_type == CIFS_SMALL_BUFFER) {
cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base));
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index e286db9f5ee2..bf0e6d8e382a 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -50,8 +50,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses)
return NULL;
}
- temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp,
- GFP_KERNEL | GFP_NOFS);
+ temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS);
if (temp == NULL)
return temp;
else {
diff --git a/fs/dcache.c b/fs/dcache.c
index 80e93956aced..e7a1a99b7464 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1395,6 +1395,10 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
if (dentry->d_parent != parent)
goto next;
+ /* non-existing due to RCU? */
+ if (d_unhashed(dentry))
+ goto next;
+
/*
* It is safe to compare names since d_move() cannot
* change the qstr (protected by d_lock).
@@ -1410,10 +1414,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
goto next;
}
- if (!d_unhashed(dentry)) {
- atomic_inc(&dentry->d_count);
- found = dentry;
- }
+ atomic_inc(&dentry->d_count);
+ found = dentry;
spin_unlock(&dentry->d_lock);
break;
next:
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 89d2fb7b991a..fd9859f92fad 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -14,6 +14,9 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/configfs.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
#include <net/sock.h>
#include "config.h"
@@ -377,24 +380,24 @@ static struct config_item_type node_type = {
.ct_owner = THIS_MODULE,
};
-static struct dlm_cluster *to_cluster(struct config_item *i)
+static struct dlm_cluster *config_item_to_cluster(struct config_item *i)
{
return i ? container_of(to_config_group(i), struct dlm_cluster, group) :
NULL;
}
-static struct dlm_space *to_space(struct config_item *i)
+static struct dlm_space *config_item_to_space(struct config_item *i)
{
return i ? container_of(to_config_group(i), struct dlm_space, group) :
NULL;
}
-static struct dlm_comm *to_comm(struct config_item *i)
+static struct dlm_comm *config_item_to_comm(struct config_item *i)
{
return i ? container_of(i, struct dlm_comm, item) : NULL;
}
-static struct dlm_node *to_node(struct config_item *i)
+static struct dlm_node *config_item_to_node(struct config_item *i)
{
return i ? container_of(i, struct dlm_node, item) : NULL;
}
@@ -450,7 +453,7 @@ static struct config_group *make_cluster(struct config_group *g,
static void drop_cluster(struct config_group *g, struct config_item *i)
{
- struct dlm_cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = config_item_to_cluster(i);
struct config_item *tmp;
int j;
@@ -468,7 +471,7 @@ static void drop_cluster(struct config_group *g, struct config_item *i)
static void release_cluster(struct config_item *i)
{
- struct dlm_cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = config_item_to_cluster(i);
kfree(cl->group.default_groups);
kfree(cl);
}
@@ -507,7 +510,7 @@ static struct config_group *make_space(struct config_group *g, const char *name)
static void drop_space(struct config_group *g, struct config_item *i)
{
- struct dlm_space *sp = to_space(i);
+ struct dlm_space *sp = config_item_to_space(i);
struct config_item *tmp;
int j;
@@ -524,7 +527,7 @@ static void drop_space(struct config_group *g, struct config_item *i)
static void release_space(struct config_item *i)
{
- struct dlm_space *sp = to_space(i);
+ struct dlm_space *sp = config_item_to_space(i);
kfree(sp->group.default_groups);
kfree(sp);
}
@@ -546,7 +549,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
static void drop_comm(struct config_group *g, struct config_item *i)
{
- struct dlm_comm *cm = to_comm(i);
+ struct dlm_comm *cm = config_item_to_comm(i);
if (local_comm == cm)
local_comm = NULL;
dlm_lowcomms_close(cm->nodeid);
@@ -557,13 +560,13 @@ static void drop_comm(struct config_group *g, struct config_item *i)
static void release_comm(struct config_item *i)
{
- struct dlm_comm *cm = to_comm(i);
+ struct dlm_comm *cm = config_item_to_comm(i);
kfree(cm);
}
static struct config_item *make_node(struct config_group *g, const char *name)
{
- struct dlm_space *sp = to_space(g->cg_item.ci_parent);
+ struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent);
struct dlm_node *nd;
nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL);
@@ -585,8 +588,8 @@ static struct config_item *make_node(struct config_group *g, const char *name)
static void drop_node(struct config_group *g, struct config_item *i)
{
- struct dlm_space *sp = to_space(g->cg_item.ci_parent);
- struct dlm_node *nd = to_node(i);
+ struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent);
+ struct dlm_node *nd = config_item_to_node(i);
mutex_lock(&sp->members_lock);
list_del(&nd->list);
@@ -598,7 +601,7 @@ static void drop_node(struct config_group *g, struct config_item *i)
static void release_node(struct config_item *i)
{
- struct dlm_node *nd = to_node(i);
+ struct dlm_node *nd = config_item_to_node(i);
kfree(nd);
}
@@ -632,7 +635,7 @@ void dlm_config_exit(void)
static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
- struct dlm_cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = config_item_to_cluster(i);
struct cluster_attribute *cla =
container_of(a, struct cluster_attribute, attr);
return cla->show ? cla->show(cl, buf) : 0;
@@ -642,7 +645,7 @@ static ssize_t store_cluster(struct config_item *i,
struct configfs_attribute *a,
const char *buf, size_t len)
{
- struct dlm_cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = config_item_to_cluster(i);
struct cluster_attribute *cla =
container_of(a, struct cluster_attribute, attr);
return cla->store ? cla->store(cl, buf, len) : -EINVAL;
@@ -651,7 +654,7 @@ static ssize_t store_cluster(struct config_item *i,
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
- struct dlm_comm *cm = to_comm(i);
+ struct dlm_comm *cm = config_item_to_comm(i);
struct comm_attribute *cma =
container_of(a, struct comm_attribute, attr);
return cma->show ? cma->show(cm, buf) : 0;
@@ -660,7 +663,7 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len)
{
- struct dlm_comm *cm = to_comm(i);
+ struct dlm_comm *cm = config_item_to_comm(i);
struct comm_attribute *cma =
container_of(a, struct comm_attribute, attr);
return cma->store ? cma->store(cm, buf, len) : -EINVAL;
@@ -714,7 +717,7 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
- struct dlm_node *nd = to_node(i);
+ struct dlm_node *nd = config_item_to_node(i);
struct node_attribute *nda =
container_of(a, struct node_attribute, attr);
return nda->show ? nda->show(nd, buf) : 0;
@@ -723,7 +726,7 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
static ssize_t store_node(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len)
{
- struct dlm_node *nd = to_node(i);
+ struct dlm_node *nd = config_item_to_node(i);
struct node_attribute *nda =
container_of(a, struct node_attribute, attr);
return nda->store ? nda->store(nd, buf, len) : -EINVAL;
@@ -768,7 +771,7 @@ static struct dlm_space *get_space(char *name)
i = config_group_find_item(space_list, name);
mutex_unlock(&space_list->cg_subsys->su_mutex);
- return to_space(i);
+ return config_item_to_space(i);
}
static void put_space(struct dlm_space *sp)
@@ -776,6 +779,33 @@ static void put_space(struct dlm_space *sp)
config_item_put(&sp->group.cg_item);
}
+static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y)
+{
+ switch (x->ss_family) {
+ case AF_INET: {
+ struct sockaddr_in *sinx = (struct sockaddr_in *)x;
+ struct sockaddr_in *siny = (struct sockaddr_in *)y;
+ if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
+ return 0;
+ if (sinx->sin_port != siny->sin_port)
+ return 0;
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
+ struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
+ if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
+ return 0;
+ if (sinx->sin6_port != siny->sin6_port)
+ return 0;
+ break;
+ }
+ default:
+ return 0;
+ }
+ return 1;
+}
+
static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
{
struct config_item *i;
@@ -788,7 +818,7 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
mutex_lock(&clusters_root.subsys.su_mutex);
list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
- cm = to_comm(i);
+ cm = config_item_to_comm(i);
if (nodeid) {
if (cm->nodeid != nodeid)
@@ -797,8 +827,7 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
config_item_get(i);
break;
} else {
- if (!cm->addr_count ||
- memcmp(cm->addr[0], addr, sizeof(*addr)))
+ if (!cm->addr_count || !addr_compare(cm->addr[0], addr))
continue;
found = 1;
config_item_get(i);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 5a7ac33b629c..868e4c9ef127 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -441,8 +441,11 @@ struct dlm_ls {
uint32_t ls_global_id; /* global unique lockspace ID */
uint32_t ls_exflags;
int ls_lvblen;
- int ls_count; /* reference count */
+ int ls_count; /* refcount of processes in
+ the dlm using this ls */
+ int ls_create_count; /* create/release refcount */
unsigned long ls_flags; /* LSFL_ */
+ unsigned long ls_scan_time;
struct kobject ls_kobj;
struct dlm_rsbtable *ls_rsbtbl;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 499e16759e96..d910501de6d2 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -23,6 +23,7 @@
#include "lock.h"
#include "recover.h"
#include "requestqueue.h"
+#include "user.h"
static int ls_count;
static struct mutex ls_lock;
@@ -211,19 +212,41 @@ void dlm_lockspace_exit(void)
kset_unregister(dlm_kset);
}
+static struct dlm_ls *find_ls_to_scan(void)
+{
+ struct dlm_ls *ls;
+
+ spin_lock(&lslist_lock);
+ list_for_each_entry(ls, &lslist, ls_list) {
+ if (time_after_eq(jiffies, ls->ls_scan_time +
+ dlm_config.ci_scan_secs * HZ)) {
+ spin_unlock(&lslist_lock);
+ return ls;
+ }
+ }
+ spin_unlock(&lslist_lock);
+ return NULL;
+}
+
static int dlm_scand(void *data)
{
struct dlm_ls *ls;
+ int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
while (!kthread_should_stop()) {
- list_for_each_entry(ls, &lslist, ls_list) {
+ ls = find_ls_to_scan();
+ if (ls) {
if (dlm_lock_recovery_try(ls)) {
+ ls->ls_scan_time = jiffies;
dlm_scan_rsbs(ls);
dlm_scan_timeout(ls);
dlm_unlock_recovery(ls);
+ } else {
+ ls->ls_scan_time += HZ;
}
+ } else {
+ schedule_timeout_interruptible(timeout_jiffies);
}
- schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
}
return 0;
}
@@ -246,23 +269,6 @@ static void dlm_scand_stop(void)
kthread_stop(scand_task);
}
-static struct dlm_ls *dlm_find_lockspace_name(char *name, int namelen)
-{
- struct dlm_ls *ls;
-
- spin_lock(&lslist_lock);
-
- list_for_each_entry(ls, &lslist, ls_list) {
- if (ls->ls_namelen == namelen &&
- memcmp(ls->ls_name, name, namelen) == 0)
- goto out;
- }
- ls = NULL;
- out:
- spin_unlock(&lslist_lock);
- return ls;
-}
-
struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
{
struct dlm_ls *ls;
@@ -327,6 +333,7 @@ static void remove_lockspace(struct dlm_ls *ls)
for (;;) {
spin_lock(&lslist_lock);
if (ls->ls_count == 0) {
+ WARN_ON(ls->ls_create_count != 0);
list_del(&ls->ls_list);
spin_unlock(&lslist_lock);
return;
@@ -381,7 +388,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
uint32_t flags, int lvblen)
{
struct dlm_ls *ls;
- int i, size, error = -ENOMEM;
+ int i, size, error;
int do_unreg = 0;
if (namelen > DLM_LOCKSPACE_LEN)
@@ -393,12 +400,37 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
if (!try_module_get(THIS_MODULE))
return -EINVAL;
- ls = dlm_find_lockspace_name(name, namelen);
- if (ls) {
- *lockspace = ls;
+ if (!dlm_user_daemon_available()) {
+ module_put(THIS_MODULE);
+ return -EUNATCH;
+ }
+
+ error = 0;
+
+ spin_lock(&lslist_lock);
+ list_for_each_entry(ls, &lslist, ls_list) {
+ WARN_ON(ls->ls_create_count <= 0);
+ if (ls->ls_namelen != namelen)
+ continue;
+ if (memcmp(ls->ls_name, name, namelen))
+ continue;
+ if (flags & DLM_LSFL_NEWEXCL) {
+ error = -EEXIST;
+ break;
+ }
+ ls->ls_create_count++;
module_put(THIS_MODULE);
- return -EEXIST;
+ error = 1; /* not an error, return 0 */
+ break;
}
+ spin_unlock(&lslist_lock);
+
+ if (error < 0)
+ goto out;
+ if (error)
+ goto ret_zero;
+
+ error = -ENOMEM;
ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL);
if (!ls)
@@ -408,6 +440,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_lvblen = lvblen;
ls->ls_count = 0;
ls->ls_flags = 0;
+ ls->ls_scan_time = jiffies;
if (flags & DLM_LSFL_TIMEWARN)
set_bit(LSFL_TIMEWARN, &ls->ls_flags);
@@ -418,8 +451,9 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_allocation = GFP_KERNEL;
/* ls_exflags are forced to match among nodes, and we don't
- need to require all nodes to have TIMEWARN or FS set */
- ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
+ need to require all nodes to have some flags set */
+ ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
+ DLM_LSFL_NEWEXCL));
size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
@@ -510,6 +544,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
down_write(&ls->ls_in_recovery);
spin_lock(&lslist_lock);
+ ls->ls_create_count = 1;
list_add(&ls->ls_list, &lslist);
spin_unlock(&lslist_lock);
@@ -548,7 +583,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
dlm_create_debug_file(ls);
log_debug(ls, "join complete");
-
+ ret_zero:
*lockspace = ls;
return 0;
@@ -635,13 +670,34 @@ static int release_lockspace(struct dlm_ls *ls, int force)
struct dlm_lkb *lkb;
struct dlm_rsb *rsb;
struct list_head *head;
- int i;
- int busy = lockspace_busy(ls);
+ int i, busy, rv;
+
+ busy = lockspace_busy(ls);
+
+ spin_lock(&lslist_lock);
+ if (ls->ls_create_count == 1) {
+ if (busy > force)
+ rv = -EBUSY;
+ else {
+ /* remove_lockspace takes ls off lslist */
+ ls->ls_create_count = 0;
+ rv = 0;
+ }
+ } else if (ls->ls_create_count > 1) {
+ rv = --ls->ls_create_count;
+ } else {
+ rv = -EINVAL;
+ }
+ spin_unlock(&lslist_lock);
- if (busy > force)
- return -EBUSY;
+ if (rv) {
+ log_debug(ls, "release_lockspace no remove %d", rv);
+ return rv;
+ }
+
+ dlm_device_deregister(ls);
- if (force < 3)
+ if (force < 3 && dlm_user_daemon_available())
do_uevent(ls, 0);
dlm_recoverd_stop(ls);
@@ -720,15 +776,10 @@ static int release_lockspace(struct dlm_ls *ls, int force)
dlm_clear_members(ls);
dlm_clear_members_gone(ls);
kfree(ls->ls_node_array);
+ log_debug(ls, "release_lockspace final free");
kobject_put(&ls->ls_kobj);
/* The ls structure will be freed when the kobject is done with */
- mutex_lock(&ls_lock);
- ls_count--;
- if (!ls_count)
- threads_stop();
- mutex_unlock(&ls_lock);
-
module_put(THIS_MODULE);
return 0;
}
@@ -750,11 +801,38 @@ static int release_lockspace(struct dlm_ls *ls, int force)
int dlm_release_lockspace(void *lockspace, int force)
{
struct dlm_ls *ls;
+ int error;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -EINVAL;
dlm_put_lockspace(ls);
- return release_lockspace(ls, force);
+
+ mutex_lock(&ls_lock);
+ error = release_lockspace(ls, force);
+ if (!error)
+ ls_count--;
+ else if (!ls_count)
+ threads_stop();
+ mutex_unlock(&ls_lock);
+
+ return error;
+}
+
+void dlm_stop_lockspaces(void)
+{
+ struct dlm_ls *ls;
+
+ restart:
+ spin_lock(&lslist_lock);
+ list_for_each_entry(ls, &lslist, ls_list) {
+ if (!test_bit(LSFL_RUNNING, &ls->ls_flags))
+ continue;
+ spin_unlock(&lslist_lock);
+ log_error(ls, "no userland control daemon, stopping lockspace");
+ dlm_ls_stop(ls);
+ goto restart;
+ }
+ spin_unlock(&lslist_lock);
}
diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h
index 891eabbdd021..f879f87901f8 100644
--- a/fs/dlm/lockspace.h
+++ b/fs/dlm/lockspace.h
@@ -20,6 +20,7 @@ struct dlm_ls *dlm_find_lockspace_global(uint32_t id);
struct dlm_ls *dlm_find_lockspace_local(void *id);
struct dlm_ls *dlm_find_lockspace_device(int minor);
void dlm_put_lockspace(struct dlm_ls *ls);
+void dlm_stop_lockspaces(void);
#endif /* __LOCKSPACE_DOT_H__ */
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 34f14a14fb4e..b3832c67194a 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
@@ -15,7 +15,6 @@
#include <linux/poll.h>
#include <linux/signal.h>
#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
#include <linux/dlm.h>
#include <linux/dlm_device.h>
@@ -27,6 +26,8 @@
static const char name_prefix[] = "dlm";
static const struct file_operations device_fops;
+static atomic_t dlm_monitor_opened;
+static int dlm_monitor_unused = 1;
#ifdef CONFIG_COMPAT
@@ -340,10 +341,15 @@ static int device_user_deadlock(struct dlm_user_proc *proc,
return error;
}
-static int create_misc_device(struct dlm_ls *ls, char *name)
+static int dlm_device_register(struct dlm_ls *ls, char *name)
{
int error, len;
+ /* The device is already registered. This happens when the
+ lockspace is created multiple times from userspace. */
+ if (ls->ls_device.name)
+ return 0;
+
error = -ENOMEM;
len = strlen(name) + strlen(name_prefix) + 2;
ls->ls_device.name = kzalloc(len, GFP_KERNEL);
@@ -363,6 +369,22 @@ fail:
return error;
}
+int dlm_device_deregister(struct dlm_ls *ls)
+{
+ int error;
+
+ /* The device is not registered. This happens when the lockspace
+ was never used from userspace, or when device_create_lockspace()
+ calls dlm_release_lockspace() after the register fails. */
+ if (!ls->ls_device.name)
+ return 0;
+
+ error = misc_deregister(&ls->ls_device);
+ if (!error)
+ kfree(ls->ls_device.name);
+ return error;
+}
+
static int device_user_purge(struct dlm_user_proc *proc,
struct dlm_purge_params *params)
{
@@ -397,7 +419,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
if (!ls)
return -ENOENT;
- error = create_misc_device(ls, params->name);
+ error = dlm_device_register(ls, params->name);
dlm_put_lockspace(ls);
if (error)
@@ -421,31 +443,22 @@ static int device_remove_lockspace(struct dlm_lspace_params *params)
if (!ls)
return -ENOENT;
- /* Deregister the misc device first, so we don't have
- * a device that's not attached to a lockspace. If
- * dlm_release_lockspace fails then we can recreate it
- */
- error = misc_deregister(&ls->ls_device);
- if (error) {
- dlm_put_lockspace(ls);
- goto out;
- }
- kfree(ls->ls_device.name);
-
if (params->flags & DLM_USER_LSFLG_FORCEFREE)
force = 2;
lockspace = ls->ls_local_handle;
+ dlm_put_lockspace(ls);
- /* dlm_release_lockspace waits for references to go to zero,
- so all processes will need to close their device for the ls
- before the release will procede */
+ /* The final dlm_release_lockspace waits for references to go to
+ zero, so all processes will need to close their device for the
+ ls before the release will proceed. release also calls the
+ device_deregister above. Converting a positive return value
+ from release to zero means that userspace won't know when its
+ release was the final one, but it shouldn't need to know. */
- dlm_put_lockspace(ls);
error = dlm_release_lockspace(lockspace, force);
- if (error)
- create_misc_device(ls, ls->ls_name);
- out:
+ if (error > 0)
+ error = 0;
return error;
}
@@ -623,17 +636,13 @@ static int device_open(struct inode *inode, struct file *file)
struct dlm_user_proc *proc;
struct dlm_ls *ls;
- lock_kernel();
ls = dlm_find_lockspace_device(iminor(inode));
- if (!ls) {
- unlock_kernel();
+ if (!ls)
return -ENOENT;
- }
proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL);
if (!proc) {
dlm_put_lockspace(ls);
- unlock_kernel();
return -ENOMEM;
}
@@ -645,7 +654,6 @@ static int device_open(struct inode *inode, struct file *file)
spin_lock_init(&proc->locks_spin);
init_waitqueue_head(&proc->wait);
file->private_data = proc;
- unlock_kernel();
return 0;
}
@@ -878,9 +886,28 @@ static unsigned int device_poll(struct file *file, poll_table *wait)
return 0;
}
+int dlm_user_daemon_available(void)
+{
+ /* dlm_controld hasn't started (or, has started, but not
+ properly populated configfs) */
+
+ if (!dlm_our_nodeid())
+ return 0;
+
+ /* This is to deal with versions of dlm_controld that don't
+ know about the monitor device. We assume that if the
+ dlm_controld was started (above), but the monitor device
+ was never opened, that it's an old version. dlm_controld
+ should open the monitor device before populating configfs. */
+
+ if (dlm_monitor_unused)
+ return 1;
+
+ return atomic_read(&dlm_monitor_opened) ? 1 : 0;
+}
+
static int ctl_device_open(struct inode *inode, struct file *file)
{
- cycle_kernel_lock();
file->private_data = NULL;
return 0;
}
@@ -890,6 +917,20 @@ static int ctl_device_close(struct inode *inode, struct file *file)
return 0;
}
+static int monitor_device_open(struct inode *inode, struct file *file)
+{
+ atomic_inc(&dlm_monitor_opened);
+ dlm_monitor_unused = 0;
+ return 0;
+}
+
+static int monitor_device_close(struct inode *inode, struct file *file)
+{
+ if (atomic_dec_and_test(&dlm_monitor_opened))
+ dlm_stop_lockspaces();
+ return 0;
+}
+
static const struct file_operations device_fops = {
.open = device_open,
.release = device_close,
@@ -913,19 +954,42 @@ static struct miscdevice ctl_device = {
.minor = MISC_DYNAMIC_MINOR,
};
+static const struct file_operations monitor_device_fops = {
+ .open = monitor_device_open,
+ .release = monitor_device_close,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice monitor_device = {
+ .name = "dlm-monitor",
+ .fops = &monitor_device_fops,
+ .minor = MISC_DYNAMIC_MINOR,
+};
+
int __init dlm_user_init(void)
{
int error;
+ atomic_set(&dlm_monitor_opened, 0);
+
error = misc_register(&ctl_device);
- if (error)
+ if (error) {
log_print("misc_register failed for control device");
+ goto out;
+ }
+ error = misc_register(&monitor_device);
+ if (error) {
+ log_print("misc_register failed for monitor device");
+ misc_deregister(&ctl_device);
+ }
+ out:
return error;
}
void dlm_user_exit(void)
{
misc_deregister(&ctl_device);
+ misc_deregister(&monitor_device);
}
diff --git a/fs/dlm/user.h b/fs/dlm/user.h
index d38e9f3e4151..35eb6a13d616 100644
--- a/fs/dlm/user.h
+++ b/fs/dlm/user.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
@@ -12,5 +12,7 @@
void dlm_user_add_ast(struct dlm_lkb *lkb, int type);
int dlm_user_init(void);
void dlm_user_exit(void);
+int dlm_device_deregister(struct dlm_ls *ls);
+int dlm_user_daemon_available(void);
#endif
diff --git a/fs/exec.c b/fs/exec.c
index 32993beecbe9..cecee501ce78 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -752,11 +752,11 @@ static int exec_mmap(struct mm_struct *mm)
tsk->active_mm = mm;
activate_mm(active_mm, mm);
task_unlock(tsk);
- mm_update_next_owner(old_mm);
arch_pick_mmap_layout(mm);
if (old_mm) {
up_read(&old_mm->mmap_sem);
BUG_ON(active_mm != old_mm);
+ mm_update_next_owner(old_mm);
mmput(old_mm);
return 0;
}
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 302e95c4af7e..fb98b3d847ed 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -6,6 +6,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/msdos_fs.h>
+#include <linux/blkdev.h>
struct fatent_operations {
void (*ent_blocknr)(struct super_block *, int, int *, sector_t *);
@@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
struct fat_entry fatent;
struct buffer_head *bhs[MAX_BUF_PER_PAGE];
int i, err, nr_bhs;
+ int first_cl = cluster;
nr_bhs = 0;
fatent_init(&fatent);
@@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inode, int cluster)
goto error;
}
+ /*
+ * Issue discard for the sectors we no longer care about,
+ * batching contiguous clusters into one request
+ */
+ if (cluster != fatent.entry + 1) {
+ int nr_clus = fatent.entry - first_cl + 1;
+
+ sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl),
+ nr_clus * sbi->sec_per_clus);
+ first_cl = cluster;
+ }
+
ops->ent_put(&fatent, FAT_ENT_FREE);
if (sbi->free_clusters != -1) {
sbi->free_clusters++;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 13391e546616..c962283d4e7f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1265,6 +1265,8 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
if (time_before(now, holdtime))
delay = holdtime - now;
+ if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
+ delay = gl->gl_ops->go_min_hold_time;
spin_lock(&gl->gl_spin);
handle_callback(gl, state, 1, delay);
@@ -1578,8 +1580,6 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
*p++ = 'a';
if (flags & GL_EXACT)
*p++ = 'E';
- if (flags & GL_ATIME)
- *p++ = 'a';
if (flags & GL_NOCACHE)
*p++ = 'c';
if (test_bit(HIF_HOLDER, &iflags))
@@ -1816,15 +1816,17 @@ restart:
if (gl) {
gi->gl = hlist_entry(gl->gl_list.next,
struct gfs2_glock, gl_list);
- if (gi->gl)
- gfs2_glock_hold(gi->gl);
+ } else {
+ gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
+ struct gfs2_glock, gl_list);
}
+ if (gi->gl)
+ gfs2_glock_hold(gi->gl);
read_unlock(gl_lock_addr(gi->hash));
if (gl)
gfs2_glock_put(gl);
- if (gl && gi->gl == NULL)
- gi->hash++;
while (gi->gl == NULL) {
+ gi->hash++;
if (gi->hash >= GFS2_GL_HASH_SIZE)
return 1;
read_lock(gl_lock_addr(gi->hash));
@@ -1833,7 +1835,6 @@ restart:
if (gi->gl)
gfs2_glock_hold(gi->gl);
read_unlock(gl_lock_addr(gi->hash));
- gi->hash++;
}
if (gi->sdp != gi->gl->gl_sbd)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 971d92af70fc..695c6b193611 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -24,7 +24,6 @@
#define GL_ASYNC 0x00000040
#define GL_EXACT 0x00000080
#define GL_SKIP 0x00000100
-#define GL_ATIME 0x00000200
#define GL_NOCACHE 0x00000400
#define GLR_TRYFAILED 13
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 448697a5c462..f566ec1b4e8e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -386,20 +386,21 @@ struct gfs2_statfs_change_host {
#define GFS2_DATA_ORDERED 2
struct gfs2_args {
- char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
- char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
- char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
- int ar_spectator; /* Don't get a journal because we're always RO */
- int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
- int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
- int ar_localcaching; /* Local-style caching (dangerous on multihost) */
- int ar_debug; /* Oops on errors instead of trying to be graceful */
- int ar_upgrade; /* Upgrade ondisk/multihost format */
- unsigned int ar_num_glockd; /* Number of glockd threads */
- int ar_posix_acl; /* Enable posix acls */
- int ar_quota; /* off/account/on */
- int ar_suiddir; /* suiddir support */
- int ar_data; /* ordered/writeback */
+ char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
+ char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
+ char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
+ unsigned int ar_spectator:1; /* Don't get a journal */
+ unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */
+ unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */
+ unsigned int ar_localcaching:1; /* Local caching */
+ unsigned int ar_debug:1; /* Oops on errors */
+ unsigned int ar_upgrade:1; /* Upgrade ondisk format */
+ unsigned int ar_posix_acl:1; /* Enable posix acls */
+ unsigned int ar_quota:2; /* off/account/on */
+ unsigned int ar_suiddir:1; /* suiddir support */
+ unsigned int ar_data:2; /* ordered/writeback */
+ unsigned int ar_meta:1; /* mount metafs */
+ unsigned int ar_num_glockd; /* Number of glockd threads */
};
struct gfs2_tune {
@@ -419,7 +420,6 @@ struct gfs2_tune {
unsigned int gt_quota_scale_den; /* Denominator */
unsigned int gt_quota_cache_secs;
unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
- unsigned int gt_atime_quantum; /* Min secs between atime updates */
unsigned int gt_new_files_jdata;
unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
unsigned int gt_stall_secs; /* Detects trouble! */
@@ -432,7 +432,7 @@ enum {
SDF_JOURNAL_CHECKED = 0,
SDF_JOURNAL_LIVE = 1,
SDF_SHUTDOWN = 2,
- SDF_NOATIME = 3,
+ SDF_NOBARRIERS = 3,
};
#define GFS2_FSNAME_LEN 256
@@ -461,7 +461,6 @@ struct gfs2_sb_host {
struct gfs2_sbd {
struct super_block *sd_vfs;
- struct super_block *sd_vfs_meta;
struct kobject sd_kobj;
unsigned long sd_flags; /* SDF_... */
struct gfs2_sb_host sd_sb;
@@ -499,7 +498,9 @@ struct gfs2_sbd {
/* Inode Stuff */
- struct inode *sd_master_dir;
+ struct dentry *sd_master_dir;
+ struct dentry *sd_root_dir;
+
struct inode *sd_jindex;
struct inode *sd_inum_inode;
struct inode *sd_statfs_inode;
@@ -634,7 +635,6 @@ struct gfs2_sbd {
/* Debugging crud */
unsigned long sd_last_warning;
- struct vfsmount *sd_gfs2mnt;
struct dentry *debugfs_dir; /* debugfs directory */
struct dentry *debugfs_dentry_glocks; /* for debugfs */
};
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8b0806a32948..7cee695fa441 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -18,6 +18,7 @@
#include <linux/crc32.h>
#include <linux/lm_interface.h>
#include <linux/security.h>
+#include <linux/time.h>
#include "gfs2.h"
#include "incore.h"
@@ -249,6 +250,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
struct gfs2_dinode_host *di = &ip->i_di;
const struct gfs2_dinode *str = buf;
+ struct timespec atime;
u16 height, depth;
if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
@@ -275,8 +277,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
di->di_size = be64_to_cpu(str->di_size);
i_size_write(&ip->i_inode, di->di_size);
gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
- ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
- ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
+ atime.tv_sec = be64_to_cpu(str->di_atime);
+ atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
+ if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
+ ip->i_inode.i_atime = atime;
ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
@@ -1033,13 +1037,11 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (bh)
brelse(bh);
- if (!inode)
- return ERR_PTR(-ENOMEM);
return inode;
fail_gunlock2:
gfs2_glock_dq_uninit(ghs + 1);
- if (inode)
+ if (inode && !IS_ERR(inode))
iput(inode);
fail_gunlock:
gfs2_glock_dq(ghs);
@@ -1140,54 +1142,6 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
return 0;
}
-/*
- * gfs2_ok_to_move - check if it's ok to move a directory to another directory
- * @this: move this
- * @to: to here
- *
- * Follow @to back to the root and make sure we don't encounter @this
- * Assumes we already hold the rename lock.
- *
- * Returns: errno
- */
-
-int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
-{
- struct inode *dir = &to->i_inode;
- struct super_block *sb = dir->i_sb;
- struct inode *tmp;
- struct qstr dotdot;
- int error = 0;
-
- gfs2_str2qstr(&dotdot, "..");
-
- igrab(dir);
-
- for (;;) {
- if (dir == &this->i_inode) {
- error = -EINVAL;
- break;
- }
- if (dir == sb->s_root->d_inode) {
- error = 0;
- break;
- }
-
- tmp = gfs2_lookupi(dir, &dotdot, 1);
- if (IS_ERR(tmp)) {
- error = PTR_ERR(tmp);
- break;
- }
-
- iput(dir);
- dir = tmp;
- }
-
- iput(dir);
-
- return error;
-}
-
/**
* gfs2_readlinki - return the contents of a symlink
* @ip: the symlink's inode
@@ -1207,8 +1161,8 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
unsigned int x;
int error;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
- error = gfs2_glock_nq_atime(&i_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+ error = gfs2_glock_nq(&i_gh);
if (error) {
gfs2_holder_uninit(&i_gh);
return error;
@@ -1243,101 +1197,6 @@ out:
return error;
}
-/**
- * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
- * conditionally update the inode's atime
- * @gh: the holder to acquire
- *
- * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
- * Update if the difference between the current time and the inode's current
- * atime is greater than an interval specified at mount.
- *
- * Returns: errno
- */
-
-int gfs2_glock_nq_atime(struct gfs2_holder *gh)
-{
- struct gfs2_glock *gl = gh->gh_gl;
- struct gfs2_sbd *sdp = gl->gl_sbd;
- struct gfs2_inode *ip = gl->gl_object;
- s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
- unsigned int state;
- int flags;
- int error;
- struct timespec tv = CURRENT_TIME;
-
- if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
- gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
- gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
- return -EINVAL;
-
- state = gh->gh_state;
- flags = gh->gh_flags;
-
- error = gfs2_glock_nq(gh);
- if (error)
- return error;
-
- if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
- (sdp->sd_vfs->s_flags & MS_RDONLY))
- return 0;
-
- if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
- gfs2_glock_dq(gh);
- gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
- gh);
- error = gfs2_glock_nq(gh);
- if (error)
- return error;
-
- /* Verify that atime hasn't been updated while we were
- trying to get exclusive lock. */
-
- tv = CURRENT_TIME;
- if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
- struct buffer_head *dibh;
- struct gfs2_dinode *di;
-
- error = gfs2_trans_begin(sdp, RES_DINODE, 0);
- if (error == -EROFS)
- return 0;
- if (error)
- goto fail;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
-
- ip->i_inode.i_atime = tv;
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- di = (struct gfs2_dinode *)dibh->b_data;
- di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
- di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
- brelse(dibh);
-
- gfs2_trans_end(sdp);
- }
-
- /* If someone else has asked for the glock,
- unlock and let them have it. Then reacquire
- in the original state. */
- if (gfs2_glock_is_blocking(gl)) {
- gfs2_glock_dq(gh);
- gfs2_holder_reinit(state, flags, gh);
- return gfs2_glock_nq(gh);
- }
- }
-
- return 0;
-
-fail_end_trans:
- gfs2_trans_end(sdp);
-fail:
- gfs2_glock_dq(gh);
- return error;
-}
-
static int
__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
{
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 58f9607d6a86..2d43f69610a0 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -91,9 +91,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
const struct gfs2_inode *ip);
int gfs2_permission(struct inode *inode, int mask);
-int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
-int gfs2_glock_nq_atime(struct gfs2_holder *gh);
int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 09d78c216f48..0c4cbe6c8285 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -144,7 +144,8 @@ static int gdlm_mount(char *table_name, char *host_data,
error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
&ls->dlm_lockspace,
- DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0),
+ DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
+ (nodir ? DLM_LSFL_NODIR : 0),
GDLM_LVB_SIZE);
if (error) {
log_error("dlm_new_lockspace error %d", error);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6c6af9f5e3ab..ad305854bdc6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/bio.h>
#include "gfs2.h"
#include "incore.h"
@@ -584,7 +585,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
memset(bh->b_data, 0, bh->b_size);
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
- unlock_buffer(bh);
gfs2_ail1_empty(sdp, 0);
tail = current_tail(sdp);
@@ -601,8 +601,23 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
lh->lh_hash = cpu_to_be32(hash);
- set_buffer_dirty(bh);
- if (sync_dirty_buffer(bh))
+ bh->b_end_io = end_buffer_write_sync;
+ if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
+ goto skip_barrier;
+ get_bh(bh);
+ submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh);
+ wait_on_buffer(bh);
+ if (buffer_eopnotsupp(bh)) {
+ clear_buffer_eopnotsupp(bh);
+ set_buffer_uptodate(bh);
+ set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
+ lock_buffer(bh);
+skip_barrier:
+ get_bh(bh);
+ submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh);
+ wait_on_buffer(bh);
+ }
+ if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
brelse(bh);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index b941f9f9f958..df48333e6f01 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -42,6 +42,7 @@ enum {
Opt_nosuiddir,
Opt_data_writeback,
Opt_data_ordered,
+ Opt_meta,
Opt_err,
};
@@ -66,6 +67,7 @@ static match_table_t tokens = {
{Opt_nosuiddir, "nosuiddir"},
{Opt_data_writeback, "data=writeback"},
{Opt_data_ordered, "data=ordered"},
+ {Opt_meta, "meta"},
{Opt_err, NULL}
};
@@ -239,6 +241,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
case Opt_data_ordered:
args->ar_data = GFS2_DATA_ORDERED;
break;
+ case Opt_meta:
+ if (remount && args->ar_meta != 1)
+ goto cant_remount;
+ args->ar_meta = 1;
+ break;
case Opt_err:
default:
fs_info(sdp, "unknown option: %s\n", o);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index e64a1b04117a..27563816e1c5 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -512,8 +512,8 @@ static int gfs2_readpage(struct file *file, struct page *page)
int error;
unlock_page(page);
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
- error = gfs2_glock_nq_atime(&gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ error = gfs2_glock_nq(&gh);
if (unlikely(error))
goto out;
error = AOP_TRUNCATED_PAGE;
@@ -594,8 +594,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
struct gfs2_holder gh;
int ret;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
- ret = gfs2_glock_nq_atime(&gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ ret = gfs2_glock_nq(&gh);
if (unlikely(ret))
goto out_uninit;
if (!gfs2_is_stuffed(ip))
@@ -636,8 +636,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
unsigned to = from + len;
struct page *page;
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
- error = gfs2_glock_nq_atime(&ip->i_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+ error = gfs2_glock_nq(&ip->i_gh);
if (unlikely(error))
goto out_uninit;
@@ -975,7 +975,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
if (gfs2_is_stuffed(ip))
return 0;
- if (offset > i_size_read(&ip->i_inode))
+ if (offset >= i_size_read(&ip->i_inode))
return 0;
return 1;
}
@@ -1000,8 +1000,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
* unfortunately have the option of only flushing a range like
* the VFS does.
*/
- gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
- rv = gfs2_glock_nq_atime(&gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
+ rv = gfs2_glock_nq(&gh);
if (rv)
return rv;
rv = gfs2_ok_for_dio(ip, rw, offset);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index e9a366d4411c..3a747f8e2188 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -89,8 +89,8 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
u64 offset = file->f_pos;
int error;
- gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
- error = gfs2_glock_nq_atime(&d_gh);
+ gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+ error = gfs2_glock_nq(&d_gh);
if (error) {
gfs2_holder_uninit(&d_gh);
return error;
@@ -153,8 +153,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
int error;
u32 fsflags;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
- error = gfs2_glock_nq_atime(&gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ error = gfs2_glock_nq(&gh);
if (error)
return error;
@@ -351,8 +351,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
struct gfs2_alloc *al;
int ret;
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh);
- ret = gfs2_glock_nq_atime(&gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ ret = gfs2_glock_nq(&gh);
if (ret)
goto out;
@@ -434,8 +434,8 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
struct gfs2_holder i_gh;
int error;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
- error = gfs2_glock_nq_atime(&i_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+ error = gfs2_glock_nq(&i_gh);
if (error) {
gfs2_holder_uninit(&i_gh);
return error;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b4d1d6490633..b117fcf2c4f5 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -40,6 +40,44 @@
#define DO 0
#define UNDO 1
+static const u32 gfs2_old_fs_formats[] = {
+ 0
+};
+
+static const u32 gfs2_old_multihost_formats[] = {
+ 0
+};
+
+/**
+ * gfs2_tune_init - Fill a gfs2_tune structure with default values
+ * @gt: tune
+ *
+ */
+
+static void gfs2_tune_init(struct gfs2_tune *gt)
+{
+ spin_lock_init(&gt->gt_spin);
+
+ gt->gt_demote_secs = 300;
+ gt->gt_incore_log_blocks = 1024;
+ gt->gt_log_flush_secs = 60;
+ gt->gt_recoverd_secs = 60;
+ gt->gt_logd_secs = 1;
+ gt->gt_quotad_secs = 5;
+ gt->gt_quota_simul_sync = 64;
+ gt->gt_quota_warn_period = 10;
+ gt->gt_quota_scale_num = 1;
+ gt->gt_quota_scale_den = 1;
+ gt->gt_quota_cache_secs = 300;
+ gt->gt_quota_quantum = 60;
+ gt->gt_new_files_jdata = 0;
+ gt->gt_max_readahead = 1 << 18;
+ gt->gt_stall_secs = 600;
+ gt->gt_complain_secs = 10;
+ gt->gt_statfs_quantum = 30;
+ gt->gt_statfs_slow = 0;
+}
+
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
@@ -96,21 +134,271 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
return sdp;
}
-static void init_vfs(struct super_block *sb, unsigned noatime)
+
+/**
+ * gfs2_check_sb - Check superblock
+ * @sdp: the filesystem
+ * @sb: The superblock
+ * @silent: Don't print a message if the check fails
+ *
+ * Checks the version code of the FS is one that we understand how to
+ * read and that the sizes of the various on-disk structures have not
+ * changed.
+ */
+
+static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
{
- struct gfs2_sbd *sdp = sb->s_fs_info;
+ unsigned int x;
- sb->s_magic = GFS2_MAGIC;
- sb->s_op = &gfs2_super_ops;
- sb->s_export_op = &gfs2_export_ops;
- sb->s_time_gran = 1;
- sb->s_maxbytes = MAX_LFS_FILESIZE;
+ if (sb->sb_magic != GFS2_MAGIC ||
+ sb->sb_type != GFS2_METATYPE_SB) {
+ if (!silent)
+ printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
+ return -EINVAL;
+ }
+
+ /* If format numbers match exactly, we're done. */
+
+ if (sb->sb_fs_format == GFS2_FORMAT_FS &&
+ sb->sb_multihost_format == GFS2_FORMAT_MULTI)
+ return 0;
+
+ if (sb->sb_fs_format != GFS2_FORMAT_FS) {
+ for (x = 0; gfs2_old_fs_formats[x]; x++)
+ if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
+ break;
+
+ if (!gfs2_old_fs_formats[x]) {
+ printk(KERN_WARNING
+ "GFS2: code version (%u, %u) is incompatible "
+ "with ondisk format (%u, %u)\n",
+ GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+ sb->sb_fs_format, sb->sb_multihost_format);
+ printk(KERN_WARNING
+ "GFS2: I don't know how to upgrade this FS\n");
+ return -EINVAL;
+ }
+ }
+
+ if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+ for (x = 0; gfs2_old_multihost_formats[x]; x++)
+ if (gfs2_old_multihost_formats[x] ==
+ sb->sb_multihost_format)
+ break;
+
+ if (!gfs2_old_multihost_formats[x]) {
+ printk(KERN_WARNING
+ "GFS2: code version (%u, %u) is incompatible "
+ "with ondisk format (%u, %u)\n",
+ GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+ sb->sb_fs_format, sb->sb_multihost_format);
+ printk(KERN_WARNING
+ "GFS2: I don't know how to upgrade this FS\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!sdp->sd_args.ar_upgrade) {
+ printk(KERN_WARNING
+ "GFS2: code version (%u, %u) is incompatible "
+ "with ondisk format (%u, %u)\n",
+ GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+ sb->sb_fs_format, sb->sb_multihost_format);
+ printk(KERN_INFO
+ "GFS2: Use the \"upgrade\" mount option to upgrade "
+ "the FS\n");
+ printk(KERN_INFO "GFS2: See the manual for more details\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void end_bio_io_page(struct bio *bio, int error)
+{
+ struct page *page = bio->bi_private;
- if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
- set_bit(noatime, &sdp->sd_flags);
+ if (!error)
+ SetPageUptodate(page);
+ else
+ printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
+ unlock_page(page);
+}
+
+static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+{
+ const struct gfs2_sb *str = buf;
+
+ sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
+ sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
+ sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
+ sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
+ sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
+ sb->sb_bsize = be32_to_cpu(str->sb_bsize);
+ sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
+ sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
+ sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
+ sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
+ sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
+
+ memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
+ memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
+}
+
+/**
+ * gfs2_read_super - Read the gfs2 super block from disk
+ * @sdp: The GFS2 super block
+ * @sector: The location of the super block
+ * @error: The error code to return
+ *
+ * This uses the bio functions to read the super block from disk
+ * because we want to be 100% sure that we never read cached data.
+ * A super block is read twice only during each GFS2 mount and is
+ * never written to by the filesystem. The first time its read no
+ * locks are held, and the only details which are looked at are those
+ * relating to the locking protocol. Once locking is up and working,
+ * the sb is read again under the lock to establish the location of
+ * the master directory (contains pointers to journals etc) and the
+ * root directory.
+ *
+ * Returns: 0 on success or error
+ */
+
+static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
+{
+ struct super_block *sb = sdp->sd_vfs;
+ struct gfs2_sb *p;
+ struct page *page;
+ struct bio *bio;
+
+ page = alloc_page(GFP_NOFS);
+ if (unlikely(!page))
+ return -ENOBUFS;
+
+ ClearPageUptodate(page);
+ ClearPageDirty(page);
+ lock_page(page);
+
+ bio = bio_alloc(GFP_NOFS, 1);
+ if (unlikely(!bio)) {
+ __free_page(page);
+ return -ENOBUFS;
+ }
- /* Don't let the VFS update atimes. GFS2 handles this itself. */
- sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
+ bio->bi_sector = sector * (sb->s_blocksize >> 9);
+ bio->bi_bdev = sb->s_bdev;
+ bio_add_page(bio, page, PAGE_SIZE, 0);
+
+ bio->bi_end_io = end_bio_io_page;
+ bio->bi_private = page;
+ submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
+ wait_on_page_locked(page);
+ bio_put(bio);
+ if (!PageUptodate(page)) {
+ __free_page(page);
+ return -EIO;
+ }
+ p = kmap(page);
+ gfs2_sb_in(&sdp->sd_sb, p);
+ kunmap(page);
+ __free_page(page);
+ return 0;
+}
+/**
+ * gfs2_read_sb - Read super block
+ * @sdp: The GFS2 superblock
+ * @gl: the glock for the superblock (assumed to be held)
+ * @silent: Don't print message if mount fails
+ *
+ */
+
+static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
+{
+ u32 hash_blocks, ind_blocks, leaf_blocks;
+ u32 tmp_blocks;
+ unsigned int x;
+ int error;
+
+ error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+ if (error) {
+ if (!silent)
+ fs_err(sdp, "can't read superblock\n");
+ return error;
+ }
+
+ error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
+ if (error)
+ return error;
+
+ sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
+ GFS2_BASIC_BLOCK_SHIFT;
+ sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+ sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_dinode)) / sizeof(u64);
+ sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_meta_header)) / sizeof(u64);
+ sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
+ sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
+ sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
+ sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
+ sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_meta_header)) /
+ sizeof(struct gfs2_quota_change);
+
+ /* Compute maximum reservation required to add a entry to a directory */
+
+ hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
+ sdp->sd_jbsize);
+
+ ind_blocks = 0;
+ for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
+ tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
+ ind_blocks += tmp_blocks;
+ }
+
+ leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
+
+ sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
+
+ sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_dinode);
+ sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
+ for (x = 2;; x++) {
+ u64 space, d;
+ u32 m;
+
+ space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
+ d = space;
+ m = do_div(d, sdp->sd_inptrs);
+
+ if (d != sdp->sd_heightsize[x - 1] || m)
+ break;
+ sdp->sd_heightsize[x] = space;
+ }
+ sdp->sd_max_height = x;
+ sdp->sd_heightsize[x] = ~0;
+ gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
+
+ sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_dinode);
+ sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
+ for (x = 2;; x++) {
+ u64 space, d;
+ u32 m;
+
+ space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
+ d = space;
+ m = do_div(d, sdp->sd_inptrs);
+
+ if (d != sdp->sd_jheightsize[x - 1] || m)
+ break;
+ sdp->sd_jheightsize[x] = space;
+ }
+ sdp->sd_max_jheight = x;
+ sdp->sd_jheightsize[x] = ~0;
+ gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
+
+ return 0;
}
static int init_names(struct gfs2_sbd *sdp, int silent)
@@ -224,51 +512,59 @@ fail:
return error;
}
-static inline struct inode *gfs2_lookup_root(struct super_block *sb,
- u64 no_addr)
+static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
+ u64 no_addr, const char *name)
{
- return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ struct dentry *dentry;
+ struct inode *inode;
+
+ inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
+ if (IS_ERR(inode)) {
+ fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
+ return PTR_ERR(inode);
+ }
+ dentry = d_alloc_root(inode);
+ if (!dentry) {
+ fs_err(sdp, "can't alloc %s dentry\n", name);
+ iput(inode);
+ return -ENOMEM;
+ }
+ dentry->d_op = &gfs2_dops;
+ *dptr = dentry;
+ return 0;
}
-static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
+static int init_sb(struct gfs2_sbd *sdp, int silent)
{
struct super_block *sb = sdp->sd_vfs;
struct gfs2_holder sb_gh;
u64 no_addr;
- struct inode *inode;
- int error = 0;
+ int ret;
- if (undo) {
- if (sb->s_root) {
- dput(sb->s_root);
- sb->s_root = NULL;
- }
- return 0;
+ ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
+ LM_ST_SHARED, 0, &sb_gh);
+ if (ret) {
+ fs_err(sdp, "can't acquire superblock glock: %d\n", ret);
+ return ret;
}
- error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
- LM_ST_SHARED, 0, &sb_gh);
- if (error) {
- fs_err(sdp, "can't acquire superblock glock: %d\n", error);
- return error;
- }
-
- error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
- if (error) {
- fs_err(sdp, "can't read superblock: %d\n", error);
+ ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
+ if (ret) {
+ fs_err(sdp, "can't read superblock: %d\n", ret);
goto out;
}
/* Set up the buffer cache and SB for real */
if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
- error = -EINVAL;
+ ret = -EINVAL;
fs_err(sdp, "FS block size (%u) is too small for device "
"block size (%u)\n",
sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
goto out;
}
if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
- error = -EINVAL;
+ ret = -EINVAL;
fs_err(sdp, "FS block size (%u) is too big for machine "
"page size (%u)\n",
sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
@@ -278,26 +574,21 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
/* Get the root inode */
no_addr = sdp->sd_sb.sb_root_dir.no_addr;
- if (sb->s_type == &gfs2meta_fs_type)
- no_addr = sdp->sd_sb.sb_master_dir.no_addr;
- inode = gfs2_lookup_root(sb, no_addr);
- if (IS_ERR(inode)) {
- error = PTR_ERR(inode);
- fs_err(sdp, "can't read in root inode: %d\n", error);
+ ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root");
+ if (ret)
goto out;
- }
- sb->s_root = d_alloc_root(inode);
- if (!sb->s_root) {
- fs_err(sdp, "can't get root dentry\n");
- error = -ENOMEM;
- iput(inode);
- } else
- sb->s_root->d_op = &gfs2_dops;
-
+ /* Get the master inode */
+ no_addr = sdp->sd_sb.sb_master_dir.no_addr;
+ ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master");
+ if (ret) {
+ dput(sdp->sd_root_dir);
+ goto out;
+ }
+ sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir);
out:
gfs2_glock_dq_uninit(&sb_gh);
- return error;
+ return ret;
}
/**
@@ -372,6 +663,7 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
static int init_journal(struct gfs2_sbd *sdp, int undo)
{
+ struct inode *master = sdp->sd_master_dir->d_inode;
struct gfs2_holder ji_gh;
struct task_struct *p;
struct gfs2_inode *ip;
@@ -383,7 +675,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
goto fail_recoverd;
}
- sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex");
+ sdp->sd_jindex = gfs2_lookup_simple(master, "jindex");
if (IS_ERR(sdp->sd_jindex)) {
fs_err(sdp, "can't lookup journal index: %d\n", error);
return PTR_ERR(sdp->sd_jindex);
@@ -506,25 +798,17 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
{
int error = 0;
struct gfs2_inode *ip;
- struct inode *inode;
+ struct inode *master = sdp->sd_master_dir->d_inode;
if (undo)
goto fail_qinode;
- inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
- if (IS_ERR(inode)) {
- error = PTR_ERR(inode);
- fs_err(sdp, "can't read in master directory: %d\n", error);
- goto fail;
- }
- sdp->sd_master_dir = inode;
-
error = init_journal(sdp, undo);
if (error)
- goto fail_master;
+ goto fail;
/* Read in the master inode number inode */
- sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum");
+ sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum");
if (IS_ERR(sdp->sd_inum_inode)) {
error = PTR_ERR(sdp->sd_inum_inode);
fs_err(sdp, "can't read in inum inode: %d\n", error);
@@ -533,7 +817,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
/* Read in the master statfs inode */
- sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs");
+ sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
if (IS_ERR(sdp->sd_statfs_inode)) {
error = PTR_ERR(sdp->sd_statfs_inode);
fs_err(sdp, "can't read in statfs inode: %d\n", error);
@@ -541,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
}
/* Read in the resource index inode */
- sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex");
+ sdp->sd_rindex = gfs2_lookup_simple(master, "rindex");
if (IS_ERR(sdp->sd_rindex)) {
error = PTR_ERR(sdp->sd_rindex);
fs_err(sdp, "can't get resource index inode: %d\n", error);
@@ -552,7 +836,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
sdp->sd_rindex_uptodate = 0;
/* Read in the quota inode */
- sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota");
+ sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota");
if (IS_ERR(sdp->sd_quota_inode)) {
error = PTR_ERR(sdp->sd_quota_inode);
fs_err(sdp, "can't get quota file inode: %d\n", error);
@@ -571,8 +855,6 @@ fail_inum:
iput(sdp->sd_inum_inode);
fail_journal:
init_journal(sdp, UNDO);
-fail_master:
- iput(sdp->sd_master_dir);
fail:
return error;
}
@@ -583,6 +865,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
char buf[30];
int error = 0;
struct gfs2_inode *ip;
+ struct inode *master = sdp->sd_master_dir->d_inode;
if (sdp->sd_args.ar_spectator)
return 0;
@@ -590,7 +873,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
if (undo)
goto fail_qc_gh;
- pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node");
+ pn = gfs2_lookup_simple(master, "per_node");
if (IS_ERR(pn)) {
error = PTR_ERR(pn);
fs_err(sdp, "can't find per_node directory: %d\n", error);
@@ -800,7 +1083,11 @@ static int fill_super(struct super_block *sb, void *data, int silent)
goto fail;
}
- init_vfs(sb, SDF_NOATIME);
+ sb->s_magic = GFS2_MAGIC;
+ sb->s_op = &gfs2_super_ops;
+ sb->s_export_op = &gfs2_export_ops;
+ sb->s_time_gran = 1;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
/* Set up the buffer cache and fill in some fake block size values
to allow us to read-in the on-disk superblock. */
@@ -828,7 +1115,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
if (error)
goto fail_lm;
- error = init_sb(sdp, silent, DO);
+ error = init_sb(sdp, silent);
if (error)
goto fail_locking;
@@ -869,7 +1156,11 @@ fail_per_node:
fail_inodes:
init_inodes(sdp, UNDO);
fail_sb:
- init_sb(sdp, 0, UNDO);
+ if (sdp->sd_root_dir)
+ dput(sdp->sd_root_dir);
+ if (sdp->sd_master_dir)
+ dput(sdp->sd_master_dir);
+ sb->s_root = NULL;
fail_locking:
init_locking(sdp, &mount_gh, UNDO);
fail_lm:
@@ -887,151 +1178,63 @@ fail:
}
static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+ const char *dev_name, void *data, struct vfsmount *mnt)
{
- struct super_block *sb;
- struct gfs2_sbd *sdp;
- int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
- if (error)
- goto out;
- sb = mnt->mnt_sb;
- sdp = sb->s_fs_info;
- sdp->sd_gfs2mnt = mnt;
-out:
- return error;
+ return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
}
-static int fill_super_meta(struct super_block *sb, struct super_block *new,
- void *data, int silent)
+static struct super_block *get_gfs2_sb(const char *dev_name)
{
- struct gfs2_sbd *sdp = sb->s_fs_info;
- struct inode *inode;
- int error = 0;
-
- new->s_fs_info = sdp;
- sdp->sd_vfs_meta = sb;
-
- init_vfs(new, SDF_NOATIME);
-
- /* Get the master inode */
- inode = igrab(sdp->sd_master_dir);
-
- new->s_root = d_alloc_root(inode);
- if (!new->s_root) {
- fs_err(sdp, "can't get root dentry\n");
- error = -ENOMEM;
- iput(inode);
- } else
- new->s_root->d_op = &gfs2_dops;
-
- return error;
-}
-
-static int set_bdev_super(struct super_block *s, void *data)
-{
- s->s_bdev = data;
- s->s_dev = s->s_bdev->bd_dev;
- return 0;
-}
-
-static int test_bdev_super(struct super_block *s, void *data)
-{
- return s->s_bdev == data;
-}
-
-static struct super_block* get_gfs2_sb(const char *dev_name)
-{
- struct kstat stat;
+ struct super_block *sb;
struct nameidata nd;
- struct super_block *sb = NULL, *s;
int error;
error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
if (error) {
- printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n",
- dev_name);
- goto out;
- }
- error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat);
-
- list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) {
- if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
- (S_ISDIR(stat.mode) &&
- s == nd.path.dentry->d_inode->i_sb)) {
- sb = s;
- goto free_nd;
- }
+ printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
+ dev_name, error);
+ return NULL;
}
-
- printk(KERN_WARNING "GFS2: Unrecognized block device or "
- "mount point %s\n", dev_name);
-
-free_nd:
+ sb = nd.path.dentry->d_inode->i_sb;
+ if (sb && (sb->s_type == &gfs2_fs_type))
+ atomic_inc(&sb->s_active);
+ else
+ sb = NULL;
path_put(&nd.path);
-out:
return sb;
}
static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data, struct vfsmount *mnt)
{
- int error = 0;
- struct super_block *sb = NULL, *new;
+ struct super_block *sb = NULL;
struct gfs2_sbd *sdp;
sb = get_gfs2_sb(dev_name);
if (!sb) {
printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
- error = -ENOENT;
- goto error;
+ return -ENOENT;
}
sdp = sb->s_fs_info;
- if (sdp->sd_vfs_meta) {
- printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n");
- error = -EBUSY;
- goto error;
- }
- down(&sb->s_bdev->bd_mount_sem);
- new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev);
- up(&sb->s_bdev->bd_mount_sem);
- if (IS_ERR(new)) {
- error = PTR_ERR(new);
- goto error;
- }
- new->s_flags = flags;
- strlcpy(new->s_id, sb->s_id, sizeof(new->s_id));
- sb_set_blocksize(new, sb->s_blocksize);
- error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0);
- if (error) {
- up_write(&new->s_umount);
- deactivate_super(new);
- goto error;
- }
-
- new->s_flags |= MS_ACTIVE;
-
- /* Grab a reference to the gfs2 mount point */
- atomic_inc(&sdp->sd_gfs2mnt->mnt_count);
- return simple_set_mnt(mnt, new);
-error:
- return error;
+ mnt->mnt_sb = sb;
+ mnt->mnt_root = dget(sdp->sd_master_dir);
+ return 0;
}
static void gfs2_kill_sb(struct super_block *sb)
{
- if (sb->s_fs_info) {
- gfs2_delete_debugfs_file(sb->s_fs_info);
- gfs2_meta_syncfs(sb->s_fs_info);
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ if (sdp) {
+ gfs2_meta_syncfs(sdp);
+ dput(sdp->sd_root_dir);
+ dput(sdp->sd_master_dir);
+ sdp->sd_root_dir = NULL;
+ sdp->sd_master_dir = NULL;
}
+ shrink_dcache_sb(sb);
kill_block_super(sb);
-}
-
-static void gfs2_kill_sb_meta(struct super_block *sb)
-{
- struct gfs2_sbd *sdp = sb->s_fs_info;
- generic_shutdown_super(sb);
- sdp->sd_vfs_meta = NULL;
- atomic_dec(&sdp->sd_gfs2mnt->mnt_count);
+ if (sdp)
+ gfs2_delete_debugfs_file(sdp);
}
struct file_system_type gfs2_fs_type = {
@@ -1046,7 +1249,6 @@ struct file_system_type gfs2meta_fs_type = {
.name = "gfs2meta",
.fs_flags = FS_REQUIRES_DEV,
.get_sb = gfs2_get_sb_meta,
- .kill_sb = gfs2_kill_sb_meta,
.owner = THIS_MODULE,
};
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index e2c62f73a778..534e1e2c65ca 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- error = gfs2_glock_nq_m(2, ghs);
+ error = gfs2_glock_nq(ghs); /* parent */
if (error)
- goto out;
+ goto out_parent;
+
+ error = gfs2_glock_nq(ghs + 1); /* child */
+ if (error)
+ goto out_child;
error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
if (error)
@@ -245,8 +249,10 @@ out_alloc:
if (alloc_required)
gfs2_alloc_put(dip);
out_gunlock:
- gfs2_glock_dq_m(2, ghs);
-out:
+ gfs2_glock_dq(ghs + 1);
+out_child:
+ gfs2_glock_dq(ghs);
+out_parent:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
if (!error) {
@@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
if (error)
- goto out_rgrp;
+ goto out_gunlock;
error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
if (error)
@@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
+out_gunlock:
gfs2_glock_dq(ghs + 2);
out_rgrp:
gfs2_holder_uninit(ghs + 2);
@@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
struct gfs2_holder ri_gh;
int error;
-
error = gfs2_rindex_hold(sdp, &ri_gh);
if (error)
return error;
@@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
- error = gfs2_glock_nq_m(3, ghs);
+ error = gfs2_glock_nq(ghs); /* parent */
if (error)
- goto out;
+ goto out_parent;
+
+ error = gfs2_glock_nq(ghs + 1); /* child */
+ if (error)
+ goto out_child;
+
+ error = gfs2_glock_nq(ghs + 2); /* rgrp */
+ if (error)
+ goto out_rgrp;
error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
if (error)
@@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
gfs2_trans_end(sdp);
out_gunlock:
- gfs2_glock_dq_m(3, ghs);
-out:
- gfs2_holder_uninit(ghs);
- gfs2_holder_uninit(ghs + 1);
+ gfs2_glock_dq(ghs + 2);
+out_rgrp:
gfs2_holder_uninit(ghs + 2);
+ gfs2_glock_dq(ghs + 1);
+out_child:
+ gfs2_holder_uninit(ghs + 1);
+ gfs2_glock_dq(ghs);
+out_parent:
+ gfs2_holder_uninit(ghs);
gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -571,6 +589,54 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
return 0;
}
+/*
+ * gfs2_ok_to_move - check if it's ok to move a directory to another directory
+ * @this: move this
+ * @to: to here
+ *
+ * Follow @to back to the root and make sure we don't encounter @this
+ * Assumes we already hold the rename lock.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
+{
+ struct inode *dir = &to->i_inode;
+ struct super_block *sb = dir->i_sb;
+ struct inode *tmp;
+ struct qstr dotdot;
+ int error = 0;
+
+ gfs2_str2qstr(&dotdot, "..");
+
+ igrab(dir);
+
+ for (;;) {
+ if (dir == &this->i_inode) {
+ error = -EINVAL;
+ break;
+ }
+ if (dir == sb->s_root->d_inode) {
+ error = 0;
+ break;
+ }
+
+ tmp = gfs2_lookupi(dir, &dotdot, 1);
+ if (IS_ERR(tmp)) {
+ error = PTR_ERR(tmp);
+ break;
+ }
+
+ iput(dir);
+ dir = tmp;
+ }
+
+ iput(dir);
+
+ return error;
+}
+
/**
* gfs2_rename - Rename a file
* @odir: Parent directory of old file name
@@ -589,7 +655,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
struct gfs2_inode *nip = NULL;
struct gfs2_sbd *sdp = GFS2_SB(odir);
- struct gfs2_holder ghs[5], r_gh;
+ struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
struct gfs2_rgrpd *nrgd;
unsigned int num_gh;
int dir_rename = 0;
@@ -603,19 +669,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
return 0;
}
- /* Make sure we aren't trying to move a dirctory into it's subdir */
-
- if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) {
- dir_rename = 1;
- error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0,
- &r_gh);
+ if (odip != ndip) {
+ error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
+ 0, &r_gh);
if (error)
goto out;
- error = gfs2_ok_to_move(ip, ndip);
- if (error)
- goto out_gunlock_r;
+ if (S_ISDIR(ip->i_inode.i_mode)) {
+ dir_rename = 1;
+ /* don't move a dirctory into it's subdir */
+ error = gfs2_ok_to_move(ip, ndip);
+ if (error)
+ goto out_gunlock_r;
+ }
}
num_gh = 1;
@@ -639,9 +706,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
}
- error = gfs2_glock_nq_m(num_gh, ghs);
- if (error)
- goto out_uninit;
+ for (x = 0; x < num_gh; x++) {
+ error = gfs2_glock_nq(ghs + x);
+ if (error)
+ goto out_gunlock;
+ }
/* Check out the old directory */
@@ -804,12 +873,12 @@ out_alloc:
if (alloc_required)
gfs2_alloc_put(ndip);
out_gunlock:
- gfs2_glock_dq_m(num_gh, ghs);
-out_uninit:
- for (x = 0; x < num_gh; x++)
+ while (x--) {
+ gfs2_glock_dq(ghs + x);
gfs2_holder_uninit(ghs + x);
+ }
out_gunlock_r:
- if (dir_rename)
+ if (r_gh.gh_gl)
gfs2_glock_dq_uninit(&r_gh);
out:
return error;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index f66ea0f7a356..d5355d9b5926 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -20,6 +20,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
+#include <linux/time.h>
#include "gfs2.h"
#include "incore.h"
@@ -38,6 +39,7 @@
#include "dir.h"
#include "eattr.h"
#include "bmap.h"
+#include "meta_io.h"
/**
* gfs2_write_inode - Make sure the inode is stable on the disk
@@ -50,16 +52,74 @@
static int gfs2_write_inode(struct inode *inode, int sync)
{
struct gfs2_inode *ip = GFS2_I(inode);
-
- /* Check this is a "normal" inode */
- if (test_bit(GIF_USER, &ip->i_flags)) {
- if (current->flags & PF_MEMALLOC)
- return 0;
- if (sync)
- gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_holder gh;
+ struct buffer_head *bh;
+ struct timespec atime;
+ struct gfs2_dinode *di;
+ int ret = 0;
+
+ /* Check this is a "normal" inode, etc */
+ if (!test_bit(GIF_USER, &ip->i_flags) ||
+ (current->flags & PF_MEMALLOC))
+ return 0;
+ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ if (ret)
+ goto do_flush;
+ ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
+ if (ret)
+ goto do_unlock;
+ ret = gfs2_meta_inode_buffer(ip, &bh);
+ if (ret == 0) {
+ di = (struct gfs2_dinode *)bh->b_data;
+ atime.tv_sec = be64_to_cpu(di->di_atime);
+ atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
+ if (timespec_compare(&inode->i_atime, &atime) > 0) {
+ gfs2_trans_add_bh(ip->i_gl, bh, 1);
+ gfs2_dinode_out(ip, bh->b_data);
+ }
+ brelse(bh);
}
+ gfs2_trans_end(sdp);
+do_unlock:
+ gfs2_glock_dq_uninit(&gh);
+do_flush:
+ if (sync != 0)
+ gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+ return ret;
+}
- return 0;
+/**
+ * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
+ * @sdp: the filesystem
+ *
+ * Returns: errno
+ */
+
+static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+{
+ struct gfs2_holder t_gh;
+ int error;
+
+ gfs2_quota_sync(sdp);
+ gfs2_statfs_sync(sdp);
+
+ error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
+ &t_gh);
+ if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ return error;
+
+ gfs2_meta_syncfs(sdp);
+ gfs2_log_shutdown(sdp);
+
+ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
+ if (t_gh.gh_gl)
+ gfs2_glock_dq_uninit(&t_gh);
+
+ gfs2_quota_cleanup(sdp);
+
+ return error;
}
/**
@@ -73,12 +133,6 @@ static void gfs2_put_super(struct super_block *sb)
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
- if (!sdp)
- return;
-
- if (!strncmp(sb->s_type->name, "gfs2meta", 8))
- return; /* Nothing to do */
-
/* Unfreeze the filesystem, if we need to */
mutex_lock(&sdp->sd_freeze_lock);
@@ -101,7 +155,6 @@ static void gfs2_put_super(struct super_block *sb)
/* Release stuff */
- iput(sdp->sd_master_dir);
iput(sdp->sd_jindex);
iput(sdp->sd_inum_inode);
iput(sdp->sd_statfs_inode);
@@ -152,6 +205,7 @@ static void gfs2_write_super(struct super_block *sb)
*
* Flushes the log to disk.
*/
+
static int gfs2_sync_fs(struct super_block *sb, int wait)
{
sb->s_dirt = 0;
@@ -270,14 +324,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
}
}
- if (*flags & (MS_NOATIME | MS_NODIRATIME))
- set_bit(SDF_NOATIME, &sdp->sd_flags);
- else
- clear_bit(SDF_NOATIME, &sdp->sd_flags);
-
- /* Don't let the VFS update atimes. GFS2 handles this itself. */
- *flags |= MS_NOATIME | MS_NODIRATIME;
-
return error;
}
@@ -295,6 +341,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
* inode's blocks, or alternatively pass the baton on to another
* node for later deallocation.
*/
+
static void gfs2_drop_inode(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
@@ -333,6 +380,16 @@ static void gfs2_clear_inode(struct inode *inode)
}
}
+static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
+{
+ do {
+ if (d1 == d2)
+ return 1;
+ d1 = d1->d_parent;
+ } while (!IS_ROOT(d1));
+ return 0;
+}
+
/**
* gfs2_show_options - Show mount options for /proc/mounts
* @s: seq_file structure
@@ -346,6 +403,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
struct gfs2_args *args = &sdp->sd_args;
+ if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
+ seq_printf(s, ",meta");
if (args->ar_lockproto[0])
seq_printf(s, ",lockproto=%s", args->ar_lockproto);
if (args->ar_locktable[0])
@@ -414,6 +473,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
* conversion on the iopen lock, but we can change that later. This
* is safe, just less efficient.
*/
+
static void gfs2_delete_inode(struct inode *inode)
{
struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
@@ -478,8 +538,6 @@ out:
clear_inode(inode);
}
-
-
static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
struct gfs2_inode *ip;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ca831991cbc2..c3ba3d9d0aac 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -33,313 +33,6 @@
#include "trans.h"
#include "util.h"
-static const u32 gfs2_old_fs_formats[] = {
- 0
-};
-
-static const u32 gfs2_old_multihost_formats[] = {
- 0
-};
-
-/**
- * gfs2_tune_init - Fill a gfs2_tune structure with default values
- * @gt: tune
- *
- */
-
-void gfs2_tune_init(struct gfs2_tune *gt)
-{
- spin_lock_init(&gt->gt_spin);
-
- gt->gt_demote_secs = 300;
- gt->gt_incore_log_blocks = 1024;
- gt->gt_log_flush_secs = 60;
- gt->gt_recoverd_secs = 60;
- gt->gt_logd_secs = 1;
- gt->gt_quotad_secs = 5;
- gt->gt_quota_simul_sync = 64;
- gt->gt_quota_warn_period = 10;
- gt->gt_quota_scale_num = 1;
- gt->gt_quota_scale_den = 1;
- gt->gt_quota_cache_secs = 300;
- gt->gt_quota_quantum = 60;
- gt->gt_atime_quantum = 3600;
- gt->gt_new_files_jdata = 0;
- gt->gt_max_readahead = 1 << 18;
- gt->gt_stall_secs = 600;
- gt->gt_complain_secs = 10;
- gt->gt_statfs_quantum = 30;
- gt->gt_statfs_slow = 0;
-}
-
-/**
- * gfs2_check_sb - Check superblock
- * @sdp: the filesystem
- * @sb: The superblock
- * @silent: Don't print a message if the check fails
- *
- * Checks the version code of the FS is one that we understand how to
- * read and that the sizes of the various on-disk structures have not
- * changed.
- */
-
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
-{
- unsigned int x;
-
- if (sb->sb_magic != GFS2_MAGIC ||
- sb->sb_type != GFS2_METATYPE_SB) {
- if (!silent)
- printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
- return -EINVAL;
- }
-
- /* If format numbers match exactly, we're done. */
-
- if (sb->sb_fs_format == GFS2_FORMAT_FS &&
- sb->sb_multihost_format == GFS2_FORMAT_MULTI)
- return 0;
-
- if (sb->sb_fs_format != GFS2_FORMAT_FS) {
- for (x = 0; gfs2_old_fs_formats[x]; x++)
- if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
- break;
-
- if (!gfs2_old_fs_formats[x]) {
- printk(KERN_WARNING
- "GFS2: code version (%u, %u) is incompatible "
- "with ondisk format (%u, %u)\n",
- GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
- sb->sb_fs_format, sb->sb_multihost_format);
- printk(KERN_WARNING
- "GFS2: I don't know how to upgrade this FS\n");
- return -EINVAL;
- }
- }
-
- if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
- for (x = 0; gfs2_old_multihost_formats[x]; x++)
- if (gfs2_old_multihost_formats[x] ==
- sb->sb_multihost_format)
- break;
-
- if (!gfs2_old_multihost_formats[x]) {
- printk(KERN_WARNING
- "GFS2: code version (%u, %u) is incompatible "
- "with ondisk format (%u, %u)\n",
- GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
- sb->sb_fs_format, sb->sb_multihost_format);
- printk(KERN_WARNING
- "GFS2: I don't know how to upgrade this FS\n");
- return -EINVAL;
- }
- }
-
- if (!sdp->sd_args.ar_upgrade) {
- printk(KERN_WARNING
- "GFS2: code version (%u, %u) is incompatible "
- "with ondisk format (%u, %u)\n",
- GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
- sb->sb_fs_format, sb->sb_multihost_format);
- printk(KERN_INFO
- "GFS2: Use the \"upgrade\" mount option to upgrade "
- "the FS\n");
- printk(KERN_INFO "GFS2: See the manual for more details\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-
-static void end_bio_io_page(struct bio *bio, int error)
-{
- struct page *page = bio->bi_private;
-
- if (!error)
- SetPageUptodate(page);
- else
- printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
- unlock_page(page);
-}
-
-static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
-{
- const struct gfs2_sb *str = buf;
-
- sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
- sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
- sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
- sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
- sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
- sb->sb_bsize = be32_to_cpu(str->sb_bsize);
- sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
- sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
- sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
- sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
- sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
-
- memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
- memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-}
-
-/**
- * gfs2_read_super - Read the gfs2 super block from disk
- * @sdp: The GFS2 super block
- * @sector: The location of the super block
- * @error: The error code to return
- *
- * This uses the bio functions to read the super block from disk
- * because we want to be 100% sure that we never read cached data.
- * A super block is read twice only during each GFS2 mount and is
- * never written to by the filesystem. The first time its read no
- * locks are held, and the only details which are looked at are those
- * relating to the locking protocol. Once locking is up and working,
- * the sb is read again under the lock to establish the location of
- * the master directory (contains pointers to journals etc) and the
- * root directory.
- *
- * Returns: 0 on success or error
- */
-
-int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
-{
- struct super_block *sb = sdp->sd_vfs;
- struct gfs2_sb *p;
- struct page *page;
- struct bio *bio;
-
- page = alloc_page(GFP_NOFS);
- if (unlikely(!page))
- return -ENOBUFS;
-
- ClearPageUptodate(page);
- ClearPageDirty(page);
- lock_page(page);
-
- bio = bio_alloc(GFP_NOFS, 1);
- if (unlikely(!bio)) {
- __free_page(page);
- return -ENOBUFS;
- }
-
- bio->bi_sector = sector * (sb->s_blocksize >> 9);
- bio->bi_bdev = sb->s_bdev;
- bio_add_page(bio, page, PAGE_SIZE, 0);
-
- bio->bi_end_io = end_bio_io_page;
- bio->bi_private = page;
- submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
- wait_on_page_locked(page);
- bio_put(bio);
- if (!PageUptodate(page)) {
- __free_page(page);
- return -EIO;
- }
- p = kmap(page);
- gfs2_sb_in(&sdp->sd_sb, p);
- kunmap(page);
- __free_page(page);
- return 0;
-}
-
-/**
- * gfs2_read_sb - Read super block
- * @sdp: The GFS2 superblock
- * @gl: the glock for the superblock (assumed to be held)
- * @silent: Don't print message if mount fails
- *
- */
-
-int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
-{
- u32 hash_blocks, ind_blocks, leaf_blocks;
- u32 tmp_blocks;
- unsigned int x;
- int error;
-
- error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
- if (error) {
- if (!silent)
- fs_err(sdp, "can't read superblock\n");
- return error;
- }
-
- error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
- if (error)
- return error;
-
- sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
- GFS2_BASIC_BLOCK_SHIFT;
- sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
- sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_dinode)) / sizeof(u64);
- sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_meta_header)) / sizeof(u64);
- sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
- sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
- sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
- sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
- sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_meta_header)) /
- sizeof(struct gfs2_quota_change);
-
- /* Compute maximum reservation required to add a entry to a directory */
-
- hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
- sdp->sd_jbsize);
-
- ind_blocks = 0;
- for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
- tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
- ind_blocks += tmp_blocks;
- }
-
- leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
-
- sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
-
- sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_dinode);
- sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
- for (x = 2;; x++) {
- u64 space, d;
- u32 m;
-
- space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
- d = space;
- m = do_div(d, sdp->sd_inptrs);
-
- if (d != sdp->sd_heightsize[x - 1] || m)
- break;
- sdp->sd_heightsize[x] = space;
- }
- sdp->sd_max_height = x;
- sdp->sd_heightsize[x] = ~0;
- gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
-
- sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_dinode);
- sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
- for (x = 2;; x++) {
- u64 space, d;
- u32 m;
-
- space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
- d = space;
- m = do_div(d, sdp->sd_inptrs);
-
- if (d != sdp->sd_jheightsize[x - 1] || m)
- break;
- sdp->sd_jheightsize[x] = space;
- }
- sdp->sd_max_jheight = x;
- sdp->sd_jheightsize[x] = ~0;
- gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
-
- return 0;
-}
-
/**
* gfs2_jindex_hold - Grab a lock on the jindex
* @sdp: The GFS2 superblock
@@ -581,39 +274,6 @@ fail:
return error;
}
-/**
- * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
- * @sdp: the filesystem
- *
- * Returns: errno
- */
-
-int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
-{
- struct gfs2_holder t_gh;
- int error;
-
- gfs2_quota_sync(sdp);
- gfs2_statfs_sync(sdp);
-
- error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
- &t_gh);
- if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
- return error;
-
- gfs2_meta_syncfs(sdp);
- gfs2_log_shutdown(sdp);
-
- clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-
- if (t_gh.gh_gl)
- gfs2_glock_dq_uninit(&t_gh);
-
- gfs2_quota_cleanup(sdp);
-
- return error;
-}
-
static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
{
const struct gfs2_statfs_change *str = buf;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 44361ecc44f7..50a4c9b1215e 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -12,11 +12,6 @@
#include "incore.h"
-void gfs2_tune_init(struct gfs2_tune *gt);
-
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
-int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
-int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
void gfs2_lm_unmount(struct gfs2_sbd *sdp);
static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
@@ -40,7 +35,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
struct gfs2_inode **ipp);
int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
-int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
int gfs2_statfs_init(struct gfs2_sbd *sdp);
void gfs2_statfs_change(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 74846559fc3f..7e1879f1a02c 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -269,14 +269,6 @@ ARGS_ATTR(quota, "%u\n");
ARGS_ATTR(suiddir, "%d\n");
ARGS_ATTR(data, "%d\n");
-/* one oddball doesn't fit the macro mold */
-static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%d\n",
- !!test_bit(SDF_NOATIME, &sdp->sd_flags));
-}
-static struct args_attr args_attr_noatime = __ATTR_RO(noatime);
-
static struct attribute *args_attrs[] = {
&args_attr_lockproto.attr,
&args_attr_locktable.attr,
@@ -292,7 +284,6 @@ static struct attribute *args_attrs[] = {
&args_attr_quota.attr,
&args_attr_suiddir.attr,
&args_attr_data.attr,
- &args_attr_noatime.attr,
NULL,
};
@@ -407,7 +398,6 @@ TUNE_ATTR(incore_log_blocks, 0);
TUNE_ATTR(log_flush_secs, 0);
TUNE_ATTR(quota_warn_period, 0);
TUNE_ATTR(quota_quantum, 0);
-TUNE_ATTR(atime_quantum, 0);
TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(statfs_slow, 0);
@@ -427,7 +417,6 @@ static struct attribute *tune_attrs[] = {
&tune_attr_log_flush_secs.attr,
&tune_attr_quota_warn_period.attr,
&tune_attr_quota_quantum.attr,
- &tune_attr_atime_quantum.attr,
&tune_attr_max_readahead.attr,
&tune_attr_complain_secs.attr,
&tune_attr_statfs_slow.attr,
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 60249429a253..d85c7d931cdf 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -323,7 +323,7 @@ out:
}
/*
- * remove_kevent - cleans up and ultimately frees the given kevent
+ * remove_kevent - cleans up the given kevent
*
* Caller must hold dev->ev_mutex.
*/
@@ -334,7 +334,13 @@ static void remove_kevent(struct inotify_device *dev,
dev->event_count--;
dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
+}
+/*
+ * free_kevent - frees the given kevent.
+ */
+static void free_kevent(struct inotify_kernel_event *kevent)
+{
kfree(kevent->name);
kmem_cache_free(event_cachep, kevent);
}
@@ -350,6 +356,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev)
struct inotify_kernel_event *kevent;
kevent = inotify_dev_get_event(dev);
remove_kevent(dev, kevent);
+ free_kevent(kevent);
}
}
@@ -433,17 +440,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
dev = file->private_data;
while (1) {
- int events;
prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE);
mutex_lock(&dev->ev_mutex);
- events = !list_empty(&dev->events);
- mutex_unlock(&dev->ev_mutex);
- if (events) {
+ if (!list_empty(&dev->events)) {
ret = 0;
break;
}
+ mutex_unlock(&dev->ev_mutex);
if (file->f_flags & O_NONBLOCK) {
ret = -EAGAIN;
@@ -462,7 +467,6 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
if (ret)
return ret;
- mutex_lock(&dev->ev_mutex);
while (1) {
struct inotify_kernel_event *kevent;
@@ -481,6 +485,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
}
break;
}
+ remove_kevent(dev, kevent);
+
+ /*
+ * Must perform the copy_to_user outside the mutex in order
+ * to avoid a lock order reversal with mmap_sem.
+ */
+ mutex_unlock(&dev->ev_mutex);
if (copy_to_user(buf, &kevent->event, event_size)) {
ret = -EFAULT;
@@ -498,7 +509,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
count -= kevent->event.len;
}
- remove_kevent(dev, kevent);
+ free_kevent(kevent);
+
+ mutex_lock(&dev->ev_mutex);
}
mutex_unlock(&dev->ev_mutex);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ecc3330972e5..7408227c49c9 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -120,22 +120,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) =
* a pointer to that same buffer (for convenience).
*/
-char *disk_name(struct gendisk *hd, int part, char *buf)
+char *disk_name(struct gendisk *hd, int partno, char *buf)
{
- if (!part)
+ if (!partno)
snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
- snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part);
+ snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
else
- snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part);
+ snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
return buf;
}
const char *bdevname(struct block_device *bdev, char *buf)
{
- int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor;
- return disk_name(bdev->bd_disk, part, buf);
+ return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
}
EXPORT_SYMBOL(bdevname);
@@ -169,7 +168,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
if (isdigit(state->name[strlen(state->name)-1]))
sprintf(state->name, "p");
- state->limit = hd->minors;
+ state->limit = disk_max_parts(hd);
i = res = err = 0;
while (!res && check_part[i]) {
memset(&state->parts, 0, sizeof(state->parts));
@@ -204,21 +203,22 @@ static ssize_t part_start_show(struct device *dev,
return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
}
-static ssize_t part_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t part_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
}
-static ssize_t part_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t part_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
+ int cpu;
- preempt_disable();
- part_round_stats(p);
- preempt_enable();
+ cpu = part_stat_lock();
+ part_round_stats(cpu, p);
+ part_stat_unlock();
return sprintf(buf,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
@@ -238,17 +238,17 @@ static ssize_t part_stat_show(struct device *dev,
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
-static ssize_t part_fail_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t part_fail_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%d\n", p->make_it_fail);
}
-static ssize_t part_fail_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+ssize_t part_fail_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct hd_struct *p = dev_to_part(dev);
int i;
@@ -300,40 +300,34 @@ struct device_type part_type = {
.release = part_release,
};
-static inline void partition_sysfs_add_subdir(struct hd_struct *p)
-{
- struct kobject *k;
-
- k = kobject_get(&p->dev.kobj);
- p->holder_dir = kobject_create_and_add("holders", k);
- kobject_put(k);
-}
-
-static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
+static void delete_partition_rcu_cb(struct rcu_head *head)
{
- struct kobject *k;
+ struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
- k = kobject_get(&disk->dev.kobj);
- disk->holder_dir = kobject_create_and_add("holders", k);
- disk->slave_dir = kobject_create_and_add("slaves", k);
- kobject_put(k);
+ part->start_sect = 0;
+ part->nr_sects = 0;
+ part_stat_set_all(part, 0);
+ put_device(part_to_dev(part));
}
-void delete_partition(struct gendisk *disk, int part)
+void delete_partition(struct gendisk *disk, int partno)
{
- struct hd_struct *p = disk->part[part-1];
+ struct disk_part_tbl *ptbl = disk->part_tbl;
+ struct hd_struct *part;
- if (!p)
+ if (partno >= ptbl->len)
return;
- if (!p->nr_sects)
+
+ part = ptbl->part[partno];
+ if (!part)
return;
- disk->part[part-1] = NULL;
- p->start_sect = 0;
- p->nr_sects = 0;
- part_stat_set_all(p, 0);
- kobject_put(p->holder_dir);
- device_del(&p->dev);
- put_device(&p->dev);
+
+ blk_free_devt(part_devt(part));
+ rcu_assign_pointer(ptbl->part[partno], NULL);
+ kobject_put(part->holder_dir);
+ device_del(part_to_dev(part));
+
+ call_rcu(&part->rcu_head, delete_partition_rcu_cb);
}
static ssize_t whole_disk_show(struct device *dev,
@@ -344,102 +338,132 @@ static ssize_t whole_disk_show(struct device *dev,
static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
whole_disk_show, NULL);
-int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int partno,
+ sector_t start, sector_t len, int flags)
{
struct hd_struct *p;
+ dev_t devt = MKDEV(0, 0);
+ struct device *ddev = disk_to_dev(disk);
+ struct device *pdev;
+ struct disk_part_tbl *ptbl;
+ const char *dname;
int err;
+ err = disk_expand_part_tbl(disk, partno);
+ if (err)
+ return err;
+ ptbl = disk->part_tbl;
+
+ if (ptbl->part[partno])
+ return -EBUSY;
+
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
if (!init_part_stats(p)) {
err = -ENOMEM;
- goto out0;
+ goto out_free;
}
+ pdev = part_to_dev(p);
+
p->start_sect = start;
p->nr_sects = len;
- p->partno = part;
- p->policy = disk->policy;
+ p->partno = partno;
+ p->policy = get_disk_ro(disk);
- if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1]))
- snprintf(p->dev.bus_id, BUS_ID_SIZE,
- "%sp%d", disk->dev.bus_id, part);
+ dname = dev_name(ddev);
+ if (isdigit(dname[strlen(dname) - 1]))
+ snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno);
else
- snprintf(p->dev.bus_id, BUS_ID_SIZE,
- "%s%d", disk->dev.bus_id, part);
+ snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno);
- device_initialize(&p->dev);
- p->dev.devt = MKDEV(disk->major, disk->first_minor + part);
- p->dev.class = &block_class;
- p->dev.type = &part_type;
- p->dev.parent = &disk->dev;
- disk->part[part-1] = p;
+ device_initialize(pdev);
+ pdev->class = &block_class;
+ pdev->type = &part_type;
+ pdev->parent = ddev;
+
+ err = blk_alloc_devt(p, &devt);
+ if (err)
+ goto out_free;
+ pdev->devt = devt;
/* delay uevent until 'holders' subdir is created */
- p->dev.uevent_suppress = 1;
- err = device_add(&p->dev);
+ pdev->uevent_suppress = 1;
+ err = device_add(pdev);
if (err)
- goto out1;
- partition_sysfs_add_subdir(p);
- p->dev.uevent_suppress = 0;
+ goto out_put;
+
+ err = -ENOMEM;
+ p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
+ if (!p->holder_dir)
+ goto out_del;
+
+ pdev->uevent_suppress = 0;
if (flags & ADDPART_FLAG_WHOLEDISK) {
- err = device_create_file(&p->dev, &dev_attr_whole_disk);
+ err = device_create_file(pdev, &dev_attr_whole_disk);
if (err)
- goto out2;
+ goto out_del;
}
+ /* everything is up and running, commence */
+ INIT_RCU_HEAD(&p->rcu_head);
+ rcu_assign_pointer(ptbl->part[partno], p);
+
/* suppress uevent if the disk supresses it */
- if (!disk->dev.uevent_suppress)
- kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+ if (!ddev->uevent_suppress)
+ kobject_uevent(&pdev->kobj, KOBJ_ADD);
return 0;
-out2:
- device_del(&p->dev);
-out1:
- put_device(&p->dev);
- free_part_stats(p);
-out0:
+out_free:
kfree(p);
return err;
+out_del:
+ kobject_put(p->holder_dir);
+ device_del(pdev);
+out_put:
+ put_device(pdev);
+ blk_free_devt(devt);
+ return err;
}
/* Not exported, helper to add_disk(). */
void register_disk(struct gendisk *disk)
{
+ struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
+ struct disk_part_iter piter;
+ struct hd_struct *part;
char *s;
- int i;
- struct hd_struct *p;
int err;
- disk->dev.parent = disk->driverfs_dev;
- disk->dev.devt = MKDEV(disk->major, disk->first_minor);
+ ddev->parent = disk->driverfs_dev;
- strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE);
+ strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE);
/* ewww... some of these buggers have / in the name... */
- s = strchr(disk->dev.bus_id, '/');
+ s = strchr(ddev->bus_id, '/');
if (s)
*s = '!';
/* delay uevents, until we scanned partition table */
- disk->dev.uevent_suppress = 1;
+ ddev->uevent_suppress = 1;
- if (device_add(&disk->dev))
+ if (device_add(ddev))
return;
#ifndef CONFIG_SYSFS_DEPRECATED
- err = sysfs_create_link(block_depr, &disk->dev.kobj,
- kobject_name(&disk->dev.kobj));
+ err = sysfs_create_link(block_depr, &ddev->kobj,
+ kobject_name(&ddev->kobj));
if (err) {
- device_del(&disk->dev);
+ device_del(ddev);
return;
}
#endif
- disk_sysfs_add_subdirs(disk);
+ disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
+ disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
/* No minors to use for partitions */
- if (disk->minors == 1)
+ if (!disk_partitionable(disk))
goto exit;
/* No such device (e.g., media were just removed) */
@@ -458,41 +482,57 @@ void register_disk(struct gendisk *disk)
exit:
/* announce disk after possible partitions are created */
- disk->dev.uevent_suppress = 0;
- kobject_uevent(&disk->dev.kobj, KOBJ_ADD);
+ ddev->uevent_suppress = 0;
+ kobject_uevent(&ddev->kobj, KOBJ_ADD);
/* announce possible partitions */
- for (i = 1; i < disk->minors; i++) {
- p = disk->part[i-1];
- if (!p || !p->nr_sects)
- continue;
- kobject_uevent(&p->dev.kobj, KOBJ_ADD);
- }
+ disk_part_iter_init(&piter, disk, 0);
+ while ((part = disk_part_iter_next(&piter)))
+ kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
+ disk_part_iter_exit(&piter);
}
int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
{
+ struct disk_part_iter piter;
+ struct hd_struct *part;
struct parsed_partitions *state;
- int p, res;
+ int p, highest, res;
if (bdev->bd_part_count)
return -EBUSY;
res = invalidate_partition(disk, 0);
if (res)
return res;
- bdev->bd_invalidated = 0;
- for (p = 1; p < disk->minors; p++)
- delete_partition(disk, p);
+
+ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
+ while ((part = disk_part_iter_next(&piter)))
+ delete_partition(disk, part->partno);
+ disk_part_iter_exit(&piter);
+
if (disk->fops->revalidate_disk)
disk->fops->revalidate_disk(disk);
+ check_disk_size_change(disk, bdev);
+ bdev->bd_invalidated = 0;
if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
return 0;
if (IS_ERR(state)) /* I/O error reading the partition table */
return -EIO;
/* tell userspace that the media / partition table may have changed */
- kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE);
+ kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+ /* Detect the highest partition number and preallocate
+ * disk->part_tbl. This is an optimization and not strictly
+ * necessary.
+ */
+ for (p = 1, highest = 0; p < state->limit; p++)
+ if (state->parts[p].size)
+ highest = p;
+
+ disk_expand_part_tbl(disk, highest);
+
+ /* add partitions */
for (p = 1; p < state->limit; p++) {
sector_t size = state->parts[p].size;
sector_t from = state->parts[p].from;
@@ -541,25 +581,31 @@ EXPORT_SYMBOL(read_dev_sector);
void del_gendisk(struct gendisk *disk)
{
- int p;
+ struct disk_part_iter piter;
+ struct hd_struct *part;
/* invalidate stuff */
- for (p = disk->minors - 1; p > 0; p--) {
- invalidate_partition(disk, p);
- delete_partition(disk, p);
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
+ while ((part = disk_part_iter_next(&piter))) {
+ invalidate_partition(disk, part->partno);
+ delete_partition(disk, part->partno);
}
+ disk_part_iter_exit(&piter);
+
invalidate_partition(disk, 0);
- disk->capacity = 0;
+ blk_free_devt(disk_to_dev(disk)->devt);
+ set_capacity(disk, 0);
disk->flags &= ~GENHD_FL_UP;
unlink_gendisk(disk);
- disk_stat_set_all(disk, 0);
- disk->stamp = 0;
+ part_stat_set_all(&disk->part0, 0);
+ disk->part0.stamp = 0;
- kobject_put(disk->holder_dir);
+ kobject_put(disk->part0.holder_dir);
kobject_put(disk->slave_dir);
disk->driverfs_dev = NULL;
#ifndef CONFIG_SYSFS_DEPRECATED
- sysfs_remove_link(block_depr, disk->dev.bus_id);
+ sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
#endif
- device_del(&disk->dev);
+ device_del(disk_to_dev(disk));
}
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 17ae8ecd9e8b..98dbe1a84528 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h
@@ -5,15 +5,13 @@
* add_gd_partition adds a partitions details to the devices partition
* description.
*/
-enum { MAX_PART = 256 };
-
struct parsed_partitions {
char name[BDEVNAME_SIZE];
struct {
sector_t from;
sector_t size;
int flags;
- } parts[MAX_PART];
+ } parts[DISK_MAX_PARTS];
int next;
int limit;
};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 52312ec93ff4..5145cb9125af 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -58,7 +58,7 @@ const struct inode_operations ramfs_file_inode_operations = {
* size 0 on the assumption that it's going to be used for an mmap of shared
* memory
*/
-static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
+int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
{
struct pagevec lru_pvec;
unsigned long npages, xpages, loop, limit;
diff --git a/fs/splice.c b/fs/splice.c
index 1bbc6f4bb09c..a1e701c27156 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -898,6 +898,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
if (unlikely(!(out->f_mode & FMODE_WRITE)))
return -EBADF;
+ if (unlikely(out->f_flags & O_APPEND))
+ return -EINVAL;
+
ret = rw_verify_area(WRITE, out, ppos, len);
if (unlikely(ret < 0))
return ret;
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index b9cb77473758..d7f7645779f2 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -538,7 +538,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n);
for (i = 0; i < n; i++)
printk(KERN_DEBUG "\t ino %llu\n",
- le64_to_cpu(orph->inos[i]));
+ (unsigned long long)le64_to_cpu(orph->inos[i]));
break;
}
default:
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 2b267c9a1806..526c01ec8003 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -426,7 +426,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
while (1) {
dbg_gen("feed '%s', ino %llu, new f_pos %#x",
- dent->name, le64_to_cpu(dent->inum),
+ dent->name, (unsigned long long)le64_to_cpu(dent->inum),
key_hash_flash(c, &dent->key));
ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum);
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index e045c8b55423..47814cde2407 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -507,7 +507,6 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
rsvd_idx_lebs = 0;
lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
c->lst.taken_empty_lebs;
- ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs);
if (rsvd_idx_lebs < lebs)
/*
* OK to allocate an empty LEB, but we still don't want to go
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 13f1019c859f..02aba36fe3d4 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -334,15 +334,15 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
err = move_nodes(c, sleb);
if (err)
- goto out;
+ goto out_inc_seq;
err = gc_sync_wbufs(c);
if (err)
- goto out;
+ goto out_inc_seq;
err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
if (err)
- goto out;
+ goto out_inc_seq;
/* Allow for races with TNC */
c->gced_lnum = lnum;
@@ -369,6 +369,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
out:
ubifs_scan_destroy(sleb);
return err;
+
+out_inc_seq:
+ /* We may have moved at least some nodes so allow for races with TNC */
+ c->gced_lnum = lnum;
+ smp_wmb();
+ c->gc_seq += 1;
+ smp_wmb();
+ goto out;
}
/**
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 7562464ac83f..3f4902060c7a 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1024,14 +1024,13 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_dereg;
}
+ sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
if (!mounted_read_only) {
err = alloc_wbufs(c);
if (err)
goto out_cbuf;
/* Create background thread */
- sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num,
- c->vi.vol_id);
c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
if (!c->bgt)
c->bgt = ERR_PTR(-EINVAL);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 7da209ab9378..7634c5970887 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1476,7 +1476,7 @@ again:
}
err = fallible_read_node(c, key, &zbr, node);
- if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
+ if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
/*
* The node may have been GC'ed out from under us so try again
* while keeping the TNC mutex locked.
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index f42f80a3b1fa..a44d68eb50b5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1338,6 +1338,10 @@ __xfs_get_blocks(
offset = (xfs_off_t)iblock << inode->i_blkbits;
ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
size = bh_result->b_size;
+
+ if (!create && direct && offset >= i_size_read(inode))
+ return 0;
+
error = xfs_iomap(XFS_I(inode), offset, size,
create ? flags : BMAPI_READ, &iomap, &niomap);
if (error)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 986061ae1b9b..36d5fcd3f593 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1001,12 +1001,13 @@ xfs_buf_iodone_work(
* We can get an EOPNOTSUPP to ordered writes. Here we clear the
* ordered flag and reissue them. Because we can't tell the higher
* layers directly that they should not issue ordered I/O anymore, they
- * need to check if the ordered flag was cleared during I/O completion.
+ * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
*/
if ((bp->b_error == EOPNOTSUPP) &&
(bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
XB_TRACE(bp, "ordered_retry", bp->b_iodone);
bp->b_flags &= ~XBF_ORDERED;
+ bp->b_flags |= _XFS_BARRIER_FAILED;
xfs_buf_iorequest(bp);
} else if (bp->b_iodone)
(*(bp->b_iodone))(bp);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index fe0109956656..456519a088c7 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -85,6 +85,14 @@ typedef enum {
* modifications being lost.
*/
_XBF_PAGE_LOCKED = (1 << 22),
+
+ /*
+ * If we try a barrier write, but it fails we have to communicate
+ * this to the upper layers. Unfortunately b_error gets overwritten
+ * when the buffer is re-issued so we have to add another flag to
+ * keep this information.
+ */
+ _XFS_BARRIER_FAILED = (1 << 23),
} xfs_buf_flags_t;
typedef enum {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 73c65f19e549..18d3c8487835 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1302,9 +1302,29 @@ xfs_fs_remount(
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
default:
+ /*
+ * Logically we would return an error here to prevent
+ * users from believing they might have changed
+ * mount options using remount which can't be changed.
+ *
+ * But unfortunately mount(8) adds all options from
+ * mtab and fstab to the mount arguments in some cases
+ * so we can't blindly reject options, but have to
+ * check for each specified option if it actually
+ * differs from the currently set option and only
+ * reject it if that's the case.
+ *
+ * Until that is implemented we return success for
+ * every remount request, and silently ignore all
+ * options that we can't actually change.
+ */
+#if 0
printk(KERN_INFO
"XFS: mount option \"%s\" not supported for remount\n", p);
return -EINVAL;
+#else
+ return 0;
+#endif
}
}
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 608c30c3f76b..002fc2617c8e 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -732,6 +732,7 @@ xfs_buf_item_init(
bip->bli_item.li_ops = &xfs_buf_item_ops;
bip->bli_item.li_mountp = mp;
bip->bli_buf = bp;
+ xfs_buf_hold(bp);
bip->bli_format.blf_type = XFS_LI_BUF;
bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
@@ -867,6 +868,21 @@ xfs_buf_item_dirty(
return (bip->bli_flags & XFS_BLI_DIRTY);
}
+STATIC void
+xfs_buf_item_free(
+ xfs_buf_log_item_t *bip)
+{
+#ifdef XFS_TRANS_DEBUG
+ kmem_free(bip->bli_orig);
+ kmem_free(bip->bli_logged);
+#endif /* XFS_TRANS_DEBUG */
+
+#ifdef XFS_BLI_TRACE
+ ktrace_free(bip->bli_trace);
+#endif
+ kmem_zone_free(xfs_buf_item_zone, bip);
+}
+
/*
* This is called when the buf log item is no longer needed. It should
* free the buf log item associated with the given buffer and clear
@@ -887,18 +903,8 @@ xfs_buf_item_relse(
(XFS_BUF_IODONE_FUNC(bp) != NULL)) {
XFS_BUF_CLR_IODONE_FUNC(bp);
}
-
-#ifdef XFS_TRANS_DEBUG
- kmem_free(bip->bli_orig);
- bip->bli_orig = NULL;
- kmem_free(bip->bli_logged);
- bip->bli_logged = NULL;
-#endif /* XFS_TRANS_DEBUG */
-
-#ifdef XFS_BLI_TRACE
- ktrace_free(bip->bli_trace);
-#endif
- kmem_zone_free(xfs_buf_item_zone, bip);
+ xfs_buf_rele(bp);
+ xfs_buf_item_free(bip);
}
@@ -1120,6 +1126,7 @@ xfs_buf_iodone(
ASSERT(bip->bli_buf == bp);
+ xfs_buf_rele(bp);
mp = bip->bli_item.li_mountp;
/*
@@ -1136,18 +1143,7 @@ xfs_buf_iodone(
* xfs_trans_delete_ail() drops the AIL lock.
*/
xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
-
-#ifdef XFS_TRANS_DEBUG
- kmem_free(bip->bli_orig);
- bip->bli_orig = NULL;
- kmem_free(bip->bli_logged);
- bip->bli_logged = NULL;
-#endif /* XFS_TRANS_DEBUG */
-
-#ifdef XFS_BLI_TRACE
- ktrace_free(bip->bli_trace);
-#endif
- kmem_zone_free(xfs_buf_item_zone, bip);
+ xfs_buf_item_free(bip);
}
#if defined(XFS_BLI_TRACE)
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 760f4c5b5160..75b0cd4da0ea 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -149,7 +149,14 @@ xfs_swap_extents(
sbp = &sxp->sx_stat;
- xfs_lock_two_inodes(ip, tip, lock_flags);
+ /*
+ * we have to do two separate lock calls here to keep lockdep
+ * happy. If we try to get all the locks in one call, lock will
+ * report false positives when we drop the ILOCK and regain them
+ * below.
+ */
+ xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
+ xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
locked = 1;
/* Verify that both files have the same format */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 00e80df9dd9d..dbd9cef852ec 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -4118,7 +4118,7 @@ xfs_iext_indirect_to_direct(
ASSERT(nextents <= XFS_LINEAR_EXTS);
size = nextents * sizeof(xfs_bmbt_rec_t);
- xfs_iext_irec_compact_full(ifp);
+ xfs_iext_irec_compact_pages(ifp);
ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
ep = ifp->if_u1.if_ext_irec->er_extbuf;
@@ -4449,8 +4449,7 @@ xfs_iext_irec_remove(
* compaction policy is as follows:
*
* Full Compaction: Extents fit into a single page (or inline buffer)
- * Full Compaction: Extents occupy less than 10% of allocated space
- * Partial Compaction: Extents occupy > 10% and < 50% of allocated space
+ * Partial Compaction: Extents occupy less than 50% of allocated space
* No Compaction: Extents occupy at least 50% of allocated space
*/
void
@@ -4471,8 +4470,6 @@ xfs_iext_irec_compact(
xfs_iext_direct_to_inline(ifp, nextents);
} else if (nextents <= XFS_LINEAR_EXTS) {
xfs_iext_indirect_to_direct(ifp);
- } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) {
- xfs_iext_irec_compact_full(ifp);
} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
xfs_iext_irec_compact_pages(ifp);
}
@@ -4496,7 +4493,7 @@ xfs_iext_irec_compact_pages(
erp_next = erp + 1;
if (erp_next->er_extcount <=
(XFS_LINEAR_EXTS - erp->er_extcount)) {
- memmove(&erp->er_extbuf[erp->er_extcount],
+ memcpy(&erp->er_extbuf[erp->er_extcount],
erp_next->er_extbuf, erp_next->er_extcount *
sizeof(xfs_bmbt_rec_t));
erp->er_extcount += erp_next->er_extcount;
@@ -4516,91 +4513,6 @@ xfs_iext_irec_compact_pages(
}
/*
- * Fully compact the extent records managed by the indirection array.
- */
-void
-xfs_iext_irec_compact_full(
- xfs_ifork_t *ifp) /* inode fork pointer */
-{
- xfs_bmbt_rec_host_t *ep, *ep_next; /* extent record pointers */
- xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */
- int erp_idx = 0; /* extent irec index */
- int ext_avail; /* empty entries in ex list */
- int ext_diff; /* number of exts to add */
- int nlists; /* number of irec's (ex lists) */
-
- ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-
- nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
- erp = ifp->if_u1.if_ext_irec;
- ep = &erp->er_extbuf[erp->er_extcount];
- erp_next = erp + 1;
- ep_next = erp_next->er_extbuf;
-
- while (erp_idx < nlists - 1) {
- /*
- * Check how many extent records are available in this irec.
- * If there is none skip the whole exercise.
- */
- ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
- if (ext_avail) {
-
- /*
- * Copy over as many as possible extent records into
- * the previous page.
- */
- ext_diff = MIN(ext_avail, erp_next->er_extcount);
- memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
- erp->er_extcount += ext_diff;
- erp_next->er_extcount -= ext_diff;
-
- /*
- * If the next irec is empty now we can simply
- * remove it.
- */
- if (erp_next->er_extcount == 0) {
- /*
- * Free page before removing extent record
- * so er_extoffs don't get modified in
- * xfs_iext_irec_remove.
- */
- kmem_free(erp_next->er_extbuf);
- erp_next->er_extbuf = NULL;
- xfs_iext_irec_remove(ifp, erp_idx + 1);
- erp = &ifp->if_u1.if_ext_irec[erp_idx];
- nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
- /*
- * If the next irec is not empty move up the content
- * that has not been copied to the previous page to
- * the beggining of this one.
- */
- } else {
- memmove(erp_next->er_extbuf, &ep_next[ext_diff],
- erp_next->er_extcount *
- sizeof(xfs_bmbt_rec_t));
- ep_next = erp_next->er_extbuf;
- memset(&ep_next[erp_next->er_extcount], 0,
- (XFS_LINEAR_EXTS -
- erp_next->er_extcount) *
- sizeof(xfs_bmbt_rec_t));
- }
- }
-
- if (erp->er_extcount == XFS_LINEAR_EXTS) {
- erp_idx++;
- if (erp_idx < nlists)
- erp = &ifp->if_u1.if_ext_irec[erp_idx];
- else
- break;
- }
- ep = &erp->er_extbuf[erp->er_extcount];
- erp_next = erp + 1;
- ep_next = erp_next->er_extbuf;
- }
-}
-
-/*
* This is called to update the er_extoff field in the indirection
* array when extents have been added or removed from one of the
* extent lists. erp_idx contains the irec index to begin updating
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ccba14eb9dbe..0b02c6443551 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
STATIC int xlog_iclogs_empty(xlog_t *log);
#if defined(XFS_LOG_TRACE)
+
+#define XLOG_TRACE_LOGGRANT_SIZE 2048
+#define XLOG_TRACE_ICLOG_SIZE 256
+
+void
+xlog_trace_loggrant_alloc(xlog_t *log)
+{
+ log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
+}
+
+void
+xlog_trace_loggrant_dealloc(xlog_t *log)
+{
+ ktrace_free(log->l_grant_trace);
+}
+
void
xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
{
unsigned long cnts;
- if (!log->l_grant_trace) {
- log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP);
- if (!log->l_grant_trace)
- return;
- }
/* ticket counts are 1 byte each */
cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
@@ -157,10 +168,20 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
}
void
+xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
+{
+ iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
+}
+
+void
+xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
+{
+ ktrace_free(iclog->ic_trace);
+}
+
+void
xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
{
- if (!iclog->ic_trace)
- iclog->ic_trace = ktrace_alloc(256, KM_NOFS);
ktrace_enter(iclog->ic_trace,
(void *)((unsigned long)state),
(void *)((unsigned long)current_pid()),
@@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
(void *)NULL, (void *)NULL);
}
#else
+
+#define xlog_trace_loggrant_alloc(log)
+#define xlog_trace_loggrant_dealloc(log)
#define xlog_trace_loggrant(log,tic,string)
+
+#define xlog_trace_iclog_alloc(iclog)
+#define xlog_trace_iclog_dealloc(iclog)
#define xlog_trace_iclog(iclog,state)
+
#endif /* XFS_LOG_TRACE */
@@ -1005,11 +1033,12 @@ xlog_iodone(xfs_buf_t *bp)
l = iclog->ic_log;
/*
- * If the ordered flag has been removed by a lower
- * layer, it means the underlyin device no longer supports
+ * If the _XFS_BARRIER_FAILED flag was set by a lower
+ * layer, it means the underlying device no longer supports
* barrier I/O. Warn loudly and turn off barriers.
*/
- if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) {
+ if (bp->b_flags & _XFS_BARRIER_FAILED) {
+ bp->b_flags &= ~_XFS_BARRIER_FAILED;
l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
xfs_fs_cmn_err(CE_WARN, l->l_mp,
"xlog_iodone: Barriers are no longer supported"
@@ -1231,6 +1260,7 @@ xlog_alloc_log(xfs_mount_t *mp,
spin_lock_init(&log->l_grant_lock);
sv_init(&log->l_flush_wait, 0, "flush_wait");
+ xlog_trace_loggrant_alloc(log);
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1285,6 +1315,8 @@ xlog_alloc_log(xfs_mount_t *mp,
sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
+ xlog_trace_iclog_alloc(iclog);
+
iclogp = &iclog->ic_next;
}
*iclogp = log->l_iclog; /* complete ring */
@@ -1565,11 +1597,7 @@ xlog_dealloc_log(xlog_t *log)
sv_destroy(&iclog->ic_force_wait);
sv_destroy(&iclog->ic_write_wait);
xfs_buf_free(iclog->ic_bp);
-#ifdef XFS_LOG_TRACE
- if (iclog->ic_trace != NULL) {
- ktrace_free(iclog->ic_trace);
- }
-#endif
+ xlog_trace_iclog_dealloc(iclog);
next_iclog = iclog->ic_next;
kmem_free(iclog);
iclog = next_iclog;
@@ -1578,14 +1606,7 @@ xlog_dealloc_log(xlog_t *log)
spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf);
-#ifdef XFS_LOG_TRACE
- if (log->l_trace != NULL) {
- ktrace_free(log->l_trace);
- }
- if (log->l_grant_trace != NULL) {
- ktrace_free(log->l_grant_trace);
- }
-#endif
+ xlog_trace_loggrant_dealloc(log);
log->l_mp->m_log = NULL;
kmem_free(log);
} /* xlog_dealloc_log */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index c8a5b22ee3e3..e7d8f84443fa 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -448,7 +448,6 @@ typedef struct log {
int l_grant_write_bytes;
#ifdef XFS_LOG_TRACE
- struct ktrace *l_trace;
struct ktrace *l_grant_trace;
#endif
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index aa238c8fbd7a..8b6812f66a15 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1838,6 +1838,12 @@ again:
#endif
}
+/*
+ * xfs_lock_two_inodes() can only be used to lock one type of lock
+ * at a time - the iolock or the ilock, but not both at once. If
+ * we lock both at once, lockdep will report false positives saying
+ * we have violated locking orders.
+ */
void
xfs_lock_two_inodes(
xfs_inode_t *ip0,
@@ -1848,6 +1854,8 @@ xfs_lock_two_inodes(
int attempts = 0;
xfs_log_item_t *lp;
+ if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
+ ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
ASSERT(ip0->i_ino != ip1->i_ino);
if (ip0->i_ino > ip1->i_ino) {
@@ -3152,6 +3160,13 @@ error1: /* Just cancel transaction */
/*
* Zero file bytes between startoff and endoff inclusive.
* The iolock is held exclusive and no blocks are buffered.
+ *
+ * This function is used by xfs_free_file_space() to zero
+ * partial blocks when the range to free is not block aligned.
+ * When unreserving space with boundaries that are not block
+ * aligned we round up the start and round down the end
+ * boundaries and then use this function to zero the parts of
+ * the blocks that got dropped during the rounding.
*/
STATIC int
xfs_zero_remaining_bytes(
@@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes(
int nimap;
int error = 0;
+ /*
+ * Avoid doing I/O beyond eof - it's not necessary
+ * since nothing can read beyond eof. The space will
+ * be zeroed when the file is extended anyway.
+ */
+ if (startoff >= ip->i_size)
+ return 0;
+
+ if (endoff > ip->i_size)
+ endoff = ip->i_size;
+
bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp);