summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/bio-integrity.c29
-rw-r--r--fs/bio.c297
-rw-r--r--fs/block_dev.c182
-rw-r--r--fs/dlm/config.c77
-rw-r--r--fs/dlm/dlm_internal.h7
-rw-r--r--fs/dlm/lockspace.c158
-rw-r--r--fs/dlm/lockspace.h1
-rw-r--r--fs/dlm/user.c124
-rw-r--r--fs/dlm/user.h4
-rw-r--r--fs/fat/fatent.c14
-rw-r--r--fs/gfs2/glock.c15
-rw-r--r--fs/gfs2/glock.h1
-rw-r--r--fs/gfs2/incore.h38
-rw-r--r--fs/gfs2/inode.c159
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/locking/dlm/mount.c3
-rw-r--r--fs/gfs2/log.c21
-rw-r--r--fs/gfs2/mount.c7
-rw-r--r--fs/gfs2/ops_address.c18
-rw-r--r--fs/gfs2/ops_file.c16
-rw-r--r--fs/gfs2/ops_fstype.c578
-rw-r--r--fs/gfs2/ops_inode.c127
-rw-r--r--fs/gfs2/ops_super.c108
-rw-r--r--fs/gfs2/super.c340
-rw-r--r--fs/gfs2/super.h6
-rw-r--r--fs/gfs2/sys.c11
-rw-r--r--fs/partitions/check.c268
-rw-r--r--fs/partitions/check.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h8
-rw-r--r--fs/xfs/xfs_log.c7
31 files changed, 1441 insertions, 1192 deletions
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c3e174b35fe6..19caf7c962ac 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -107,7 +107,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
BUG_ON(bip == NULL);
/* A cloned bio doesn't own the integrity metadata */
- if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL)
+ if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
+ && bip->bip_buf != NULL)
kfree(bip->bip_buf);
mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
@@ -150,6 +151,24 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_integrity_add_page);
+static int bdev_integrity_enabled(struct block_device *bdev, int rw)
+{
+ struct blk_integrity *bi = bdev_get_integrity(bdev);
+
+ if (bi == NULL)
+ return 0;
+
+ if (rw == READ && bi->verify_fn != NULL &&
+ (bi->flags & INTEGRITY_FLAG_READ))
+ return 1;
+
+ if (rw == WRITE && bi->generate_fn != NULL &&
+ (bi->flags & INTEGRITY_FLAG_WRITE))
+ return 1;
+
+ return 0;
+}
+
/**
* bio_integrity_enabled - Check whether integrity can be passed
* @bio: bio to check
@@ -313,6 +332,14 @@ static void bio_integrity_generate(struct bio *bio)
}
}
+static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
+{
+ if (bi)
+ return bi->tuple_size;
+
+ return 0;
+}
+
/**
* bio_integrity_prep - Prepare bio for integrity I/O
* @bio: bio to prepare
diff --git a/fs/bio.c b/fs/bio.c
index 3cba7ae34d75..77a55bcceedb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,7 +30,7 @@
static struct kmem_cache *bio_slab __read_mostly;
-mempool_t *bio_split_pool __read_mostly;
+static mempool_t *bio_split_pool __read_mostly;
/*
* if you change this list, also change bvec_alloc or things will
@@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
struct bio_vec *bvl;
/*
- * see comment near bvec_array define!
+ * If 'bs' is given, lookup the pool and do the mempool alloc.
+ * If not, this is a bio_kmalloc() allocation and just do a
+ * kzalloc() for the exact number of vecs right away.
*/
- switch (nr) {
- case 1 : *idx = 0; break;
- case 2 ... 4: *idx = 1; break;
- case 5 ... 16: *idx = 2; break;
- case 17 ... 64: *idx = 3; break;
- case 65 ... 128: *idx = 4; break;
- case 129 ... BIO_MAX_PAGES: *idx = 5; break;
+ if (bs) {
+ /*
+ * see comment near bvec_array define!
+ */
+ switch (nr) {
+ case 1:
+ *idx = 0;
+ break;
+ case 2 ... 4:
+ *idx = 1;
+ break;
+ case 5 ... 16:
+ *idx = 2;
+ break;
+ case 17 ... 64:
+ *idx = 3;
+ break;
+ case 65 ... 128:
+ *idx = 4;
+ break;
+ case 129 ... BIO_MAX_PAGES:
+ *idx = 5;
+ break;
default:
return NULL;
- }
- /*
- * idx now points to the pool we want to allocate from
- */
+ }
- bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
- if (bvl)
- memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
+ /*
+ * idx now points to the pool we want to allocate from
+ */
+ bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
+ if (bvl)
+ memset(bvl, 0,
+ bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
+ } else
+ bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
return bvl;
}
@@ -107,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio)
bio_free(bio, fs_bio_set);
}
+static void bio_kmalloc_destructor(struct bio *bio)
+{
+ kfree(bio->bi_io_vec);
+ kfree(bio);
+}
+
void bio_init(struct bio *bio)
{
memset(bio, 0, sizeof(*bio));
bio->bi_flags = 1 << BIO_UPTODATE;
+ bio->bi_comp_cpu = -1;
atomic_set(&bio->bi_cnt, 1);
}
@@ -118,19 +146,25 @@ void bio_init(struct bio *bio)
* bio_alloc_bioset - allocate a bio for I/O
* @gfp_mask: the GFP_ mask given to the slab allocator
* @nr_iovecs: number of iovecs to pre-allocate
- * @bs: the bio_set to allocate from
+ * @bs: the bio_set to allocate from. If %NULL, just use kmalloc
*
* Description:
- * bio_alloc_bioset will first try it's on mempool to satisfy the allocation.
+ * bio_alloc_bioset will first try its own mempool to satisfy the allocation.
* If %__GFP_WAIT is set then we will block on the internal pool waiting
- * for a &struct bio to become free.
+ * for a &struct bio to become free. If a %NULL @bs is passed in, we will
+ * fall back to just using @kmalloc to allocate the required memory.
*
* allocate bio and iovecs from the memory pools specified by the
- * bio_set structure.
+ * bio_set structure, or @kmalloc if none given.
**/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
- struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask);
+ struct bio *bio;
+
+ if (bs)
+ bio = mempool_alloc(bs->bio_pool, gfp_mask);
+ else
+ bio = kmalloc(sizeof(*bio), gfp_mask);
if (likely(bio)) {
struct bio_vec *bvl = NULL;
@@ -141,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
if (unlikely(!bvl)) {
- mempool_free(bio, bs->bio_pool);
+ if (bs)
+ mempool_free(bio, bs->bio_pool);
+ else
+ kfree(bio);
bio = NULL;
goto out;
}
@@ -164,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
return bio;
}
+/*
+ * Like bio_alloc(), but doesn't use a mempool backing. This means that
+ * it CAN fail, but while bio_alloc() can only be used for allocations
+ * that have a short (finite) life span, bio_kmalloc() should be used
+ * for more permanent bio allocations (like allocating some bio's for
+ * initalization or setup purposes).
+ */
+struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
+{
+ struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
+
+ if (bio)
+ bio->bi_destructor = bio_kmalloc_destructor;
+
+ return bio;
+}
+
void zero_fill_bio(struct bio *bio)
{
unsigned long flags;
@@ -208,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
return bio->bi_phys_segments;
}
-inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
-{
- if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
- blk_recount_segments(q, bio);
-
- return bio->bi_hw_segments;
-}
-
/**
* __bio_clone - clone a bio
* @bio: destination bio
@@ -350,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
*/
while (bio->bi_phys_segments >= q->max_phys_segments
- || bio->bi_hw_segments >= q->max_hw_segments
- || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
+ || bio->bi_phys_segments >= q->max_hw_segments) {
if (retried_segments)
return 0;
@@ -395,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
}
/* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) ||
- BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
+ if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
bio->bi_flags &= ~(1 << BIO_SEG_VALID);
bio->bi_vcnt++;
bio->bi_phys_segments++;
- bio->bi_hw_segments++;
done:
bio->bi_size += len;
return len;
@@ -449,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
struct bio_map_data {
struct bio_vec *iovecs;
- int nr_sgvecs;
struct sg_iovec *sgvecs;
+ int nr_sgvecs;
+ int is_our_pages;
};
static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
- struct sg_iovec *iov, int iov_count)
+ struct sg_iovec *iov, int iov_count,
+ int is_our_pages)
{
memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
bmd->nr_sgvecs = iov_count;
+ bmd->is_our_pages = is_our_pages;
bio->bi_private = bmd;
}
@@ -493,7 +539,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
}
static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
- struct sg_iovec *iov, int iov_count, int uncopy)
+ struct sg_iovec *iov, int iov_count, int uncopy,
+ int do_free_page)
{
int ret = 0, i;
struct bio_vec *bvec;
@@ -536,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
}
}
- if (uncopy)
+ if (do_free_page)
__free_page(bvec->bv_page);
}
@@ -553,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
int bio_uncopy_user(struct bio *bio)
{
struct bio_map_data *bmd = bio->bi_private;
- int ret;
-
- ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
+ int ret = 0;
+ if (!bio_flagged(bio, BIO_NULL_MAPPED))
+ ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
+ bmd->nr_sgvecs, 1, bmd->is_our_pages);
bio_free_map_data(bmd);
bio_put(bio);
return ret;
@@ -565,16 +613,20 @@ int bio_uncopy_user(struct bio *bio)
/**
* bio_copy_user_iov - copy user data to bio
* @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
* @iov: the iovec.
* @iov_count: number of elements in the iovec
* @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
*
* Prepares and returns a bio for indirect user io, bouncing data
* to/from kernel pages as necessary. Must be paired with
* call bio_uncopy_user() on io completion.
*/
-struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
- int iov_count, int write_to_vm)
+struct bio *bio_copy_user_iov(struct request_queue *q,
+ struct rq_map_data *map_data,
+ struct sg_iovec *iov, int iov_count,
+ int write_to_vm, gfp_t gfp_mask)
{
struct bio_map_data *bmd;
struct bio_vec *bvec;
@@ -597,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
len += iov[i].iov_len;
}
- bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL);
+ bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
if (!bmd)
return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
- bio = bio_alloc(GFP_KERNEL, nr_pages);
+ bio = bio_alloc(gfp_mask, nr_pages);
if (!bio)
goto out_bmd;
bio->bi_rw |= (!write_to_vm << BIO_RW);
ret = 0;
+ i = 0;
while (len) {
- unsigned int bytes = PAGE_SIZE;
+ unsigned int bytes;
+
+ if (map_data)
+ bytes = 1U << (PAGE_SHIFT + map_data->page_order);
+ else
+ bytes = PAGE_SIZE;
if (bytes > len)
bytes = len;
- page = alloc_page(q->bounce_gfp | GFP_KERNEL);
+ if (map_data) {
+ if (i == map_data->nr_entries) {
+ ret = -ENOMEM;
+ break;
+ }
+ page = map_data->pages[i++];
+ } else
+ page = alloc_page(q->bounce_gfp | gfp_mask);
if (!page) {
ret = -ENOMEM;
break;
@@ -634,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
* success
*/
if (!write_to_vm) {
- ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0);
+ ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0);
if (ret)
goto cleanup;
}
- bio_set_map_data(bmd, bio, iov, iov_count);
+ bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
return bio;
cleanup:
- bio_for_each_segment(bvec, bio, i)
- __free_page(bvec->bv_page);
+ if (!map_data)
+ bio_for_each_segment(bvec, bio, i)
+ __free_page(bvec->bv_page);
bio_put(bio);
out_bmd:
@@ -654,29 +720,32 @@ out_bmd:
/**
* bio_copy_user - copy user data to bio
* @q: destination block queue
+ * @map_data: pointer to the rq_map_data holding pages (if necessary)
* @uaddr: start of user address
* @len: length in bytes
* @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
*
* Prepares and returns a bio for indirect user io, bouncing data
* to/from kernel pages as necessary. Must be paired with
* call bio_uncopy_user() on io completion.
*/
-struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr,
- unsigned int len, int write_to_vm)
+struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
+ unsigned long uaddr, unsigned int len,
+ int write_to_vm, gfp_t gfp_mask)
{
struct sg_iovec iov;
iov.iov_base = (void __user *)uaddr;
iov.iov_len = len;
- return bio_copy_user_iov(q, &iov, 1, write_to_vm);
+ return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
}
static struct bio *__bio_map_user_iov(struct request_queue *q,
struct block_device *bdev,
struct sg_iovec *iov, int iov_count,
- int write_to_vm)
+ int write_to_vm, gfp_t gfp_mask)
{
int i, j;
int nr_pages = 0;
@@ -702,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
if (!nr_pages)
return ERR_PTR(-EINVAL);
- bio = bio_alloc(GFP_KERNEL, nr_pages);
+ bio = bio_alloc(gfp_mask, nr_pages);
if (!bio)
return ERR_PTR(-ENOMEM);
ret = -ENOMEM;
- pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+ pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
if (!pages)
goto out;
@@ -786,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
* @uaddr: start of user address
* @len: length in bytes
* @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
*
* Map the user space address into a bio suitable for io to a block
* device. Returns an error pointer in case of error.
*/
struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
- unsigned long uaddr, unsigned int len, int write_to_vm)
+ unsigned long uaddr, unsigned int len, int write_to_vm,
+ gfp_t gfp_mask)
{
struct sg_iovec iov;
iov.iov_base = (void __user *)uaddr;
iov.iov_len = len;
- return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
+ return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
}
/**
@@ -808,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
* @iov: the iovec.
* @iov_count: number of elements in the iovec
* @write_to_vm: bool indicating writing to pages or not
+ * @gfp_mask: memory allocation flags
*
* Map the user space address into a bio suitable for io to a block
* device. Returns an error pointer in case of error.
*/
struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
struct sg_iovec *iov, int iov_count,
- int write_to_vm)
+ int write_to_vm, gfp_t gfp_mask)
{
struct bio *bio;
- bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
-
+ bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
+ gfp_mask);
if (IS_ERR(bio))
return bio;
@@ -976,48 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
gfp_t gfp_mask, int reading)
{
- unsigned long kaddr = (unsigned long)data;
- unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = kaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
struct bio *bio;
struct bio_vec *bvec;
- struct bio_map_data *bmd;
- int i, ret;
- struct sg_iovec iov;
-
- iov.iov_base = data;
- iov.iov_len = len;
-
- bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
- if (!bmd)
- return ERR_PTR(-ENOMEM);
-
- ret = -ENOMEM;
- bio = bio_alloc(gfp_mask, nr_pages);
- if (!bio)
- goto out_bmd;
-
- while (len) {
- struct page *page;
- unsigned int bytes = PAGE_SIZE;
-
- if (bytes > len)
- bytes = len;
-
- page = alloc_page(q->bounce_gfp | gfp_mask);
- if (!page) {
- ret = -ENOMEM;
- goto cleanup;
- }
-
- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
- ret = -EINVAL;
- goto cleanup;
- }
+ int i;
- len -= bytes;
- }
+ bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
+ if (IS_ERR(bio))
+ return bio;
if (!reading) {
void *p = data;
@@ -1030,20 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
}
}
- bio->bi_private = bmd;
bio->bi_end_io = bio_copy_kern_endio;
- bio_set_map_data(bmd, bio, &iov, 1);
return bio;
-cleanup:
- bio_for_each_segment(bvec, bio, i)
- __free_page(bvec->bv_page);
-
- bio_put(bio);
-out_bmd:
- bio_free_map_data(bmd);
-
- return ERR_PTR(ret);
}
/*
@@ -1230,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err)
* split a bio - only worry about a bio with a single page
* in it's iovec
*/
-struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
+struct bio_pair *bio_split(struct bio *bi, int first_sectors)
{
- struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO);
+ struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
if (!bp)
return bp;
@@ -1266,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
bp->bio2.bi_end_io = bio_pair_end_2;
bp->bio1.bi_private = bi;
- bp->bio2.bi_private = pool;
+ bp->bio2.bi_private = bio_split_pool;
if (bio_integrity(bi))
bio_integrity_split(bi, bp, first_sectors);
@@ -1274,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
return bp;
}
+/**
+ * bio_sector_offset - Find hardware sector offset in bio
+ * @bio: bio to inspect
+ * @index: bio_vec index
+ * @offset: offset in bv_page
+ *
+ * Return the number of hardware sectors between beginning of bio
+ * and an end point indicated by a bio_vec index and an offset
+ * within that vector's page.
+ */
+sector_t bio_sector_offset(struct bio *bio, unsigned short index,
+ unsigned int offset)
+{
+ unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
+ struct bio_vec *bv;
+ sector_t sectors;
+ int i;
+
+ sectors = 0;
+
+ if (index >= bio->bi_idx)
+ index = bio->bi_vcnt - 1;
+
+ __bio_for_each_segment(bv, bio, i, 0) {
+ if (i == index) {
+ if (offset > bv->bv_offset)
+ sectors += (offset - bv->bv_offset) / sector_sz;
+ break;
+ }
+
+ sectors += bv->bv_len / sector_sz;
+ }
+
+ return sectors;
+}
+EXPORT_SYMBOL(bio_sector_offset);
/*
* create memory pools for biovec's in a bio_set.
@@ -1376,6 +1438,7 @@ static int __init init_bio(void)
subsys_initcall(init_bio);
EXPORT_SYMBOL(bio_alloc);
+EXPORT_SYMBOL(bio_kmalloc);
EXPORT_SYMBOL(bio_put);
EXPORT_SYMBOL(bio_free);
EXPORT_SYMBOL(bio_endio);
@@ -1383,7 +1446,6 @@ EXPORT_SYMBOL(bio_init);
EXPORT_SYMBOL(__bio_clone);
EXPORT_SYMBOL(bio_clone);
EXPORT_SYMBOL(bio_phys_segments);
-EXPORT_SYMBOL(bio_hw_segments);
EXPORT_SYMBOL(bio_add_page);
EXPORT_SYMBOL(bio_add_pc_page);
EXPORT_SYMBOL(bio_get_nr_vecs);
@@ -1393,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern);
EXPORT_SYMBOL(bio_copy_kern);
EXPORT_SYMBOL(bio_pair_release);
EXPORT_SYMBOL(bio_split);
-EXPORT_SYMBOL(bio_split_pool);
EXPORT_SYMBOL(bio_copy_user);
EXPORT_SYMBOL(bio_uncopy_user);
EXPORT_SYMBOL(bioset_create);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index aff54219e049..d84f0469a016 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -540,22 +540,6 @@ EXPORT_SYMBOL(bd_release);
* /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
*/
-static struct kobject *bdev_get_kobj(struct block_device *bdev)
-{
- if (bdev->bd_contains != bdev)
- return kobject_get(&bdev->bd_part->dev.kobj);
- else
- return kobject_get(&bdev->bd_disk->dev.kobj);
-}
-
-static struct kobject *bdev_get_holder(struct block_device *bdev)
-{
- if (bdev->bd_contains != bdev)
- return kobject_get(bdev->bd_part->holder_dir);
- else
- return kobject_get(bdev->bd_disk->holder_dir);
-}
-
static int add_symlink(struct kobject *from, struct kobject *to)
{
if (!from || !to)
@@ -604,11 +588,11 @@ static int bd_holder_grab_dirs(struct block_device *bdev,
if (!bo->hdev)
goto fail_put_sdir;
- bo->sdev = bdev_get_kobj(bdev);
+ bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
if (!bo->sdev)
goto fail_put_hdev;
- bo->hdir = bdev_get_holder(bdev);
+ bo->hdir = kobject_get(bdev->bd_part->holder_dir);
if (!bo->hdir)
goto fail_put_sdev;
@@ -868,6 +852,87 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)
EXPORT_SYMBOL(open_by_devnum);
+/**
+ * flush_disk - invalidates all buffer-cache entries on a disk
+ *
+ * @bdev: struct block device to be flushed
+ *
+ * Invalidates all buffer-cache entries on a disk. It should be called
+ * when a disk has been changed -- either by a media change or online
+ * resize.
+ */
+static void flush_disk(struct block_device *bdev)
+{
+ if (__invalidate_device(bdev)) {
+ char name[BDEVNAME_SIZE] = "";
+
+ if (bdev->bd_disk)
+ disk_name(bdev->bd_disk, 0, name);
+ printk(KERN_WARNING "VFS: busy inodes on changed media or "
+ "resized disk %s\n", name);
+ }
+
+ if (!bdev->bd_disk)
+ return;
+ if (disk_partitionable(bdev->bd_disk))
+ bdev->bd_invalidated = 1;
+}
+
+/**
+ * check_disk_size_change - checks for disk size change and adjusts bdev size.
+ * @disk: struct gendisk to check
+ * @bdev: struct bdev to adjust.
+ *
+ * This routine checks to see if the bdev size does not match the disk size
+ * and adjusts it if it differs.
+ */
+void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
+{
+ loff_t disk_size, bdev_size;
+
+ disk_size = (loff_t)get_capacity(disk) << 9;
+ bdev_size = i_size_read(bdev->bd_inode);
+ if (disk_size != bdev_size) {
+ char name[BDEVNAME_SIZE];
+
+ disk_name(disk, 0, name);
+ printk(KERN_INFO
+ "%s: detected capacity change from %lld to %lld\n",
+ name, bdev_size, disk_size);
+ i_size_write(bdev->bd_inode, disk_size);
+ flush_disk(bdev);
+ }
+}
+EXPORT_SYMBOL(check_disk_size_change);
+
+/**
+ * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
+ * @disk: struct gendisk to be revalidated
+ *
+ * This routine is a wrapper for lower-level driver's revalidate_disk
+ * call-backs. It is used to do common pre and post operations needed
+ * for all revalidate_disk operations.
+ */
+int revalidate_disk(struct gendisk *disk)
+{
+ struct block_device *bdev;
+ int ret = 0;
+
+ if (disk->fops->revalidate_disk)
+ ret = disk->fops->revalidate_disk(disk);
+
+ bdev = bdget_disk(disk, 0);
+ if (!bdev)
+ return ret;
+
+ mutex_lock(&bdev->bd_mutex);
+ check_disk_size_change(disk, bdev);
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ return ret;
+}
+EXPORT_SYMBOL(revalidate_disk);
+
/*
* This routine checks whether a removable media has been changed,
* and invalidates all buffer-cache-entries in that case. This
@@ -887,13 +952,9 @@ int check_disk_change(struct block_device *bdev)
if (!bdops->media_changed(bdev->bd_disk))
return 0;
- if (__invalidate_device(bdev))
- printk("VFS: busy inodes on changed media.\n");
-
+ flush_disk(bdev);
if (bdops->revalidate_disk)
bdops->revalidate_disk(bdev->bd_disk);
- if (bdev->bd_disk->minors > 1)
- bdev->bd_invalidated = 1;
return 1;
}
@@ -927,10 +988,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part);
static int do_open(struct block_device *bdev, struct file *file, int for_part)
{
- struct module *owner = NULL;
struct gendisk *disk;
+ struct hd_struct *part = NULL;
int ret;
- int part;
+ int partno;
int perm = 0;
if (file->f_mode & FMODE_READ)
@@ -948,25 +1009,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
ret = -ENXIO;
file->f_mapping = bdev->bd_inode->i_mapping;
+
lock_kernel();
- disk = get_gendisk(bdev->bd_dev, &part);
- if (!disk) {
- unlock_kernel();
- bdput(bdev);
- return ret;
- }
- owner = disk->fops->owner;
+
+ disk = get_gendisk(bdev->bd_dev, &partno);
+ if (!disk)
+ goto out_unlock_kernel;
+ part = disk_get_part(disk, partno);
+ if (!part)
+ goto out_unlock_kernel;
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) {
bdev->bd_disk = disk;
+ bdev->bd_part = part;
bdev->bd_contains = bdev;
- if (!part) {
+ if (!partno) {
struct backing_dev_info *bdi;
if (disk->fops->open) {
ret = disk->fops->open(bdev->bd_inode, file);
if (ret)
- goto out_first;
+ goto out_clear;
}
if (!bdev->bd_openers) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
@@ -978,36 +1041,36 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
if (bdev->bd_invalidated)
rescan_partitions(disk, bdev);
} else {
- struct hd_struct *p;
struct block_device *whole;
whole = bdget_disk(disk, 0);
ret = -ENOMEM;
if (!whole)
- goto out_first;
+ goto out_clear;
BUG_ON(for_part);
ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1);
if (ret)
- goto out_first;
+ goto out_clear;
bdev->bd_contains = whole;
- p = disk->part[part - 1];
bdev->bd_inode->i_data.backing_dev_info =
whole->bd_inode->i_data.backing_dev_info;
- if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
+ if (!(disk->flags & GENHD_FL_UP) ||
+ !part || !part->nr_sects) {
ret = -ENXIO;
- goto out_first;
+ goto out_clear;
}
- kobject_get(&p->dev.kobj);
- bdev->bd_part = p;
- bd_set_size(bdev, (loff_t) p->nr_sects << 9);
+ bd_set_size(bdev, (loff_t)part->nr_sects << 9);
}
} else {
+ disk_put_part(part);
put_disk(disk);
- module_put(owner);
+ module_put(disk->fops->owner);
+ part = NULL;
+ disk = NULL;
if (bdev->bd_contains == bdev) {
if (bdev->bd_disk->fops->open) {
ret = bdev->bd_disk->fops->open(bdev->bd_inode, file);
if (ret)
- goto out;
+ goto out_unlock_bdev;
}
if (bdev->bd_invalidated)
rescan_partitions(bdev->bd_disk, bdev);
@@ -1020,19 +1083,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
unlock_kernel();
return 0;
-out_first:
+ out_clear:
bdev->bd_disk = NULL;
+ bdev->bd_part = NULL;
bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
if (bdev != bdev->bd_contains)
__blkdev_put(bdev->bd_contains, 1);
bdev->bd_contains = NULL;
- put_disk(disk);
- module_put(owner);
-out:
+ out_unlock_bdev:
mutex_unlock(&bdev->bd_mutex);
+ out_unlock_kernel:
unlock_kernel();
- if (ret)
- bdput(bdev);
+
+ disk_put_part(part);
+ if (disk)
+ module_put(disk->fops->owner);
+ put_disk(disk);
+ bdput(bdev);
+
return ret;
}
@@ -1117,11 +1185,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
put_disk(disk);
module_put(owner);
-
- if (bdev->bd_contains != bdev) {
- kobject_put(&bdev->bd_part->dev.kobj);
- bdev->bd_part = NULL;
- }
+ disk_put_part(bdev->bd_part);
+ bdev->bd_part = NULL;
bdev->bd_disk = NULL;
bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
if (bdev != bdev->bd_contains)
@@ -1197,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev);
/**
* lookup_bdev - lookup a struct block_device by name
+ * @pathname: special file representing the block device
*
- * @path: special file representing the block device
- *
- * Get a reference to the blockdevice at @path in the current
+ * Get a reference to the blockdevice at @pathname in the current
* namespace if possible and return it. Return ERR_PTR(error)
* otherwise.
*/
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 89d2fb7b991a..fd9859f92fad 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -14,6 +14,9 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/configfs.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
#include <net/sock.h>
#include "config.h"
@@ -377,24 +380,24 @@ static struct config_item_type node_type = {
.ct_owner = THIS_MODULE,
};
-static struct dlm_cluster *to_cluster(struct config_item *i)
+static struct dlm_cluster *config_item_to_cluster(struct config_item *i)
{
return i ? container_of(to_config_group(i), struct dlm_cluster, group) :
NULL;
}
-static struct dlm_space *to_space(struct config_item *i)
+static struct dlm_space *config_item_to_space(struct config_item *i)
{
return i ? container_of(to_config_group(i), struct dlm_space, group) :
NULL;
}
-static struct dlm_comm *to_comm(struct config_item *i)
+static struct dlm_comm *config_item_to_comm(struct config_item *i)
{
return i ? container_of(i, struct dlm_comm, item) : NULL;
}
-static struct dlm_node *to_node(struct config_item *i)
+static struct dlm_node *config_item_to_node(struct config_item *i)
{
return i ? container_of(i, struct dlm_node, item) : NULL;
}
@@ -450,7 +453,7 @@ static struct config_group *make_cluster(struct config_group *g,
static void drop_cluster(struct config_group *g, struct config_item *i)
{
- struct dlm_cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = config_item_to_cluster(i);
struct config_item *tmp;
int j;
@@ -468,7 +471,7 @@ static void drop_cluster(struct config_group *g, struct config_item *i)
static void release_cluster(struct config_item *i)
{
- struct dlm_cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = config_item_to_cluster(i);
kfree(cl->group.default_groups);
kfree(cl);
}
@@ -507,7 +510,7 @@ static struct config_group *make_space(struct config_group *g, const char *name)
static void drop_space(struct config_group *g, struct config_item *i)
{
- struct dlm_space *sp = to_space(i);
+ struct dlm_space *sp = config_item_to_space(i);
struct config_item *tmp;
int j;
@@ -524,7 +527,7 @@ static void drop_space(struct config_group *g, struct config_item *i)
static void release_space(struct config_item *i)
{
- struct dlm_space *sp = to_space(i);
+ struct dlm_space *sp = config_item_to_space(i);
kfree(sp->group.default_groups);
kfree(sp);
}
@@ -546,7 +549,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
static void drop_comm(struct config_group *g, struct config_item *i)
{
- struct dlm_comm *cm = to_comm(i);
+ struct dlm_comm *cm = config_item_to_comm(i);
if (local_comm == cm)
local_comm = NULL;
dlm_lowcomms_close(cm->nodeid);
@@ -557,13 +560,13 @@ static void drop_comm(struct config_group *g, struct config_item *i)
static void release_comm(struct config_item *i)
{
- struct dlm_comm *cm = to_comm(i);
+ struct dlm_comm *cm = config_item_to_comm(i);
kfree(cm);
}
static struct config_item *make_node(struct config_group *g, const char *name)
{
- struct dlm_space *sp = to_space(g->cg_item.ci_parent);
+ struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent);
struct dlm_node *nd;
nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL);
@@ -585,8 +588,8 @@ static struct config_item *make_node(struct config_group *g, const char *name)
static void drop_node(struct config_group *g, struct config_item *i)
{
- struct dlm_space *sp = to_space(g->cg_item.ci_parent);
- struct dlm_node *nd = to_node(i);
+ struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent);
+ struct dlm_node *nd = config_item_to_node(i);
mutex_lock(&sp->members_lock);
list_del(&nd->list);
@@ -598,7 +601,7 @@ static void drop_node(struct config_group *g, struct config_item *i)
static void release_node(struct config_item *i)
{
- struct dlm_node *nd = to_node(i);
+ struct dlm_node *nd = config_item_to_node(i);
kfree(nd);
}
@@ -632,7 +635,7 @@ void dlm_config_exit(void)
static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
- struct dlm_cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = config_item_to_cluster(i);
struct cluster_attribute *cla =
container_of(a, struct cluster_attribute, attr);
return cla->show ? cla->show(cl, buf) : 0;
@@ -642,7 +645,7 @@ static ssize_t store_cluster(struct config_item *i,
struct configfs_attribute *a,
const char *buf, size_t len)
{
- struct dlm_cluster *cl = to_cluster(i);
+ struct dlm_cluster *cl = config_item_to_cluster(i);
struct cluster_attribute *cla =
container_of(a, struct cluster_attribute, attr);
return cla->store ? cla->store(cl, buf, len) : -EINVAL;
@@ -651,7 +654,7 @@ static ssize_t store_cluster(struct config_item *i,
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
- struct dlm_comm *cm = to_comm(i);
+ struct dlm_comm *cm = config_item_to_comm(i);
struct comm_attribute *cma =
container_of(a, struct comm_attribute, attr);
return cma->show ? cma->show(cm, buf) : 0;
@@ -660,7 +663,7 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len)
{
- struct dlm_comm *cm = to_comm(i);
+ struct dlm_comm *cm = config_item_to_comm(i);
struct comm_attribute *cma =
container_of(a, struct comm_attribute, attr);
return cma->store ? cma->store(cm, buf, len) : -EINVAL;
@@ -714,7 +717,7 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
- struct dlm_node *nd = to_node(i);
+ struct dlm_node *nd = config_item_to_node(i);
struct node_attribute *nda =
container_of(a, struct node_attribute, attr);
return nda->show ? nda->show(nd, buf) : 0;
@@ -723,7 +726,7 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
static ssize_t store_node(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len)
{
- struct dlm_node *nd = to_node(i);
+ struct dlm_node *nd = config_item_to_node(i);
struct node_attribute *nda =
container_of(a, struct node_attribute, attr);
return nda->store ? nda->store(nd, buf, len) : -EINVAL;
@@ -768,7 +771,7 @@ static struct dlm_space *get_space(char *name)
i = config_group_find_item(space_list, name);
mutex_unlock(&space_list->cg_subsys->su_mutex);
- return to_space(i);
+ return config_item_to_space(i);
}
static void put_space(struct dlm_space *sp)
@@ -776,6 +779,33 @@ static void put_space(struct dlm_space *sp)
config_item_put(&sp->group.cg_item);
}
+static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y)
+{
+ switch (x->ss_family) {
+ case AF_INET: {
+ struct sockaddr_in *sinx = (struct sockaddr_in *)x;
+ struct sockaddr_in *siny = (struct sockaddr_in *)y;
+ if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr)
+ return 0;
+ if (sinx->sin_port != siny->sin_port)
+ return 0;
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x;
+ struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y;
+ if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr))
+ return 0;
+ if (sinx->sin6_port != siny->sin6_port)
+ return 0;
+ break;
+ }
+ default:
+ return 0;
+ }
+ return 1;
+}
+
static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
{
struct config_item *i;
@@ -788,7 +818,7 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
mutex_lock(&clusters_root.subsys.su_mutex);
list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
- cm = to_comm(i);
+ cm = config_item_to_comm(i);
if (nodeid) {
if (cm->nodeid != nodeid)
@@ -797,8 +827,7 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr)
config_item_get(i);
break;
} else {
- if (!cm->addr_count ||
- memcmp(cm->addr[0], addr, sizeof(*addr)))
+ if (!cm->addr_count || !addr_compare(cm->addr[0], addr))
continue;
found = 1;
config_item_get(i);
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 5a7ac33b629c..868e4c9ef127 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -441,8 +441,11 @@ struct dlm_ls {
uint32_t ls_global_id; /* global unique lockspace ID */
uint32_t ls_exflags;
int ls_lvblen;
- int ls_count; /* reference count */
+ int ls_count; /* refcount of processes in
+ the dlm using this ls */
+ int ls_create_count; /* create/release refcount */
unsigned long ls_flags; /* LSFL_ */
+ unsigned long ls_scan_time;
struct kobject ls_kobj;
struct dlm_rsbtable *ls_rsbtbl;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 499e16759e96..d910501de6d2 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -23,6 +23,7 @@
#include "lock.h"
#include "recover.h"
#include "requestqueue.h"
+#include "user.h"
static int ls_count;
static struct mutex ls_lock;
@@ -211,19 +212,41 @@ void dlm_lockspace_exit(void)
kset_unregister(dlm_kset);
}
+static struct dlm_ls *find_ls_to_scan(void)
+{
+ struct dlm_ls *ls;
+
+ spin_lock(&lslist_lock);
+ list_for_each_entry(ls, &lslist, ls_list) {
+ if (time_after_eq(jiffies, ls->ls_scan_time +
+ dlm_config.ci_scan_secs * HZ)) {
+ spin_unlock(&lslist_lock);
+ return ls;
+ }
+ }
+ spin_unlock(&lslist_lock);
+ return NULL;
+}
+
static int dlm_scand(void *data)
{
struct dlm_ls *ls;
+ int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
while (!kthread_should_stop()) {
- list_for_each_entry(ls, &lslist, ls_list) {
+ ls = find_ls_to_scan();
+ if (ls) {
if (dlm_lock_recovery_try(ls)) {
+ ls->ls_scan_time = jiffies;
dlm_scan_rsbs(ls);
dlm_scan_timeout(ls);
dlm_unlock_recovery(ls);
+ } else {
+ ls->ls_scan_time += HZ;
}
+ } else {
+ schedule_timeout_interruptible(timeout_jiffies);
}
- schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
}
return 0;
}
@@ -246,23 +269,6 @@ static void dlm_scand_stop(void)
kthread_stop(scand_task);
}
-static struct dlm_ls *dlm_find_lockspace_name(char *name, int namelen)
-{
- struct dlm_ls *ls;
-
- spin_lock(&lslist_lock);
-
- list_for_each_entry(ls, &lslist, ls_list) {
- if (ls->ls_namelen == namelen &&
- memcmp(ls->ls_name, name, namelen) == 0)
- goto out;
- }
- ls = NULL;
- out:
- spin_unlock(&lslist_lock);
- return ls;
-}
-
struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
{
struct dlm_ls *ls;
@@ -327,6 +333,7 @@ static void remove_lockspace(struct dlm_ls *ls)
for (;;) {
spin_lock(&lslist_lock);
if (ls->ls_count == 0) {
+ WARN_ON(ls->ls_create_count != 0);
list_del(&ls->ls_list);
spin_unlock(&lslist_lock);
return;
@@ -381,7 +388,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
uint32_t flags, int lvblen)
{
struct dlm_ls *ls;
- int i, size, error = -ENOMEM;
+ int i, size, error;
int do_unreg = 0;
if (namelen > DLM_LOCKSPACE_LEN)
@@ -393,12 +400,37 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
if (!try_module_get(THIS_MODULE))
return -EINVAL;
- ls = dlm_find_lockspace_name(name, namelen);
- if (ls) {
- *lockspace = ls;
+ if (!dlm_user_daemon_available()) {
+ module_put(THIS_MODULE);
+ return -EUNATCH;
+ }
+
+ error = 0;
+
+ spin_lock(&lslist_lock);
+ list_for_each_entry(ls, &lslist, ls_list) {
+ WARN_ON(ls->ls_create_count <= 0);
+ if (ls->ls_namelen != namelen)
+ continue;
+ if (memcmp(ls->ls_name, name, namelen))
+ continue;
+ if (flags & DLM_LSFL_NEWEXCL) {
+ error = -EEXIST;
+ break;
+ }
+ ls->ls_create_count++;
module_put(THIS_MODULE);
- return -EEXIST;
+ error = 1; /* not an error, return 0 */
+ break;
}
+ spin_unlock(&lslist_lock);
+
+ if (error < 0)
+ goto out;
+ if (error)
+ goto ret_zero;
+
+ error = -ENOMEM;
ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL);
if (!ls)
@@ -408,6 +440,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_lvblen = lvblen;
ls->ls_count = 0;
ls->ls_flags = 0;
+ ls->ls_scan_time = jiffies;
if (flags & DLM_LSFL_TIMEWARN)
set_bit(LSFL_TIMEWARN, &ls->ls_flags);
@@ -418,8 +451,9 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_allocation = GFP_KERNEL;
/* ls_exflags are forced to match among nodes, and we don't
- need to require all nodes to have TIMEWARN or FS set */
- ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
+ need to require all nodes to have some flags set */
+ ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
+ DLM_LSFL_NEWEXCL));
size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
@@ -510,6 +544,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
down_write(&ls->ls_in_recovery);
spin_lock(&lslist_lock);
+ ls->ls_create_count = 1;
list_add(&ls->ls_list, &lslist);
spin_unlock(&lslist_lock);
@@ -548,7 +583,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
dlm_create_debug_file(ls);
log_debug(ls, "join complete");
-
+ ret_zero:
*lockspace = ls;
return 0;
@@ -635,13 +670,34 @@ static int release_lockspace(struct dlm_ls *ls, int force)
struct dlm_lkb *lkb;
struct dlm_rsb *rsb;
struct list_head *head;
- int i;
- int busy = lockspace_busy(ls);
+ int i, busy, rv;
+
+ busy = lockspace_busy(ls);
+
+ spin_lock(&lslist_lock);
+ if (ls->ls_create_count == 1) {
+ if (busy > force)
+ rv = -EBUSY;
+ else {
+ /* remove_lockspace takes ls off lslist */
+ ls->ls_create_count = 0;
+ rv = 0;
+ }
+ } else if (ls->ls_create_count > 1) {
+ rv = --ls->ls_create_count;
+ } else {
+ rv = -EINVAL;
+ }
+ spin_unlock(&lslist_lock);
- if (busy > force)
- return -EBUSY;
+ if (rv) {
+ log_debug(ls, "release_lockspace no remove %d", rv);
+ return rv;
+ }
+
+ dlm_device_deregister(ls);
- if (force < 3)
+ if (force < 3 && dlm_user_daemon_available())
do_uevent(ls, 0);
dlm_recoverd_stop(ls);
@@ -720,15 +776,10 @@ static int release_lockspace(struct dlm_ls *ls, int force)
dlm_clear_members(ls);
dlm_clear_members_gone(ls);
kfree(ls->ls_node_array);
+ log_debug(ls, "release_lockspace final free");
kobject_put(&ls->ls_kobj);
/* The ls structure will be freed when the kobject is done with */
- mutex_lock(&ls_lock);
- ls_count--;
- if (!ls_count)
- threads_stop();
- mutex_unlock(&ls_lock);
-
module_put(THIS_MODULE);
return 0;
}
@@ -750,11 +801,38 @@ static int release_lockspace(struct dlm_ls *ls, int force)
int dlm_release_lockspace(void *lockspace, int force)
{
struct dlm_ls *ls;
+ int error;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -EINVAL;
dlm_put_lockspace(ls);
- return release_lockspace(ls, force);
+
+ mutex_lock(&ls_lock);
+ error = release_lockspace(ls, force);
+ if (!error)
+ ls_count--;
+ else if (!ls_count)
+ threads_stop();
+ mutex_unlock(&ls_lock);
+
+ return error;
+}
+
+void dlm_stop_lockspaces(void)
+{
+ struct dlm_ls *ls;
+
+ restart:
+ spin_lock(&lslist_lock);
+ list_for_each_entry(ls, &lslist, ls_list) {
+ if (!test_bit(LSFL_RUNNING, &ls->ls_flags))
+ continue;
+ spin_unlock(&lslist_lock);
+ log_error(ls, "no userland control daemon, stopping lockspace");
+ dlm_ls_stop(ls);
+ goto restart;
+ }
+ spin_unlock(&lslist_lock);
}
diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h
index 891eabbdd021..f879f87901f8 100644
--- a/fs/dlm/lockspace.h
+++ b/fs/dlm/lockspace.h
@@ -20,6 +20,7 @@ struct dlm_ls *dlm_find_lockspace_global(uint32_t id);
struct dlm_ls *dlm_find_lockspace_local(void *id);
struct dlm_ls *dlm_find_lockspace_device(int minor);
void dlm_put_lockspace(struct dlm_ls *ls);
+void dlm_stop_lockspaces(void);
#endif /* __LOCKSPACE_DOT_H__ */
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 34f14a14fb4e..b3832c67194a 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
@@ -15,7 +15,6 @@
#include <linux/poll.h>
#include <linux/signal.h>
#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
#include <linux/dlm.h>
#include <linux/dlm_device.h>
@@ -27,6 +26,8 @@
static const char name_prefix[] = "dlm";
static const struct file_operations device_fops;
+static atomic_t dlm_monitor_opened;
+static int dlm_monitor_unused = 1;
#ifdef CONFIG_COMPAT
@@ -340,10 +341,15 @@ static int device_user_deadlock(struct dlm_user_proc *proc,
return error;
}
-static int create_misc_device(struct dlm_ls *ls, char *name)
+static int dlm_device_register(struct dlm_ls *ls, char *name)
{
int error, len;
+ /* The device is already registered. This happens when the
+ lockspace is created multiple times from userspace. */
+ if (ls->ls_device.name)
+ return 0;
+
error = -ENOMEM;
len = strlen(name) + strlen(name_prefix) + 2;
ls->ls_device.name = kzalloc(len, GFP_KERNEL);
@@ -363,6 +369,22 @@ fail:
return error;
}
+int dlm_device_deregister(struct dlm_ls *ls)
+{
+ int error;
+
+ /* The device is not registered. This happens when the lockspace
+ was never used from userspace, or when device_create_lockspace()
+ calls dlm_release_lockspace() after the register fails. */
+ if (!ls->ls_device.name)
+ return 0;
+
+ error = misc_deregister(&ls->ls_device);
+ if (!error)
+ kfree(ls->ls_device.name);
+ return error;
+}
+
static int device_user_purge(struct dlm_user_proc *proc,
struct dlm_purge_params *params)
{
@@ -397,7 +419,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
if (!ls)
return -ENOENT;
- error = create_misc_device(ls, params->name);
+ error = dlm_device_register(ls, params->name);
dlm_put_lockspace(ls);
if (error)
@@ -421,31 +443,22 @@ static int device_remove_lockspace(struct dlm_lspace_params *params)
if (!ls)
return -ENOENT;
- /* Deregister the misc device first, so we don't have
- * a device that's not attached to a lockspace. If
- * dlm_release_lockspace fails then we can recreate it
- */
- error = misc_deregister(&ls->ls_device);
- if (error) {
- dlm_put_lockspace(ls);
- goto out;
- }
- kfree(ls->ls_device.name);
-
if (params->flags & DLM_USER_LSFLG_FORCEFREE)
force = 2;
lockspace = ls->ls_local_handle;
+ dlm_put_lockspace(ls);
- /* dlm_release_lockspace waits for references to go to zero,
- so all processes will need to close their device for the ls
- before the release will procede */
+ /* The final dlm_release_lockspace waits for references to go to
+ zero, so all processes will need to close their device for the
+ ls before the release will proceed. release also calls the
+ device_deregister above. Converting a positive return value
+ from release to zero means that userspace won't know when its
+ release was the final one, but it shouldn't need to know. */
- dlm_put_lockspace(ls);
error = dlm_release_lockspace(lockspace, force);
- if (error)
- create_misc_device(ls, ls->ls_name);
- out:
+ if (error > 0)
+ error = 0;
return error;
}
@@ -623,17 +636,13 @@ static int device_open(struct inode *inode, struct file *file)
struct dlm_user_proc *proc;
struct dlm_ls *ls;
- lock_kernel();
ls = dlm_find_lockspace_device(iminor(inode));
- if (!ls) {
- unlock_kernel();
+ if (!ls)
return -ENOENT;
- }
proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL);
if (!proc) {
dlm_put_lockspace(ls);
- unlock_kernel();
return -ENOMEM;
}
@@ -645,7 +654,6 @@ static int device_open(struct inode *inode, struct file *file)
spin_lock_init(&proc->locks_spin);
init_waitqueue_head(&proc->wait);
file->private_data = proc;
- unlock_kernel();
return 0;
}
@@ -878,9 +886,28 @@ static unsigned int device_poll(struct file *file, poll_table *wait)
return 0;
}
+int dlm_user_daemon_available(void)
+{
+ /* dlm_controld hasn't started (or, has started, but not
+ properly populated configfs) */
+
+ if (!dlm_our_nodeid())
+ return 0;
+
+ /* This is to deal with versions of dlm_controld that don't
+ know about the monitor device. We assume that if the
+ dlm_controld was started (above), but the monitor device
+ was never opened, that it's an old version. dlm_controld
+ should open the monitor device before populating configfs. */
+
+ if (dlm_monitor_unused)
+ return 1;
+
+ return atomic_read(&dlm_monitor_opened) ? 1 : 0;
+}
+
static int ctl_device_open(struct inode *inode, struct file *file)
{
- cycle_kernel_lock();
file->private_data = NULL;
return 0;
}
@@ -890,6 +917,20 @@ static int ctl_device_close(struct inode *inode, struct file *file)
return 0;
}
+static int monitor_device_open(struct inode *inode, struct file *file)
+{
+ atomic_inc(&dlm_monitor_opened);
+ dlm_monitor_unused = 0;
+ return 0;
+}
+
+static int monitor_device_close(struct inode *inode, struct file *file)
+{
+ if (atomic_dec_and_test(&dlm_monitor_opened))
+ dlm_stop_lockspaces();
+ return 0;
+}
+
static const struct file_operations device_fops = {
.open = device_open,
.release = device_close,
@@ -913,19 +954,42 @@ static struct miscdevice ctl_device = {
.minor = MISC_DYNAMIC_MINOR,
};
+static const struct file_operations monitor_device_fops = {
+ .open = monitor_device_open,
+ .release = monitor_device_close,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice monitor_device = {
+ .name = "dlm-monitor",
+ .fops = &monitor_device_fops,
+ .minor = MISC_DYNAMIC_MINOR,
+};
+
int __init dlm_user_init(void)
{
int error;
+ atomic_set(&dlm_monitor_opened, 0);
+
error = misc_register(&ctl_device);
- if (error)
+ if (error) {
log_print("misc_register failed for control device");
+ goto out;
+ }
+ error = misc_register(&monitor_device);
+ if (error) {
+ log_print("misc_register failed for monitor device");
+ misc_deregister(&ctl_device);
+ }
+ out:
return error;
}
void dlm_user_exit(void)
{
misc_deregister(&ctl_device);
+ misc_deregister(&monitor_device);
}
diff --git a/fs/dlm/user.h b/fs/dlm/user.h
index d38e9f3e4151..35eb6a13d616 100644
--- a/fs/dlm/user.h
+++ b/fs/dlm/user.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2006 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
@@ -12,5 +12,7 @@
void dlm_user_add_ast(struct dlm_lkb *lkb, int type);
int dlm_user_init(void);
void dlm_user_exit(void);
+int dlm_device_deregister(struct dlm_ls *ls);
+int dlm_user_daemon_available(void);
#endif
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 302e95c4af7e..fb98b3d847ed 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -6,6 +6,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/msdos_fs.h>
+#include <linux/blkdev.h>
struct fatent_operations {
void (*ent_blocknr)(struct super_block *, int, int *, sector_t *);
@@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
struct fat_entry fatent;
struct buffer_head *bhs[MAX_BUF_PER_PAGE];
int i, err, nr_bhs;
+ int first_cl = cluster;
nr_bhs = 0;
fatent_init(&fatent);
@@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inode, int cluster)
goto error;
}
+ /*
+ * Issue discard for the sectors we no longer care about,
+ * batching contiguous clusters into one request
+ */
+ if (cluster != fatent.entry + 1) {
+ int nr_clus = fatent.entry - first_cl + 1;
+
+ sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl),
+ nr_clus * sbi->sec_per_clus);
+ first_cl = cluster;
+ }
+
ops->ent_put(&fatent, FAT_ENT_FREE);
if (sbi->free_clusters != -1) {
sbi->free_clusters++;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 13391e546616..c962283d4e7f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1265,6 +1265,8 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
if (time_before(now, holdtime))
delay = holdtime - now;
+ if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
+ delay = gl->gl_ops->go_min_hold_time;
spin_lock(&gl->gl_spin);
handle_callback(gl, state, 1, delay);
@@ -1578,8 +1580,6 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
*p++ = 'a';
if (flags & GL_EXACT)
*p++ = 'E';
- if (flags & GL_ATIME)
- *p++ = 'a';
if (flags & GL_NOCACHE)
*p++ = 'c';
if (test_bit(HIF_HOLDER, &iflags))
@@ -1816,15 +1816,17 @@ restart:
if (gl) {
gi->gl = hlist_entry(gl->gl_list.next,
struct gfs2_glock, gl_list);
- if (gi->gl)
- gfs2_glock_hold(gi->gl);
+ } else {
+ gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
+ struct gfs2_glock, gl_list);
}
+ if (gi->gl)
+ gfs2_glock_hold(gi->gl);
read_unlock(gl_lock_addr(gi->hash));
if (gl)
gfs2_glock_put(gl);
- if (gl && gi->gl == NULL)
- gi->hash++;
while (gi->gl == NULL) {
+ gi->hash++;
if (gi->hash >= GFS2_GL_HASH_SIZE)
return 1;
read_lock(gl_lock_addr(gi->hash));
@@ -1833,7 +1835,6 @@ restart:
if (gi->gl)
gfs2_glock_hold(gi->gl);
read_unlock(gl_lock_addr(gi->hash));
- gi->hash++;
}
if (gi->sdp != gi->gl->gl_sbd)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 971d92af70fc..695c6b193611 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -24,7 +24,6 @@
#define GL_ASYNC 0x00000040
#define GL_EXACT 0x00000080
#define GL_SKIP 0x00000100
-#define GL_ATIME 0x00000200
#define GL_NOCACHE 0x00000400
#define GLR_TRYFAILED 13
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 448697a5c462..f566ec1b4e8e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -386,20 +386,21 @@ struct gfs2_statfs_change_host {
#define GFS2_DATA_ORDERED 2
struct gfs2_args {
- char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
- char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
- char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
- int ar_spectator; /* Don't get a journal because we're always RO */
- int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
- int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
- int ar_localcaching; /* Local-style caching (dangerous on multihost) */
- int ar_debug; /* Oops on errors instead of trying to be graceful */
- int ar_upgrade; /* Upgrade ondisk/multihost format */
- unsigned int ar_num_glockd; /* Number of glockd threads */
- int ar_posix_acl; /* Enable posix acls */
- int ar_quota; /* off/account/on */
- int ar_suiddir; /* suiddir support */
- int ar_data; /* ordered/writeback */
+ char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
+ char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
+ char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
+ unsigned int ar_spectator:1; /* Don't get a journal */
+ unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */
+ unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */
+ unsigned int ar_localcaching:1; /* Local caching */
+ unsigned int ar_debug:1; /* Oops on errors */
+ unsigned int ar_upgrade:1; /* Upgrade ondisk format */
+ unsigned int ar_posix_acl:1; /* Enable posix acls */
+ unsigned int ar_quota:2; /* off/account/on */
+ unsigned int ar_suiddir:1; /* suiddir support */
+ unsigned int ar_data:2; /* ordered/writeback */
+ unsigned int ar_meta:1; /* mount metafs */
+ unsigned int ar_num_glockd; /* Number of glockd threads */
};
struct gfs2_tune {
@@ -419,7 +420,6 @@ struct gfs2_tune {
unsigned int gt_quota_scale_den; /* Denominator */
unsigned int gt_quota_cache_secs;
unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
- unsigned int gt_atime_quantum; /* Min secs between atime updates */
unsigned int gt_new_files_jdata;
unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
unsigned int gt_stall_secs; /* Detects trouble! */
@@ -432,7 +432,7 @@ enum {
SDF_JOURNAL_CHECKED = 0,
SDF_JOURNAL_LIVE = 1,
SDF_SHUTDOWN = 2,
- SDF_NOATIME = 3,
+ SDF_NOBARRIERS = 3,
};
#define GFS2_FSNAME_LEN 256
@@ -461,7 +461,6 @@ struct gfs2_sb_host {
struct gfs2_sbd {
struct super_block *sd_vfs;
- struct super_block *sd_vfs_meta;
struct kobject sd_kobj;
unsigned long sd_flags; /* SDF_... */
struct gfs2_sb_host sd_sb;
@@ -499,7 +498,9 @@ struct gfs2_sbd {
/* Inode Stuff */
- struct inode *sd_master_dir;
+ struct dentry *sd_master_dir;
+ struct dentry *sd_root_dir;
+
struct inode *sd_jindex;
struct inode *sd_inum_inode;
struct inode *sd_statfs_inode;
@@ -634,7 +635,6 @@ struct gfs2_sbd {
/* Debugging crud */
unsigned long sd_last_warning;
- struct vfsmount *sd_gfs2mnt;
struct dentry *debugfs_dir; /* debugfs directory */
struct dentry *debugfs_dentry_glocks; /* for debugfs */
};
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8b0806a32948..7cee695fa441 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -18,6 +18,7 @@
#include <linux/crc32.h>
#include <linux/lm_interface.h>
#include <linux/security.h>
+#include <linux/time.h>
#include "gfs2.h"
#include "incore.h"
@@ -249,6 +250,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
struct gfs2_dinode_host *di = &ip->i_di;
const struct gfs2_dinode *str = buf;
+ struct timespec atime;
u16 height, depth;
if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
@@ -275,8 +277,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
di->di_size = be64_to_cpu(str->di_size);
i_size_write(&ip->i_inode, di->di_size);
gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
- ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
- ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
+ atime.tv_sec = be64_to_cpu(str->di_atime);
+ atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
+ if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
+ ip->i_inode.i_atime = atime;
ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
@@ -1033,13 +1037,11 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
if (bh)
brelse(bh);
- if (!inode)
- return ERR_PTR(-ENOMEM);
return inode;
fail_gunlock2:
gfs2_glock_dq_uninit(ghs + 1);
- if (inode)
+ if (inode && !IS_ERR(inode))
iput(inode);
fail_gunlock:
gfs2_glock_dq(ghs);
@@ -1140,54 +1142,6 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
return 0;
}
-/*
- * gfs2_ok_to_move - check if it's ok to move a directory to another directory
- * @this: move this
- * @to: to here
- *
- * Follow @to back to the root and make sure we don't encounter @this
- * Assumes we already hold the rename lock.
- *
- * Returns: errno
- */
-
-int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
-{
- struct inode *dir = &to->i_inode;
- struct super_block *sb = dir->i_sb;
- struct inode *tmp;
- struct qstr dotdot;
- int error = 0;
-
- gfs2_str2qstr(&dotdot, "..");
-
- igrab(dir);
-
- for (;;) {
- if (dir == &this->i_inode) {
- error = -EINVAL;
- break;
- }
- if (dir == sb->s_root->d_inode) {
- error = 0;
- break;
- }
-
- tmp = gfs2_lookupi(dir, &dotdot, 1);
- if (IS_ERR(tmp)) {
- error = PTR_ERR(tmp);
- break;
- }
-
- iput(dir);
- dir = tmp;
- }
-
- iput(dir);
-
- return error;
-}
-
/**
* gfs2_readlinki - return the contents of a symlink
* @ip: the symlink's inode
@@ -1207,8 +1161,8 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
unsigned int x;
int error;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
- error = gfs2_glock_nq_atime(&i_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+ error = gfs2_glock_nq(&i_gh);
if (error) {
gfs2_holder_uninit(&i_gh);
return error;
@@ -1243,101 +1197,6 @@ out:
return error;
}
-/**
- * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
- * conditionally update the inode's atime
- * @gh: the holder to acquire
- *
- * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
- * Update if the difference between the current time and the inode's current
- * atime is greater than an interval specified at mount.
- *
- * Returns: errno
- */
-
-int gfs2_glock_nq_atime(struct gfs2_holder *gh)
-{
- struct gfs2_glock *gl = gh->gh_gl;
- struct gfs2_sbd *sdp = gl->gl_sbd;
- struct gfs2_inode *ip = gl->gl_object;
- s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
- unsigned int state;
- int flags;
- int error;
- struct timespec tv = CURRENT_TIME;
-
- if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
- gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
- gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
- return -EINVAL;
-
- state = gh->gh_state;
- flags = gh->gh_flags;
-
- error = gfs2_glock_nq(gh);
- if (error)
- return error;
-
- if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
- (sdp->sd_vfs->s_flags & MS_RDONLY))
- return 0;
-
- if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
- gfs2_glock_dq(gh);
- gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
- gh);
- error = gfs2_glock_nq(gh);
- if (error)
- return error;
-
- /* Verify that atime hasn't been updated while we were
- trying to get exclusive lock. */
-
- tv = CURRENT_TIME;
- if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
- struct buffer_head *dibh;
- struct gfs2_dinode *di;
-
- error = gfs2_trans_begin(sdp, RES_DINODE, 0);
- if (error == -EROFS)
- return 0;
- if (error)
- goto fail;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
-
- ip->i_inode.i_atime = tv;
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- di = (struct gfs2_dinode *)dibh->b_data;
- di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
- di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
- brelse(dibh);
-
- gfs2_trans_end(sdp);
- }
-
- /* If someone else has asked for the glock,
- unlock and let them have it. Then reacquire
- in the original state. */
- if (gfs2_glock_is_blocking(gl)) {
- gfs2_glock_dq(gh);
- gfs2_holder_reinit(state, flags, gh);
- return gfs2_glock_nq(gh);
- }
- }
-
- return 0;
-
-fail_end_trans:
- gfs2_trans_end(sdp);
-fail:
- gfs2_glock_dq(gh);
- return error;
-}
-
static int
__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
{
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 58f9607d6a86..2d43f69610a0 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -91,9 +91,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
const struct gfs2_inode *ip);
int gfs2_permission(struct inode *inode, int mask);
-int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
-int gfs2_glock_nq_atime(struct gfs2_holder *gh);
int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 09d78c216f48..0c4cbe6c8285 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -144,7 +144,8 @@ static int gdlm_mount(char *table_name, char *host_data,
error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
&ls->dlm_lockspace,
- DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0),
+ DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
+ (nodir ? DLM_LSFL_NODIR : 0),
GDLM_LVB_SIZE);
if (error) {
log_error("dlm_new_lockspace error %d", error);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6c6af9f5e3ab..ad305854bdc6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/bio.h>
#include "gfs2.h"
#include "incore.h"
@@ -584,7 +585,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
memset(bh->b_data, 0, bh->b_size);
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
- unlock_buffer(bh);
gfs2_ail1_empty(sdp, 0);
tail = current_tail(sdp);
@@ -601,8 +601,23 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
lh->lh_hash = cpu_to_be32(hash);
- set_buffer_dirty(bh);
- if (sync_dirty_buffer(bh))
+ bh->b_end_io = end_buffer_write_sync;
+ if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
+ goto skip_barrier;
+ get_bh(bh);
+ submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh);
+ wait_on_buffer(bh);
+ if (buffer_eopnotsupp(bh)) {
+ clear_buffer_eopnotsupp(bh);
+ set_buffer_uptodate(bh);
+ set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
+ lock_buffer(bh);
+skip_barrier:
+ get_bh(bh);
+ submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh);
+ wait_on_buffer(bh);
+ }
+ if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
brelse(bh);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index b941f9f9f958..df48333e6f01 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -42,6 +42,7 @@ enum {
Opt_nosuiddir,
Opt_data_writeback,
Opt_data_ordered,
+ Opt_meta,
Opt_err,
};
@@ -66,6 +67,7 @@ static match_table_t tokens = {
{Opt_nosuiddir, "nosuiddir"},
{Opt_data_writeback, "data=writeback"},
{Opt_data_ordered, "data=ordered"},
+ {Opt_meta, "meta"},
{Opt_err, NULL}
};
@@ -239,6 +241,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
case Opt_data_ordered:
args->ar_data = GFS2_DATA_ORDERED;
break;
+ case Opt_meta:
+ if (remount && args->ar_meta != 1)
+ goto cant_remount;
+ args->ar_meta = 1;
+ break;
case Opt_err:
default:
fs_info(sdp, "unknown option: %s\n", o);
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index e64a1b04117a..27563816e1c5 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -512,8 +512,8 @@ static int gfs2_readpage(struct file *file, struct page *page)
int error;
unlock_page(page);
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
- error = gfs2_glock_nq_atime(&gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ error = gfs2_glock_nq(&gh);
if (unlikely(error))
goto out;
error = AOP_TRUNCATED_PAGE;
@@ -594,8 +594,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
struct gfs2_holder gh;
int ret;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
- ret = gfs2_glock_nq_atime(&gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ ret = gfs2_glock_nq(&gh);
if (unlikely(ret))
goto out_uninit;
if (!gfs2_is_stuffed(ip))
@@ -636,8 +636,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
unsigned to = from + len;
struct page *page;
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
- error = gfs2_glock_nq_atime(&ip->i_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+ error = gfs2_glock_nq(&ip->i_gh);
if (unlikely(error))
goto out_uninit;
@@ -975,7 +975,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
if (gfs2_is_stuffed(ip))
return 0;
- if (offset > i_size_read(&ip->i_inode))
+ if (offset >= i_size_read(&ip->i_inode))
return 0;
return 1;
}
@@ -1000,8 +1000,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
* unfortunately have the option of only flushing a range like
* the VFS does.
*/
- gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
- rv = gfs2_glock_nq_atime(&gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
+ rv = gfs2_glock_nq(&gh);
if (rv)
return rv;
rv = gfs2_ok_for_dio(ip, rw, offset);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index e9a366d4411c..3a747f8e2188 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -89,8 +89,8 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
u64 offset = file->f_pos;
int error;
- gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
- error = gfs2_glock_nq_atime(&d_gh);
+ gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+ error = gfs2_glock_nq(&d_gh);
if (error) {
gfs2_holder_uninit(&d_gh);
return error;
@@ -153,8 +153,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
int error;
u32 fsflags;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
- error = gfs2_glock_nq_atime(&gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ error = gfs2_glock_nq(&gh);
if (error)
return error;
@@ -351,8 +351,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
struct gfs2_alloc *al;
int ret;
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh);
- ret = gfs2_glock_nq_atime(&gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ ret = gfs2_glock_nq(&gh);
if (ret)
goto out;
@@ -434,8 +434,8 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
struct gfs2_holder i_gh;
int error;
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
- error = gfs2_glock_nq_atime(&i_gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
+ error = gfs2_glock_nq(&i_gh);
if (error) {
gfs2_holder_uninit(&i_gh);
return error;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b4d1d6490633..b117fcf2c4f5 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -40,6 +40,44 @@
#define DO 0
#define UNDO 1
+static const u32 gfs2_old_fs_formats[] = {
+ 0
+};
+
+static const u32 gfs2_old_multihost_formats[] = {
+ 0
+};
+
+/**
+ * gfs2_tune_init - Fill a gfs2_tune structure with default values
+ * @gt: tune
+ *
+ */
+
+static void gfs2_tune_init(struct gfs2_tune *gt)
+{
+ spin_lock_init(&gt->gt_spin);
+
+ gt->gt_demote_secs = 300;
+ gt->gt_incore_log_blocks = 1024;
+ gt->gt_log_flush_secs = 60;
+ gt->gt_recoverd_secs = 60;
+ gt->gt_logd_secs = 1;
+ gt->gt_quotad_secs = 5;
+ gt->gt_quota_simul_sync = 64;
+ gt->gt_quota_warn_period = 10;
+ gt->gt_quota_scale_num = 1;
+ gt->gt_quota_scale_den = 1;
+ gt->gt_quota_cache_secs = 300;
+ gt->gt_quota_quantum = 60;
+ gt->gt_new_files_jdata = 0;
+ gt->gt_max_readahead = 1 << 18;
+ gt->gt_stall_secs = 600;
+ gt->gt_complain_secs = 10;
+ gt->gt_statfs_quantum = 30;
+ gt->gt_statfs_slow = 0;
+}
+
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
@@ -96,21 +134,271 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
return sdp;
}
-static void init_vfs(struct super_block *sb, unsigned noatime)
+
+/**
+ * gfs2_check_sb - Check superblock
+ * @sdp: the filesystem
+ * @sb: The superblock
+ * @silent: Don't print a message if the check fails
+ *
+ * Checks the version code of the FS is one that we understand how to
+ * read and that the sizes of the various on-disk structures have not
+ * changed.
+ */
+
+static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
{
- struct gfs2_sbd *sdp = sb->s_fs_info;
+ unsigned int x;
- sb->s_magic = GFS2_MAGIC;
- sb->s_op = &gfs2_super_ops;
- sb->s_export_op = &gfs2_export_ops;
- sb->s_time_gran = 1;
- sb->s_maxbytes = MAX_LFS_FILESIZE;
+ if (sb->sb_magic != GFS2_MAGIC ||
+ sb->sb_type != GFS2_METATYPE_SB) {
+ if (!silent)
+ printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
+ return -EINVAL;
+ }
+
+ /* If format numbers match exactly, we're done. */
+
+ if (sb->sb_fs_format == GFS2_FORMAT_FS &&
+ sb->sb_multihost_format == GFS2_FORMAT_MULTI)
+ return 0;
+
+ if (sb->sb_fs_format != GFS2_FORMAT_FS) {
+ for (x = 0; gfs2_old_fs_formats[x]; x++)
+ if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
+ break;
+
+ if (!gfs2_old_fs_formats[x]) {
+ printk(KERN_WARNING
+ "GFS2: code version (%u, %u) is incompatible "
+ "with ondisk format (%u, %u)\n",
+ GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+ sb->sb_fs_format, sb->sb_multihost_format);
+ printk(KERN_WARNING
+ "GFS2: I don't know how to upgrade this FS\n");
+ return -EINVAL;
+ }
+ }
+
+ if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+ for (x = 0; gfs2_old_multihost_formats[x]; x++)
+ if (gfs2_old_multihost_formats[x] ==
+ sb->sb_multihost_format)
+ break;
+
+ if (!gfs2_old_multihost_formats[x]) {
+ printk(KERN_WARNING
+ "GFS2: code version (%u, %u) is incompatible "
+ "with ondisk format (%u, %u)\n",
+ GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+ sb->sb_fs_format, sb->sb_multihost_format);
+ printk(KERN_WARNING
+ "GFS2: I don't know how to upgrade this FS\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!sdp->sd_args.ar_upgrade) {
+ printk(KERN_WARNING
+ "GFS2: code version (%u, %u) is incompatible "
+ "with ondisk format (%u, %u)\n",
+ GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
+ sb->sb_fs_format, sb->sb_multihost_format);
+ printk(KERN_INFO
+ "GFS2: Use the \"upgrade\" mount option to upgrade "
+ "the FS\n");
+ printk(KERN_INFO "GFS2: See the manual for more details\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void end_bio_io_page(struct bio *bio, int error)
+{
+ struct page *page = bio->bi_private;
- if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
- set_bit(noatime, &sdp->sd_flags);
+ if (!error)
+ SetPageUptodate(page);
+ else
+ printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
+ unlock_page(page);
+}
+
+static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+{
+ const struct gfs2_sb *str = buf;
+
+ sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
+ sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
+ sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
+ sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
+ sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
+ sb->sb_bsize = be32_to_cpu(str->sb_bsize);
+ sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
+ sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
+ sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
+ sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
+ sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
+
+ memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
+ memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
+}
+
+/**
+ * gfs2_read_super - Read the gfs2 super block from disk
+ * @sdp: The GFS2 super block
+ * @sector: The location of the super block
+ * @error: The error code to return
+ *
+ * This uses the bio functions to read the super block from disk
+ * because we want to be 100% sure that we never read cached data.
+ * A super block is read twice only during each GFS2 mount and is
+ * never written to by the filesystem. The first time its read no
+ * locks are held, and the only details which are looked at are those
+ * relating to the locking protocol. Once locking is up and working,
+ * the sb is read again under the lock to establish the location of
+ * the master directory (contains pointers to journals etc) and the
+ * root directory.
+ *
+ * Returns: 0 on success or error
+ */
+
+static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
+{
+ struct super_block *sb = sdp->sd_vfs;
+ struct gfs2_sb *p;
+ struct page *page;
+ struct bio *bio;
+
+ page = alloc_page(GFP_NOFS);
+ if (unlikely(!page))
+ return -ENOBUFS;
+
+ ClearPageUptodate(page);
+ ClearPageDirty(page);
+ lock_page(page);
+
+ bio = bio_alloc(GFP_NOFS, 1);
+ if (unlikely(!bio)) {
+ __free_page(page);
+ return -ENOBUFS;
+ }
- /* Don't let the VFS update atimes. GFS2 handles this itself. */
- sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
+ bio->bi_sector = sector * (sb->s_blocksize >> 9);
+ bio->bi_bdev = sb->s_bdev;
+ bio_add_page(bio, page, PAGE_SIZE, 0);
+
+ bio->bi_end_io = end_bio_io_page;
+ bio->bi_private = page;
+ submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
+ wait_on_page_locked(page);
+ bio_put(bio);
+ if (!PageUptodate(page)) {
+ __free_page(page);
+ return -EIO;
+ }
+ p = kmap(page);
+ gfs2_sb_in(&sdp->sd_sb, p);
+ kunmap(page);
+ __free_page(page);
+ return 0;
+}
+/**
+ * gfs2_read_sb - Read super block
+ * @sdp: The GFS2 superblock
+ * @gl: the glock for the superblock (assumed to be held)
+ * @silent: Don't print message if mount fails
+ *
+ */
+
+static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
+{
+ u32 hash_blocks, ind_blocks, leaf_blocks;
+ u32 tmp_blocks;
+ unsigned int x;
+ int error;
+
+ error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+ if (error) {
+ if (!silent)
+ fs_err(sdp, "can't read superblock\n");
+ return error;
+ }
+
+ error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
+ if (error)
+ return error;
+
+ sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
+ GFS2_BASIC_BLOCK_SHIFT;
+ sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+ sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_dinode)) / sizeof(u64);
+ sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_meta_header)) / sizeof(u64);
+ sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
+ sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
+ sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
+ sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
+ sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_meta_header)) /
+ sizeof(struct gfs2_quota_change);
+
+ /* Compute maximum reservation required to add a entry to a directory */
+
+ hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
+ sdp->sd_jbsize);
+
+ ind_blocks = 0;
+ for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
+ tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
+ ind_blocks += tmp_blocks;
+ }
+
+ leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
+
+ sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
+
+ sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_dinode);
+ sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
+ for (x = 2;; x++) {
+ u64 space, d;
+ u32 m;
+
+ space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
+ d = space;
+ m = do_div(d, sdp->sd_inptrs);
+
+ if (d != sdp->sd_heightsize[x - 1] || m)
+ break;
+ sdp->sd_heightsize[x] = space;
+ }
+ sdp->sd_max_height = x;
+ sdp->sd_heightsize[x] = ~0;
+ gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
+
+ sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
+ sizeof(struct gfs2_dinode);
+ sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
+ for (x = 2;; x++) {
+ u64 space, d;
+ u32 m;
+
+ space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
+ d = space;
+ m = do_div(d, sdp->sd_inptrs);
+
+ if (d != sdp->sd_jheightsize[x - 1] || m)
+ break;
+ sdp->sd_jheightsize[x] = space;
+ }
+ sdp->sd_max_jheight = x;
+ sdp->sd_jheightsize[x] = ~0;
+ gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
+
+ return 0;
}
static int init_names(struct gfs2_sbd *sdp, int silent)
@@ -224,51 +512,59 @@ fail:
return error;
}
-static inline struct inode *gfs2_lookup_root(struct super_block *sb,
- u64 no_addr)
+static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
+ u64 no_addr, const char *name)
{
- return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ struct dentry *dentry;
+ struct inode *inode;
+
+ inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
+ if (IS_ERR(inode)) {
+ fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
+ return PTR_ERR(inode);
+ }
+ dentry = d_alloc_root(inode);
+ if (!dentry) {
+ fs_err(sdp, "can't alloc %s dentry\n", name);
+ iput(inode);
+ return -ENOMEM;
+ }
+ dentry->d_op = &gfs2_dops;
+ *dptr = dentry;
+ return 0;
}
-static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
+static int init_sb(struct gfs2_sbd *sdp, int silent)
{
struct super_block *sb = sdp->sd_vfs;
struct gfs2_holder sb_gh;
u64 no_addr;
- struct inode *inode;
- int error = 0;
+ int ret;
- if (undo) {
- if (sb->s_root) {
- dput(sb->s_root);
- sb->s_root = NULL;
- }
- return 0;
+ ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
+ LM_ST_SHARED, 0, &sb_gh);
+ if (ret) {
+ fs_err(sdp, "can't acquire superblock glock: %d\n", ret);
+ return ret;
}
- error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
- LM_ST_SHARED, 0, &sb_gh);
- if (error) {
- fs_err(sdp, "can't acquire superblock glock: %d\n", error);
- return error;
- }
-
- error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
- if (error) {
- fs_err(sdp, "can't read superblock: %d\n", error);
+ ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
+ if (ret) {
+ fs_err(sdp, "can't read superblock: %d\n", ret);
goto out;
}
/* Set up the buffer cache and SB for real */
if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
- error = -EINVAL;
+ ret = -EINVAL;
fs_err(sdp, "FS block size (%u) is too small for device "
"block size (%u)\n",
sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
goto out;
}
if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
- error = -EINVAL;
+ ret = -EINVAL;
fs_err(sdp, "FS block size (%u) is too big for machine "
"page size (%u)\n",
sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
@@ -278,26 +574,21 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
/* Get the root inode */
no_addr = sdp->sd_sb.sb_root_dir.no_addr;
- if (sb->s_type == &gfs2meta_fs_type)
- no_addr = sdp->sd_sb.sb_master_dir.no_addr;
- inode = gfs2_lookup_root(sb, no_addr);
- if (IS_ERR(inode)) {
- error = PTR_ERR(inode);
- fs_err(sdp, "can't read in root inode: %d\n", error);
+ ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root");
+ if (ret)
goto out;
- }
- sb->s_root = d_alloc_root(inode);
- if (!sb->s_root) {
- fs_err(sdp, "can't get root dentry\n");
- error = -ENOMEM;
- iput(inode);
- } else
- sb->s_root->d_op = &gfs2_dops;
-
+ /* Get the master inode */
+ no_addr = sdp->sd_sb.sb_master_dir.no_addr;
+ ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master");
+ if (ret) {
+ dput(sdp->sd_root_dir);
+ goto out;
+ }
+ sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir);
out:
gfs2_glock_dq_uninit(&sb_gh);
- return error;
+ return ret;
}
/**
@@ -372,6 +663,7 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
static int init_journal(struct gfs2_sbd *sdp, int undo)
{
+ struct inode *master = sdp->sd_master_dir->d_inode;
struct gfs2_holder ji_gh;
struct task_struct *p;
struct gfs2_inode *ip;
@@ -383,7 +675,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
goto fail_recoverd;
}
- sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex");
+ sdp->sd_jindex = gfs2_lookup_simple(master, "jindex");
if (IS_ERR(sdp->sd_jindex)) {
fs_err(sdp, "can't lookup journal index: %d\n", error);
return PTR_ERR(sdp->sd_jindex);
@@ -506,25 +798,17 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
{
int error = 0;
struct gfs2_inode *ip;
- struct inode *inode;
+ struct inode *master = sdp->sd_master_dir->d_inode;
if (undo)
goto fail_qinode;
- inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
- if (IS_ERR(inode)) {
- error = PTR_ERR(inode);
- fs_err(sdp, "can't read in master directory: %d\n", error);
- goto fail;
- }
- sdp->sd_master_dir = inode;
-
error = init_journal(sdp, undo);
if (error)
- goto fail_master;
+ goto fail;
/* Read in the master inode number inode */
- sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum");
+ sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum");
if (IS_ERR(sdp->sd_inum_inode)) {
error = PTR_ERR(sdp->sd_inum_inode);
fs_err(sdp, "can't read in inum inode: %d\n", error);
@@ -533,7 +817,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
/* Read in the master statfs inode */
- sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs");
+ sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
if (IS_ERR(sdp->sd_statfs_inode)) {
error = PTR_ERR(sdp->sd_statfs_inode);
fs_err(sdp, "can't read in statfs inode: %d\n", error);
@@ -541,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
}
/* Read in the resource index inode */
- sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex");
+ sdp->sd_rindex = gfs2_lookup_simple(master, "rindex");
if (IS_ERR(sdp->sd_rindex)) {
error = PTR_ERR(sdp->sd_rindex);
fs_err(sdp, "can't get resource index inode: %d\n", error);
@@ -552,7 +836,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
sdp->sd_rindex_uptodate = 0;
/* Read in the quota inode */
- sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota");
+ sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota");
if (IS_ERR(sdp->sd_quota_inode)) {
error = PTR_ERR(sdp->sd_quota_inode);
fs_err(sdp, "can't get quota file inode: %d\n", error);
@@ -571,8 +855,6 @@ fail_inum:
iput(sdp->sd_inum_inode);
fail_journal:
init_journal(sdp, UNDO);
-fail_master:
- iput(sdp->sd_master_dir);
fail:
return error;
}
@@ -583,6 +865,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
char buf[30];
int error = 0;
struct gfs2_inode *ip;
+ struct inode *master = sdp->sd_master_dir->d_inode;
if (sdp->sd_args.ar_spectator)
return 0;
@@ -590,7 +873,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
if (undo)
goto fail_qc_gh;
- pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node");
+ pn = gfs2_lookup_simple(master, "per_node");
if (IS_ERR(pn)) {
error = PTR_ERR(pn);
fs_err(sdp, "can't find per_node directory: %d\n", error);
@@ -800,7 +1083,11 @@ static int fill_super(struct super_block *sb, void *data, int silent)
goto fail;
}
- init_vfs(sb, SDF_NOATIME);
+ sb->s_magic = GFS2_MAGIC;
+ sb->s_op = &gfs2_super_ops;
+ sb->s_export_op = &gfs2_export_ops;
+ sb->s_time_gran = 1;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
/* Set up the buffer cache and fill in some fake block size values
to allow us to read-in the on-disk superblock. */
@@ -828,7 +1115,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
if (error)
goto fail_lm;
- error = init_sb(sdp, silent, DO);
+ error = init_sb(sdp, silent);
if (error)
goto fail_locking;
@@ -869,7 +1156,11 @@ fail_per_node:
fail_inodes:
init_inodes(sdp, UNDO);
fail_sb:
- init_sb(sdp, 0, UNDO);
+ if (sdp->sd_root_dir)
+ dput(sdp->sd_root_dir);
+ if (sdp->sd_master_dir)
+ dput(sdp->sd_master_dir);
+ sb->s_root = NULL;
fail_locking:
init_locking(sdp, &mount_gh, UNDO);
fail_lm:
@@ -887,151 +1178,63 @@ fail:
}
static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *data, struct vfsmount *mnt)
+ const char *dev_name, void *data, struct vfsmount *mnt)
{
- struct super_block *sb;
- struct gfs2_sbd *sdp;
- int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
- if (error)
- goto out;
- sb = mnt->mnt_sb;
- sdp = sb->s_fs_info;
- sdp->sd_gfs2mnt = mnt;
-out:
- return error;
+ return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
}
-static int fill_super_meta(struct super_block *sb, struct super_block *new,
- void *data, int silent)
+static struct super_block *get_gfs2_sb(const char *dev_name)
{
- struct gfs2_sbd *sdp = sb->s_fs_info;
- struct inode *inode;
- int error = 0;
-
- new->s_fs_info = sdp;
- sdp->sd_vfs_meta = sb;
-
- init_vfs(new, SDF_NOATIME);
-
- /* Get the master inode */
- inode = igrab(sdp->sd_master_dir);
-
- new->s_root = d_alloc_root(inode);
- if (!new->s_root) {
- fs_err(sdp, "can't get root dentry\n");
- error = -ENOMEM;
- iput(inode);
- } else
- new->s_root->d_op = &gfs2_dops;
-
- return error;
-}
-
-static int set_bdev_super(struct super_block *s, void *data)
-{
- s->s_bdev = data;
- s->s_dev = s->s_bdev->bd_dev;
- return 0;
-}
-
-static int test_bdev_super(struct super_block *s, void *data)
-{
- return s->s_bdev == data;
-}
-
-static struct super_block* get_gfs2_sb(const char *dev_name)
-{
- struct kstat stat;
+ struct super_block *sb;
struct nameidata nd;
- struct super_block *sb = NULL, *s;
int error;
error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
if (error) {
- printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n",
- dev_name);
- goto out;
- }
- error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat);
-
- list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) {
- if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
- (S_ISDIR(stat.mode) &&
- s == nd.path.dentry->d_inode->i_sb)) {
- sb = s;
- goto free_nd;
- }
+ printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
+ dev_name, error);
+ return NULL;
}
-
- printk(KERN_WARNING "GFS2: Unrecognized block device or "
- "mount point %s\n", dev_name);
-
-free_nd:
+ sb = nd.path.dentry->d_inode->i_sb;
+ if (sb && (sb->s_type == &gfs2_fs_type))
+ atomic_inc(&sb->s_active);
+ else
+ sb = NULL;
path_put(&nd.path);
-out:
return sb;
}
static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data, struct vfsmount *mnt)
{
- int error = 0;
- struct super_block *sb = NULL, *new;
+ struct super_block *sb = NULL;
struct gfs2_sbd *sdp;
sb = get_gfs2_sb(dev_name);
if (!sb) {
printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
- error = -ENOENT;
- goto error;
+ return -ENOENT;
}
sdp = sb->s_fs_info;
- if (sdp->sd_vfs_meta) {
- printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n");
- error = -EBUSY;
- goto error;
- }
- down(&sb->s_bdev->bd_mount_sem);
- new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev);
- up(&sb->s_bdev->bd_mount_sem);
- if (IS_ERR(new)) {
- error = PTR_ERR(new);
- goto error;
- }
- new->s_flags = flags;
- strlcpy(new->s_id, sb->s_id, sizeof(new->s_id));
- sb_set_blocksize(new, sb->s_blocksize);
- error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0);
- if (error) {
- up_write(&new->s_umount);
- deactivate_super(new);
- goto error;
- }
-
- new->s_flags |= MS_ACTIVE;
-
- /* Grab a reference to the gfs2 mount point */
- atomic_inc(&sdp->sd_gfs2mnt->mnt_count);
- return simple_set_mnt(mnt, new);
-error:
- return error;
+ mnt->mnt_sb = sb;
+ mnt->mnt_root = dget(sdp->sd_master_dir);
+ return 0;
}
static void gfs2_kill_sb(struct super_block *sb)
{
- if (sb->s_fs_info) {
- gfs2_delete_debugfs_file(sb->s_fs_info);
- gfs2_meta_syncfs(sb->s_fs_info);
+ struct gfs2_sbd *sdp = sb->s_fs_info;
+ if (sdp) {
+ gfs2_meta_syncfs(sdp);
+ dput(sdp->sd_root_dir);
+ dput(sdp->sd_master_dir);
+ sdp->sd_root_dir = NULL;
+ sdp->sd_master_dir = NULL;
}
+ shrink_dcache_sb(sb);
kill_block_super(sb);
-}
-
-static void gfs2_kill_sb_meta(struct super_block *sb)
-{
- struct gfs2_sbd *sdp = sb->s_fs_info;
- generic_shutdown_super(sb);
- sdp->sd_vfs_meta = NULL;
- atomic_dec(&sdp->sd_gfs2mnt->mnt_count);
+ if (sdp)
+ gfs2_delete_debugfs_file(sdp);
}
struct file_system_type gfs2_fs_type = {
@@ -1046,7 +1249,6 @@ struct file_system_type gfs2meta_fs_type = {
.name = "gfs2meta",
.fs_flags = FS_REQUIRES_DEV,
.get_sb = gfs2_get_sb_meta,
- .kill_sb = gfs2_kill_sb_meta,
.owner = THIS_MODULE,
};
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index e2c62f73a778..534e1e2c65ca 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- error = gfs2_glock_nq_m(2, ghs);
+ error = gfs2_glock_nq(ghs); /* parent */
if (error)
- goto out;
+ goto out_parent;
+
+ error = gfs2_glock_nq(ghs + 1); /* child */
+ if (error)
+ goto out_child;
error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
if (error)
@@ -245,8 +249,10 @@ out_alloc:
if (alloc_required)
gfs2_alloc_put(dip);
out_gunlock:
- gfs2_glock_dq_m(2, ghs);
-out:
+ gfs2_glock_dq(ghs + 1);
+out_child:
+ gfs2_glock_dq(ghs);
+out_parent:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
if (!error) {
@@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
if (error)
- goto out_rgrp;
+ goto out_gunlock;
error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
if (error)
@@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
+out_gunlock:
gfs2_glock_dq(ghs + 2);
out_rgrp:
gfs2_holder_uninit(ghs + 2);
@@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
struct gfs2_holder ri_gh;
int error;
-
error = gfs2_rindex_hold(sdp, &ri_gh);
if (error)
return error;
@@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
- error = gfs2_glock_nq_m(3, ghs);
+ error = gfs2_glock_nq(ghs); /* parent */
if (error)
- goto out;
+ goto out_parent;
+
+ error = gfs2_glock_nq(ghs + 1); /* child */
+ if (error)
+ goto out_child;
+
+ error = gfs2_glock_nq(ghs + 2); /* rgrp */
+ if (error)
+ goto out_rgrp;
error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
if (error)
@@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
gfs2_trans_end(sdp);
out_gunlock:
- gfs2_glock_dq_m(3, ghs);
-out:
- gfs2_holder_uninit(ghs);
- gfs2_holder_uninit(ghs + 1);
+ gfs2_glock_dq(ghs + 2);
+out_rgrp:
gfs2_holder_uninit(ghs + 2);
+ gfs2_glock_dq(ghs + 1);
+out_child:
+ gfs2_holder_uninit(ghs + 1);
+ gfs2_glock_dq(ghs);
+out_parent:
+ gfs2_holder_uninit(ghs);
gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -571,6 +589,54 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
return 0;
}
+/*
+ * gfs2_ok_to_move - check if it's ok to move a directory to another directory
+ * @this: move this
+ * @to: to here
+ *
+ * Follow @to back to the root and make sure we don't encounter @this
+ * Assumes we already hold the rename lock.
+ *
+ * Returns: errno
+ */
+
+static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
+{
+ struct inode *dir = &to->i_inode;
+ struct super_block *sb = dir->i_sb;
+ struct inode *tmp;
+ struct qstr dotdot;
+ int error = 0;
+
+ gfs2_str2qstr(&dotdot, "..");
+
+ igrab(dir);
+
+ for (;;) {
+ if (dir == &this->i_inode) {
+ error = -EINVAL;
+ break;
+ }
+ if (dir == sb->s_root->d_inode) {
+ error = 0;
+ break;
+ }
+
+ tmp = gfs2_lookupi(dir, &dotdot, 1);
+ if (IS_ERR(tmp)) {
+ error = PTR_ERR(tmp);
+ break;
+ }
+
+ iput(dir);
+ dir = tmp;
+ }
+
+ iput(dir);
+
+ return error;
+}
+
/**
* gfs2_rename - Rename a file
* @odir: Parent directory of old file name
@@ -589,7 +655,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
struct gfs2_inode *nip = NULL;
struct gfs2_sbd *sdp = GFS2_SB(odir);
- struct gfs2_holder ghs[5], r_gh;
+ struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
struct gfs2_rgrpd *nrgd;
unsigned int num_gh;
int dir_rename = 0;
@@ -603,19 +669,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
return 0;
}
- /* Make sure we aren't trying to move a dirctory into it's subdir */
-
- if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) {
- dir_rename = 1;
- error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0,
- &r_gh);
+ if (odip != ndip) {
+ error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
+ 0, &r_gh);
if (error)
goto out;
- error = gfs2_ok_to_move(ip, ndip);
- if (error)
- goto out_gunlock_r;
+ if (S_ISDIR(ip->i_inode.i_mode)) {
+ dir_rename = 1;
+ /* don't move a dirctory into it's subdir */
+ error = gfs2_ok_to_move(ip, ndip);
+ if (error)
+ goto out_gunlock_r;
+ }
}
num_gh = 1;
@@ -639,9 +706,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
}
- error = gfs2_glock_nq_m(num_gh, ghs);
- if (error)
- goto out_uninit;
+ for (x = 0; x < num_gh; x++) {
+ error = gfs2_glock_nq(ghs + x);
+ if (error)
+ goto out_gunlock;
+ }
/* Check out the old directory */
@@ -804,12 +873,12 @@ out_alloc:
if (alloc_required)
gfs2_alloc_put(ndip);
out_gunlock:
- gfs2_glock_dq_m(num_gh, ghs);
-out_uninit:
- for (x = 0; x < num_gh; x++)
+ while (x--) {
+ gfs2_glock_dq(ghs + x);
gfs2_holder_uninit(ghs + x);
+ }
out_gunlock_r:
- if (dir_rename)
+ if (r_gh.gh_gl)
gfs2_glock_dq_uninit(&r_gh);
out:
return error;
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index f66ea0f7a356..d5355d9b5926 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -20,6 +20,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
+#include <linux/time.h>
#include "gfs2.h"
#include "incore.h"
@@ -38,6 +39,7 @@
#include "dir.h"
#include "eattr.h"
#include "bmap.h"
+#include "meta_io.h"
/**
* gfs2_write_inode - Make sure the inode is stable on the disk
@@ -50,16 +52,74 @@
static int gfs2_write_inode(struct inode *inode, int sync)
{
struct gfs2_inode *ip = GFS2_I(inode);
-
- /* Check this is a "normal" inode */
- if (test_bit(GIF_USER, &ip->i_flags)) {
- if (current->flags & PF_MEMALLOC)
- return 0;
- if (sync)
- gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_holder gh;
+ struct buffer_head *bh;
+ struct timespec atime;
+ struct gfs2_dinode *di;
+ int ret = 0;
+
+ /* Check this is a "normal" inode, etc */
+ if (!test_bit(GIF_USER, &ip->i_flags) ||
+ (current->flags & PF_MEMALLOC))
+ return 0;
+ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+ if (ret)
+ goto do_flush;
+ ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
+ if (ret)
+ goto do_unlock;
+ ret = gfs2_meta_inode_buffer(ip, &bh);
+ if (ret == 0) {
+ di = (struct gfs2_dinode *)bh->b_data;
+ atime.tv_sec = be64_to_cpu(di->di_atime);
+ atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
+ if (timespec_compare(&inode->i_atime, &atime) > 0) {
+ gfs2_trans_add_bh(ip->i_gl, bh, 1);
+ gfs2_dinode_out(ip, bh->b_data);
+ }
+ brelse(bh);
}
+ gfs2_trans_end(sdp);
+do_unlock:
+ gfs2_glock_dq_uninit(&gh);
+do_flush:
+ if (sync != 0)
+ gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+ return ret;
+}
- return 0;
+/**
+ * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
+ * @sdp: the filesystem
+ *
+ * Returns: errno
+ */
+
+static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+{
+ struct gfs2_holder t_gh;
+ int error;
+
+ gfs2_quota_sync(sdp);
+ gfs2_statfs_sync(sdp);
+
+ error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
+ &t_gh);
+ if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ return error;
+
+ gfs2_meta_syncfs(sdp);
+ gfs2_log_shutdown(sdp);
+
+ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
+ if (t_gh.gh_gl)
+ gfs2_glock_dq_uninit(&t_gh);
+
+ gfs2_quota_cleanup(sdp);
+
+ return error;
}
/**
@@ -73,12 +133,6 @@ static void gfs2_put_super(struct super_block *sb)
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
- if (!sdp)
- return;
-
- if (!strncmp(sb->s_type->name, "gfs2meta", 8))
- return; /* Nothing to do */
-
/* Unfreeze the filesystem, if we need to */
mutex_lock(&sdp->sd_freeze_lock);
@@ -101,7 +155,6 @@ static void gfs2_put_super(struct super_block *sb)
/* Release stuff */
- iput(sdp->sd_master_dir);
iput(sdp->sd_jindex);
iput(sdp->sd_inum_inode);
iput(sdp->sd_statfs_inode);
@@ -152,6 +205,7 @@ static void gfs2_write_super(struct super_block *sb)
*
* Flushes the log to disk.
*/
+
static int gfs2_sync_fs(struct super_block *sb, int wait)
{
sb->s_dirt = 0;
@@ -270,14 +324,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
}
}
- if (*flags & (MS_NOATIME | MS_NODIRATIME))
- set_bit(SDF_NOATIME, &sdp->sd_flags);
- else
- clear_bit(SDF_NOATIME, &sdp->sd_flags);
-
- /* Don't let the VFS update atimes. GFS2 handles this itself. */
- *flags |= MS_NOATIME | MS_NODIRATIME;
-
return error;
}
@@ -295,6 +341,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
* inode's blocks, or alternatively pass the baton on to another
* node for later deallocation.
*/
+
static void gfs2_drop_inode(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
@@ -333,6 +380,16 @@ static void gfs2_clear_inode(struct inode *inode)
}
}
+static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
+{
+ do {
+ if (d1 == d2)
+ return 1;
+ d1 = d1->d_parent;
+ } while (!IS_ROOT(d1));
+ return 0;
+}
+
/**
* gfs2_show_options - Show mount options for /proc/mounts
* @s: seq_file structure
@@ -346,6 +403,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
struct gfs2_args *args = &sdp->sd_args;
+ if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
+ seq_printf(s, ",meta");
if (args->ar_lockproto[0])
seq_printf(s, ",lockproto=%s", args->ar_lockproto);
if (args->ar_locktable[0])
@@ -414,6 +473,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
* conversion on the iopen lock, but we can change that later. This
* is safe, just less efficient.
*/
+
static void gfs2_delete_inode(struct inode *inode)
{
struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
@@ -478,8 +538,6 @@ out:
clear_inode(inode);
}
-
-
static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
struct gfs2_inode *ip;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index ca831991cbc2..c3ba3d9d0aac 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -33,313 +33,6 @@
#include "trans.h"
#include "util.h"
-static const u32 gfs2_old_fs_formats[] = {
- 0
-};
-
-static const u32 gfs2_old_multihost_formats[] = {
- 0
-};
-
-/**
- * gfs2_tune_init - Fill a gfs2_tune structure with default values
- * @gt: tune
- *
- */
-
-void gfs2_tune_init(struct gfs2_tune *gt)
-{
- spin_lock_init(&gt->gt_spin);
-
- gt->gt_demote_secs = 300;
- gt->gt_incore_log_blocks = 1024;
- gt->gt_log_flush_secs = 60;
- gt->gt_recoverd_secs = 60;
- gt->gt_logd_secs = 1;
- gt->gt_quotad_secs = 5;
- gt->gt_quota_simul_sync = 64;
- gt->gt_quota_warn_period = 10;
- gt->gt_quota_scale_num = 1;
- gt->gt_quota_scale_den = 1;
- gt->gt_quota_cache_secs = 300;
- gt->gt_quota_quantum = 60;
- gt->gt_atime_quantum = 3600;
- gt->gt_new_files_jdata = 0;
- gt->gt_max_readahead = 1 << 18;
- gt->gt_stall_secs = 600;
- gt->gt_complain_secs = 10;
- gt->gt_statfs_quantum = 30;
- gt->gt_statfs_slow = 0;
-}
-
-/**
- * gfs2_check_sb - Check superblock
- * @sdp: the filesystem
- * @sb: The superblock
- * @silent: Don't print a message if the check fails
- *
- * Checks the version code of the FS is one that we understand how to
- * read and that the sizes of the various on-disk structures have not
- * changed.
- */
-
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
-{
- unsigned int x;
-
- if (sb->sb_magic != GFS2_MAGIC ||
- sb->sb_type != GFS2_METATYPE_SB) {
- if (!silent)
- printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
- return -EINVAL;
- }
-
- /* If format numbers match exactly, we're done. */
-
- if (sb->sb_fs_format == GFS2_FORMAT_FS &&
- sb->sb_multihost_format == GFS2_FORMAT_MULTI)
- return 0;
-
- if (sb->sb_fs_format != GFS2_FORMAT_FS) {
- for (x = 0; gfs2_old_fs_formats[x]; x++)
- if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
- break;
-
- if (!gfs2_old_fs_formats[x]) {
- printk(KERN_WARNING
- "GFS2: code version (%u, %u) is incompatible "
- "with ondisk format (%u, %u)\n",
- GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
- sb->sb_fs_format, sb->sb_multihost_format);
- printk(KERN_WARNING
- "GFS2: I don't know how to upgrade this FS\n");
- return -EINVAL;
- }
- }
-
- if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
- for (x = 0; gfs2_old_multihost_formats[x]; x++)
- if (gfs2_old_multihost_formats[x] ==
- sb->sb_multihost_format)
- break;
-
- if (!gfs2_old_multihost_formats[x]) {
- printk(KERN_WARNING
- "GFS2: code version (%u, %u) is incompatible "
- "with ondisk format (%u, %u)\n",
- GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
- sb->sb_fs_format, sb->sb_multihost_format);
- printk(KERN_WARNING
- "GFS2: I don't know how to upgrade this FS\n");
- return -EINVAL;
- }
- }
-
- if (!sdp->sd_args.ar_upgrade) {
- printk(KERN_WARNING
- "GFS2: code version (%u, %u) is incompatible "
- "with ondisk format (%u, %u)\n",
- GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
- sb->sb_fs_format, sb->sb_multihost_format);
- printk(KERN_INFO
- "GFS2: Use the \"upgrade\" mount option to upgrade "
- "the FS\n");
- printk(KERN_INFO "GFS2: See the manual for more details\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-
-static void end_bio_io_page(struct bio *bio, int error)
-{
- struct page *page = bio->bi_private;
-
- if (!error)
- SetPageUptodate(page);
- else
- printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
- unlock_page(page);
-}
-
-static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
-{
- const struct gfs2_sb *str = buf;
-
- sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
- sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
- sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
- sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
- sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
- sb->sb_bsize = be32_to_cpu(str->sb_bsize);
- sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
- sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
- sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
- sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
- sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
-
- memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
- memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-}
-
-/**
- * gfs2_read_super - Read the gfs2 super block from disk
- * @sdp: The GFS2 super block
- * @sector: The location of the super block
- * @error: The error code to return
- *
- * This uses the bio functions to read the super block from disk
- * because we want to be 100% sure that we never read cached data.
- * A super block is read twice only during each GFS2 mount and is
- * never written to by the filesystem. The first time its read no
- * locks are held, and the only details which are looked at are those
- * relating to the locking protocol. Once locking is up and working,
- * the sb is read again under the lock to establish the location of
- * the master directory (contains pointers to journals etc) and the
- * root directory.
- *
- * Returns: 0 on success or error
- */
-
-int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
-{
- struct super_block *sb = sdp->sd_vfs;
- struct gfs2_sb *p;
- struct page *page;
- struct bio *bio;
-
- page = alloc_page(GFP_NOFS);
- if (unlikely(!page))
- return -ENOBUFS;
-
- ClearPageUptodate(page);
- ClearPageDirty(page);
- lock_page(page);
-
- bio = bio_alloc(GFP_NOFS, 1);
- if (unlikely(!bio)) {
- __free_page(page);
- return -ENOBUFS;
- }
-
- bio->bi_sector = sector * (sb->s_blocksize >> 9);
- bio->bi_bdev = sb->s_bdev;
- bio_add_page(bio, page, PAGE_SIZE, 0);
-
- bio->bi_end_io = end_bio_io_page;
- bio->bi_private = page;
- submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
- wait_on_page_locked(page);
- bio_put(bio);
- if (!PageUptodate(page)) {
- __free_page(page);
- return -EIO;
- }
- p = kmap(page);
- gfs2_sb_in(&sdp->sd_sb, p);
- kunmap(page);
- __free_page(page);
- return 0;
-}
-
-/**
- * gfs2_read_sb - Read super block
- * @sdp: The GFS2 superblock
- * @gl: the glock for the superblock (assumed to be held)
- * @silent: Don't print message if mount fails
- *
- */
-
-int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
-{
- u32 hash_blocks, ind_blocks, leaf_blocks;
- u32 tmp_blocks;
- unsigned int x;
- int error;
-
- error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
- if (error) {
- if (!silent)
- fs_err(sdp, "can't read superblock\n");
- return error;
- }
-
- error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
- if (error)
- return error;
-
- sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
- GFS2_BASIC_BLOCK_SHIFT;
- sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
- sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_dinode)) / sizeof(u64);
- sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_meta_header)) / sizeof(u64);
- sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
- sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
- sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
- sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
- sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_meta_header)) /
- sizeof(struct gfs2_quota_change);
-
- /* Compute maximum reservation required to add a entry to a directory */
-
- hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
- sdp->sd_jbsize);
-
- ind_blocks = 0;
- for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
- tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
- ind_blocks += tmp_blocks;
- }
-
- leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
-
- sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
-
- sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_dinode);
- sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
- for (x = 2;; x++) {
- u64 space, d;
- u32 m;
-
- space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
- d = space;
- m = do_div(d, sdp->sd_inptrs);
-
- if (d != sdp->sd_heightsize[x - 1] || m)
- break;
- sdp->sd_heightsize[x] = space;
- }
- sdp->sd_max_height = x;
- sdp->sd_heightsize[x] = ~0;
- gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
-
- sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_dinode);
- sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
- for (x = 2;; x++) {
- u64 space, d;
- u32 m;
-
- space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
- d = space;
- m = do_div(d, sdp->sd_inptrs);
-
- if (d != sdp->sd_jheightsize[x - 1] || m)
- break;
- sdp->sd_jheightsize[x] = space;
- }
- sdp->sd_max_jheight = x;
- sdp->sd_jheightsize[x] = ~0;
- gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
-
- return 0;
-}
-
/**
* gfs2_jindex_hold - Grab a lock on the jindex
* @sdp: The GFS2 superblock
@@ -581,39 +274,6 @@ fail:
return error;
}
-/**
- * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
- * @sdp: the filesystem
- *
- * Returns: errno
- */
-
-int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
-{
- struct gfs2_holder t_gh;
- int error;
-
- gfs2_quota_sync(sdp);
- gfs2_statfs_sync(sdp);
-
- error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
- &t_gh);
- if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
- return error;
-
- gfs2_meta_syncfs(sdp);
- gfs2_log_shutdown(sdp);
-
- clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-
- if (t_gh.gh_gl)
- gfs2_glock_dq_uninit(&t_gh);
-
- gfs2_quota_cleanup(sdp);
-
- return error;
-}
-
static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
{
const struct gfs2_statfs_change *str = buf;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 44361ecc44f7..50a4c9b1215e 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -12,11 +12,6 @@
#include "incore.h"
-void gfs2_tune_init(struct gfs2_tune *gt);
-
-int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
-int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
-int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
void gfs2_lm_unmount(struct gfs2_sbd *sdp);
static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
@@ -40,7 +35,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
struct gfs2_inode **ipp);
int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
-int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
int gfs2_statfs_init(struct gfs2_sbd *sdp);
void gfs2_statfs_change(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 74846559fc3f..7e1879f1a02c 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -269,14 +269,6 @@ ARGS_ATTR(quota, "%u\n");
ARGS_ATTR(suiddir, "%d\n");
ARGS_ATTR(data, "%d\n");
-/* one oddball doesn't fit the macro mold */
-static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%d\n",
- !!test_bit(SDF_NOATIME, &sdp->sd_flags));
-}
-static struct args_attr args_attr_noatime = __ATTR_RO(noatime);
-
static struct attribute *args_attrs[] = {
&args_attr_lockproto.attr,
&args_attr_locktable.attr,
@@ -292,7 +284,6 @@ static struct attribute *args_attrs[] = {
&args_attr_quota.attr,
&args_attr_suiddir.attr,
&args_attr_data.attr,
- &args_attr_noatime.attr,
NULL,
};
@@ -407,7 +398,6 @@ TUNE_ATTR(incore_log_blocks, 0);
TUNE_ATTR(log_flush_secs, 0);
TUNE_ATTR(quota_warn_period, 0);
TUNE_ATTR(quota_quantum, 0);
-TUNE_ATTR(atime_quantum, 0);
TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(statfs_slow, 0);
@@ -427,7 +417,6 @@ static struct attribute *tune_attrs[] = {
&tune_attr_log_flush_secs.attr,
&tune_attr_quota_warn_period.attr,
&tune_attr_quota_quantum.attr,
- &tune_attr_atime_quantum.attr,
&tune_attr_max_readahead.attr,
&tune_attr_complain_secs.attr,
&tune_attr_statfs_slow.attr,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ecc3330972e5..7408227c49c9 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -120,22 +120,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) =
* a pointer to that same buffer (for convenience).
*/
-char *disk_name(struct gendisk *hd, int part, char *buf)
+char *disk_name(struct gendisk *hd, int partno, char *buf)
{
- if (!part)
+ if (!partno)
snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
- snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part);
+ snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
else
- snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part);
+ snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
return buf;
}
const char *bdevname(struct block_device *bdev, char *buf)
{
- int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor;
- return disk_name(bdev->bd_disk, part, buf);
+ return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
}
EXPORT_SYMBOL(bdevname);
@@ -169,7 +168,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
if (isdigit(state->name[strlen(state->name)-1]))
sprintf(state->name, "p");
- state->limit = hd->minors;
+ state->limit = disk_max_parts(hd);
i = res = err = 0;
while (!res && check_part[i]) {
memset(&state->parts, 0, sizeof(state->parts));
@@ -204,21 +203,22 @@ static ssize_t part_start_show(struct device *dev,
return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
}
-static ssize_t part_size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t part_size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
}
-static ssize_t part_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t part_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
+ int cpu;
- preempt_disable();
- part_round_stats(p);
- preempt_enable();
+ cpu = part_stat_lock();
+ part_round_stats(cpu, p);
+ part_stat_unlock();
return sprintf(buf,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
@@ -238,17 +238,17 @@ static ssize_t part_stat_show(struct device *dev,
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
-static ssize_t part_fail_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ssize_t part_fail_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%d\n", p->make_it_fail);
}
-static ssize_t part_fail_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+ssize_t part_fail_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct hd_struct *p = dev_to_part(dev);
int i;
@@ -300,40 +300,34 @@ struct device_type part_type = {
.release = part_release,
};
-static inline void partition_sysfs_add_subdir(struct hd_struct *p)
-{
- struct kobject *k;
-
- k = kobject_get(&p->dev.kobj);
- p->holder_dir = kobject_create_and_add("holders", k);
- kobject_put(k);
-}
-
-static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
+static void delete_partition_rcu_cb(struct rcu_head *head)
{
- struct kobject *k;
+ struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
- k = kobject_get(&disk->dev.kobj);
- disk->holder_dir = kobject_create_and_add("holders", k);
- disk->slave_dir = kobject_create_and_add("slaves", k);
- kobject_put(k);
+ part->start_sect = 0;
+ part->nr_sects = 0;
+ part_stat_set_all(part, 0);
+ put_device(part_to_dev(part));
}
-void delete_partition(struct gendisk *disk, int part)
+void delete_partition(struct gendisk *disk, int partno)
{
- struct hd_struct *p = disk->part[part-1];
+ struct disk_part_tbl *ptbl = disk->part_tbl;
+ struct hd_struct *part;
- if (!p)
+ if (partno >= ptbl->len)
return;
- if (!p->nr_sects)
+
+ part = ptbl->part[partno];
+ if (!part)
return;
- disk->part[part-1] = NULL;
- p->start_sect = 0;
- p->nr_sects = 0;
- part_stat_set_all(p, 0);
- kobject_put(p->holder_dir);
- device_del(&p->dev);
- put_device(&p->dev);
+
+ blk_free_devt(part_devt(part));
+ rcu_assign_pointer(ptbl->part[partno], NULL);
+ kobject_put(part->holder_dir);
+ device_del(part_to_dev(part));
+
+ call_rcu(&part->rcu_head, delete_partition_rcu_cb);
}
static ssize_t whole_disk_show(struct device *dev,
@@ -344,102 +338,132 @@ static ssize_t whole_disk_show(struct device *dev,
static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
whole_disk_show, NULL);
-int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
+int add_partition(struct gendisk *disk, int partno,
+ sector_t start, sector_t len, int flags)
{
struct hd_struct *p;
+ dev_t devt = MKDEV(0, 0);
+ struct device *ddev = disk_to_dev(disk);
+ struct device *pdev;
+ struct disk_part_tbl *ptbl;
+ const char *dname;
int err;
+ err = disk_expand_part_tbl(disk, partno);
+ if (err)
+ return err;
+ ptbl = disk->part_tbl;
+
+ if (ptbl->part[partno])
+ return -EBUSY;
+
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
if (!init_part_stats(p)) {
err = -ENOMEM;
- goto out0;
+ goto out_free;
}
+ pdev = part_to_dev(p);
+
p->start_sect = start;
p->nr_sects = len;
- p->partno = part;
- p->policy = disk->policy;
+ p->partno = partno;
+ p->policy = get_disk_ro(disk);
- if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1]))
- snprintf(p->dev.bus_id, BUS_ID_SIZE,
- "%sp%d", disk->dev.bus_id, part);
+ dname = dev_name(ddev);
+ if (isdigit(dname[strlen(dname) - 1]))
+ snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno);
else
- snprintf(p->dev.bus_id, BUS_ID_SIZE,
- "%s%d", disk->dev.bus_id, part);
+ snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno);
- device_initialize(&p->dev);
- p->dev.devt = MKDEV(disk->major, disk->first_minor + part);
- p->dev.class = &block_class;
- p->dev.type = &part_type;
- p->dev.parent = &disk->dev;
- disk->part[part-1] = p;
+ device_initialize(pdev);
+ pdev->class = &block_class;
+ pdev->type = &part_type;
+ pdev->parent = ddev;
+
+ err = blk_alloc_devt(p, &devt);
+ if (err)
+ goto out_free;
+ pdev->devt = devt;
/* delay uevent until 'holders' subdir is created */
- p->dev.uevent_suppress = 1;
- err = device_add(&p->dev);
+ pdev->uevent_suppress = 1;
+ err = device_add(pdev);
if (err)
- goto out1;
- partition_sysfs_add_subdir(p);
- p->dev.uevent_suppress = 0;
+ goto out_put;
+
+ err = -ENOMEM;
+ p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
+ if (!p->holder_dir)
+ goto out_del;
+
+ pdev->uevent_suppress = 0;
if (flags & ADDPART_FLAG_WHOLEDISK) {
- err = device_create_file(&p->dev, &dev_attr_whole_disk);
+ err = device_create_file(pdev, &dev_attr_whole_disk);
if (err)
- goto out2;
+ goto out_del;
}
+ /* everything is up and running, commence */
+ INIT_RCU_HEAD(&p->rcu_head);
+ rcu_assign_pointer(ptbl->part[partno], p);
+
/* suppress uevent if the disk supresses it */
- if (!disk->dev.uevent_suppress)
- kobject_uevent(&p->dev.kobj, KOBJ_ADD);
+ if (!ddev->uevent_suppress)
+ kobject_uevent(&pdev->kobj, KOBJ_ADD);
return 0;
-out2:
- device_del(&p->dev);
-out1:
- put_device(&p->dev);
- free_part_stats(p);
-out0:
+out_free:
kfree(p);
return err;
+out_del:
+ kobject_put(p->holder_dir);
+ device_del(pdev);
+out_put:
+ put_device(pdev);
+ blk_free_devt(devt);
+ return err;
}
/* Not exported, helper to add_disk(). */
void register_disk(struct gendisk *disk)
{
+ struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
+ struct disk_part_iter piter;
+ struct hd_struct *part;
char *s;
- int i;
- struct hd_struct *p;
int err;
- disk->dev.parent = disk->driverfs_dev;
- disk->dev.devt = MKDEV(disk->major, disk->first_minor);
+ ddev->parent = disk->driverfs_dev;
- strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE);
+ strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE);
/* ewww... some of these buggers have / in the name... */
- s = strchr(disk->dev.bus_id, '/');
+ s = strchr(ddev->bus_id, '/');
if (s)
*s = '!';
/* delay uevents, until we scanned partition table */
- disk->dev.uevent_suppress = 1;
+ ddev->uevent_suppress = 1;
- if (device_add(&disk->dev))
+ if (device_add(ddev))
return;
#ifndef CONFIG_SYSFS_DEPRECATED
- err = sysfs_create_link(block_depr, &disk->dev.kobj,
- kobject_name(&disk->dev.kobj));
+ err = sysfs_create_link(block_depr, &ddev->kobj,
+ kobject_name(&ddev->kobj));
if (err) {
- device_del(&disk->dev);
+ device_del(ddev);
return;
}
#endif
- disk_sysfs_add_subdirs(disk);
+ disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
+ disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
/* No minors to use for partitions */
- if (disk->minors == 1)
+ if (!disk_partitionable(disk))
goto exit;
/* No such device (e.g., media were just removed) */
@@ -458,41 +482,57 @@ void register_disk(struct gendisk *disk)
exit:
/* announce disk after possible partitions are created */
- disk->dev.uevent_suppress = 0;
- kobject_uevent(&disk->dev.kobj, KOBJ_ADD);
+ ddev->uevent_suppress = 0;
+ kobject_uevent(&ddev->kobj, KOBJ_ADD);
/* announce possible partitions */
- for (i = 1; i < disk->minors; i++) {
- p = disk->part[i-1];
- if (!p || !p->nr_sects)
- continue;
- kobject_uevent(&p->dev.kobj, KOBJ_ADD);
- }
+ disk_part_iter_init(&piter, disk, 0);
+ while ((part = disk_part_iter_next(&piter)))
+ kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
+ disk_part_iter_exit(&piter);
}
int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
{
+ struct disk_part_iter piter;
+ struct hd_struct *part;
struct parsed_partitions *state;
- int p, res;
+ int p, highest, res;
if (bdev->bd_part_count)
return -EBUSY;
res = invalidate_partition(disk, 0);
if (res)
return res;
- bdev->bd_invalidated = 0;
- for (p = 1; p < disk->minors; p++)
- delete_partition(disk, p);
+
+ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
+ while ((part = disk_part_iter_next(&piter)))
+ delete_partition(disk, part->partno);
+ disk_part_iter_exit(&piter);
+
if (disk->fops->revalidate_disk)
disk->fops->revalidate_disk(disk);
+ check_disk_size_change(disk, bdev);
+ bdev->bd_invalidated = 0;
if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
return 0;
if (IS_ERR(state)) /* I/O error reading the partition table */
return -EIO;
/* tell userspace that the media / partition table may have changed */
- kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE);
+ kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+ /* Detect the highest partition number and preallocate
+ * disk->part_tbl. This is an optimization and not strictly
+ * necessary.
+ */
+ for (p = 1, highest = 0; p < state->limit; p++)
+ if (state->parts[p].size)
+ highest = p;
+
+ disk_expand_part_tbl(disk, highest);
+
+ /* add partitions */
for (p = 1; p < state->limit; p++) {
sector_t size = state->parts[p].size;
sector_t from = state->parts[p].from;
@@ -541,25 +581,31 @@ EXPORT_SYMBOL(read_dev_sector);
void del_gendisk(struct gendisk *disk)
{
- int p;
+ struct disk_part_iter piter;
+ struct hd_struct *part;
/* invalidate stuff */
- for (p = disk->minors - 1; p > 0; p--) {
- invalidate_partition(disk, p);
- delete_partition(disk, p);
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
+ while ((part = disk_part_iter_next(&piter))) {
+ invalidate_partition(disk, part->partno);
+ delete_partition(disk, part->partno);
}
+ disk_part_iter_exit(&piter);
+
invalidate_partition(disk, 0);
- disk->capacity = 0;
+ blk_free_devt(disk_to_dev(disk)->devt);
+ set_capacity(disk, 0);
disk->flags &= ~GENHD_FL_UP;
unlink_gendisk(disk);
- disk_stat_set_all(disk, 0);
- disk->stamp = 0;
+ part_stat_set_all(&disk->part0, 0);
+ disk->part0.stamp = 0;
- kobject_put(disk->holder_dir);
+ kobject_put(disk->part0.holder_dir);
kobject_put(disk->slave_dir);
disk->driverfs_dev = NULL;
#ifndef CONFIG_SYSFS_DEPRECATED
- sysfs_remove_link(block_depr, disk->dev.bus_id);
+ sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
#endif
- device_del(&disk->dev);
+ device_del(disk_to_dev(disk));
}
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 17ae8ecd9e8b..98dbe1a84528 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h
@@ -5,15 +5,13 @@
* add_gd_partition adds a partitions details to the devices partition
* description.
*/
-enum { MAX_PART = 256 };
-
struct parsed_partitions {
char name[BDEVNAME_SIZE];
struct {
sector_t from;
sector_t size;
int flags;
- } parts[MAX_PART];
+ } parts[DISK_MAX_PARTS];
int next;
int limit;
};
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 986061ae1b9b..36d5fcd3f593 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1001,12 +1001,13 @@ xfs_buf_iodone_work(
* We can get an EOPNOTSUPP to ordered writes. Here we clear the
* ordered flag and reissue them. Because we can't tell the higher
* layers directly that they should not issue ordered I/O anymore, they
- * need to check if the ordered flag was cleared during I/O completion.
+ * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
*/
if ((bp->b_error == EOPNOTSUPP) &&
(bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
XB_TRACE(bp, "ordered_retry", bp->b_iodone);
bp->b_flags &= ~XBF_ORDERED;
+ bp->b_flags |= _XFS_BARRIER_FAILED;
xfs_buf_iorequest(bp);
} else if (bp->b_iodone)
(*(bp->b_iodone))(bp);
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index fe0109956656..456519a088c7 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -85,6 +85,14 @@ typedef enum {
* modifications being lost.
*/
_XBF_PAGE_LOCKED = (1 << 22),
+
+ /*
+ * If we try a barrier write, but it fails we have to communicate
+ * this to the upper layers. Unfortunately b_error gets overwritten
+ * when the buffer is re-issued so we have to add another flag to
+ * keep this information.
+ */
+ _XFS_BARRIER_FAILED = (1 << 23),
} xfs_buf_flags_t;
typedef enum {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 503ea89e8b9a..0b02c6443551 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1033,11 +1033,12 @@ xlog_iodone(xfs_buf_t *bp)
l = iclog->ic_log;
/*
- * If the ordered flag has been removed by a lower
- * layer, it means the underlyin device no longer supports
+ * If the _XFS_BARRIER_FAILED flag was set by a lower
+ * layer, it means the underlying device no longer supports
* barrier I/O. Warn loudly and turn off barriers.
*/
- if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ISORDERED(bp)) {
+ if (bp->b_flags & _XFS_BARRIER_FAILED) {
+ bp->b_flags &= ~_XFS_BARRIER_FAILED;
l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
xfs_fs_cmn_err(CE_WARN, l->l_mp,
"xlog_iodone: Barriers are no longer supported"