diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-03-25 16:50:33 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:01 -0400 |
commit | 593060d756e0c2382d59cf00d5f3b9e3a336c408 (patch) | |
tree | c102f8abd9210eeba8ae4e7f6626b80cfed1b8d8 /fs/btrfs/volumes.c | |
parent | 8a4b83cc8bd75fca29ac68615896d9e92820e7c2 (diff) | |
download | lwn-593060d756e0c2382d59cf00d5f3b9e3a336c408.tar.gz lwn-593060d756e0c2382d59cf00d5f3b9e3a336c408.zip |
Btrfs: Implement raid0 when multiple devices are present
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 130 |
1 files changed, 100 insertions, 30 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 263f01cc3db4..d8fce32a3bbc 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -18,6 +18,7 @@ #include <linux/sched.h> #include <linux/bio.h> #include <linux/buffer_head.h> +#include <asm/div64.h> #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -25,10 +26,24 @@ #include "print-tree.h" #include "volumes.h" -struct map_lookup { +struct stripe { struct btrfs_device *dev; u64 physical; }; + +struct map_lookup { + u64 type; + int io_align; + int io_width; + int stripe_len; + int sector_size; + int num_stripes; + struct stripe stripes[]; +}; + +#define map_lookup_size(n) (sizeof(struct map_lookup) + \ + (sizeof(struct stripe) * (n))) + static DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); @@ -592,6 +607,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 *num_bytes, u64 type) { u64 dev_offset; + struct btrfs_fs_info *info = extent_root->fs_info; struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; struct btrfs_stripe *stripes; struct btrfs_device *device = NULL; @@ -610,10 +626,18 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int looped = 0; int ret; int index; + int stripe_len = 64 * 1024; struct btrfs_key key; if (list_empty(dev_list)) return -ENOSPC; + + if (type & BTRFS_BLOCK_GROUP_RAID0) + num_stripes = btrfs_super_num_devices(&info->super_copy); + if (type & BTRFS_BLOCK_GROUP_DATA) + stripe_len = 64 * 1024; + if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) + stripe_len = 32 * 1024; again: INIT_LIST_HEAD(&private_devs); cur = dev_list->next; @@ -650,9 +674,15 @@ again: if (!chunk) return -ENOMEM; + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); + if (!map) { + kfree(chunk); + return -ENOMEM; + } + stripes = &chunk->stripe; - *num_bytes = calc_size; + *num_bytes = calc_size * num_stripes; index = 0; while(index < num_stripes) { BUG_ON(list_empty(&private_devs)); @@ -669,6 +699,8 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); ret = btrfs_update_device(trans, device); BUG_ON(ret); + map->stripes[index].dev = device; + map->stripes[index].physical = dev_offset; btrfs_set_stack_stripe_devid(stripes + index, device->devid); btrfs_set_stack_stripe_offset(stripes + index, dev_offset); physical = dev_offset; @@ -680,12 +712,18 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); key.offset = *num_bytes; key.type = BTRFS_CHUNK_ITEM_KEY; btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); - btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024); + btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); btrfs_set_stack_chunk_type(chunk, type); btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); - btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize); - btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize); + btrfs_set_stack_chunk_io_align(chunk, stripe_len); + btrfs_set_stack_chunk_io_width(chunk, stripe_len); btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); + map->sector_size = extent_root->sectorsize; + map->stripe_len = stripe_len; + map->io_align = stripe_len; + map->io_width = stripe_len; + map->type = type; + map->num_stripes = num_stripes; ret = btrfs_insert_item(trans, chunk_root, &key, chunk, btrfs_chunk_item_size(num_stripes)); @@ -695,25 +733,11 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; - map = kmalloc(sizeof(*map), GFP_NOFS); - if (!map) { - free_extent_map(em); - return -ENOMEM; - } - em->bdev = (struct block_device *)map; em->start = key.objectid; em->len = key.offset; em->block_start = 0; - map->physical = physical; - map->dev = device; - - if (!map->dev) { - kfree(map); - free_extent_map(em); - return -EIO; - } kfree(chunk); em_tree = &extent_root->fs_info->mapping_tree.map_tree; @@ -758,6 +782,9 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, struct map_lookup *map; struct extent_map_tree *em_tree = &map_tree->map_tree; u64 offset; + u64 stripe_offset; + u64 stripe_nr; + int stripe_index; spin_lock(&em_tree->lock); @@ -767,9 +794,40 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, BUG_ON(em->start > logical || em->start + em->len < logical); map = (struct map_lookup *)em->bdev; offset = logical - em->start; - *phys = map->physical + offset; - *length = em->len - offset; - *dev = map->dev; + + stripe_nr = offset; + /* + * stripe_nr counts the total number of stripes we have to stride + * to get to this block + */ + do_div(stripe_nr, map->stripe_len); + + stripe_offset = stripe_nr * map->stripe_len; + BUG_ON(offset < stripe_offset); + + /* stripe_offset is the offset of this block in its stripe*/ + stripe_offset = offset - stripe_offset; + + /* + * after this do_div call, stripe_nr is the number of stripes + * on this device we have to walk to find the data, and + * stripe_index is the number of our device in the stripe array + */ + stripe_index = do_div(stripe_nr, map->num_stripes); + + BUG_ON(stripe_index >= map->num_stripes); + + *phys = map->stripes[stripe_index].physical + stripe_offset + + stripe_nr * map->stripe_len; + + if (map->type & BTRFS_BLOCK_GROUP_RAID0) { + /* we limit the length of each bio to what fits in a stripe */ + *length = min_t(u64, em->len - offset, + map->stripe_len - stripe_offset); + } else { + *length = em->len - offset; + } + *dev = map->stripes[stripe_index].dev; free_extent_map(em); spin_unlock(&em_tree->lock); return 0; @@ -822,7 +880,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, u64 logical; u64 length; u64 devid; + int num_stripes; int ret; + int i; logical = key->objectid; length = key->offset; @@ -846,7 +906,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, em = alloc_extent_map(GFP_NOFS); if (!em) return -ENOMEM; - map = kmalloc(sizeof(*map), GFP_NOFS); + num_stripes = btrfs_chunk_num_stripes(leaf, chunk); + map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); if (!map) { free_extent_map(em); return -ENOMEM; @@ -857,13 +918,22 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, em->len = length; em->block_start = 0; - map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0); - devid = btrfs_stripe_devid_nr(leaf, chunk, 0); - map->dev = btrfs_find_device(root, devid); - if (!map->dev) { - kfree(map); - free_extent_map(em); - return -EIO; + map->num_stripes = num_stripes; + map->io_width = btrfs_chunk_io_width(leaf, chunk); + map->io_align = btrfs_chunk_io_align(leaf, chunk); + map->sector_size = btrfs_chunk_sector_size(leaf, chunk); + map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); + map->type = btrfs_chunk_type(leaf, chunk); + for (i = 0; i < num_stripes; i++) { + map->stripes[i].physical = + btrfs_stripe_offset_nr(leaf, chunk, i); + devid = btrfs_stripe_devid_nr(leaf, chunk, i); + map->stripes[i].dev = btrfs_find_device(root, devid); + if (!map->stripes[i].dev) { + kfree(map); + free_extent_map(em); + return -EIO; + } } spin_lock(&map_tree->map_tree.lock); |