summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorDavid Woodhouse <David.Woodhouse@intel.com>2013-01-29 18:40:14 -0500
committerChris Mason <chris.mason@fusionio.com>2013-02-01 14:24:23 -0500
commit53b381b3abeb86f12787a6c40fee9b2f71edc23b (patch)
treec1018ba2157778f0200d2ede0c0df48fe5df8f14 /fs/btrfs/inode.c
parent64a167011bcabc1e855658387c8a4464b71f3138 (diff)
downloadlwn-53b381b3abeb86f12787a6c40fee9b2f71edc23b.tar.gz
lwn-53b381b3abeb86f12787a6c40fee9b2f71edc23b.zip
Btrfs: RAID5 and RAID6
This builds on David Woodhouse's original Btrfs raid5/6 implementation. The code has changed quite a bit, blame Chris Mason for any bugs. Read/modify/write is done after the higher levels of the filesystem have prepared a given bio. This means the higher layers are not responsible for building full stripes, and they don't need to query for the topology of the extents that may get allocated during delayed allocation runs. It also means different files can easily share the same stripe. But, it does expose us to incorrect parity if we crash or lose power while doing a read/modify/write cycle. This will be addressed in a later commit. Scrub is unable to repair crc errors on raid5/6 chunks. Discard does not work on raid5/6 (yet) The stripe size is fixed at 64KiB per disk. This will be tunable in a later commit. Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c18
1 files changed, 13 insertions, 5 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1b98c4ce3c6f..6f4e41dca970 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,6 +39,7 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/mount.h>
+#include <linux/blkdev.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@@ -6386,19 +6387,24 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
int async_submit = 0;
map_length = orig_bio->bi_size;
- ret = btrfs_map_block(root->fs_info, READ, start_sector << 9,
+ ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
&map_length, NULL, 0);
if (ret) {
bio_put(orig_bio);
return -EIO;
}
-
if (map_length >= orig_bio->bi_size) {
bio = orig_bio;
goto submit;
}
- async_submit = 1;
+ /* async crcs make it difficult to collect full stripe writes. */
+ if (btrfs_get_alloc_profile(root, 1) &
+ (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
+ async_submit = 0;
+ else
+ async_submit = 1;
+
bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
if (!bio)
return -ENOMEM;
@@ -6440,7 +6446,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio->bi_end_io = btrfs_end_dio_bio;
map_length = orig_bio->bi_size;
- ret = btrfs_map_block(root->fs_info, READ,
+ ret = btrfs_map_block(root->fs_info, rw,
start_sector << 9,
&map_length, NULL, 0);
if (ret) {
@@ -6583,15 +6589,17 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
offset, nr_segs))
return 0;
- return __blockdev_direct_IO(rw, iocb, inode,
+ ret = __blockdev_direct_IO(rw, iocb, inode,
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, 0);
+ return ret;
}
#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)