From 2e635a278354a1a7951e16cfea4c247d6d0e7c99 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 21 Mar 2007 11:12:56 -0400 Subject: Btrfs: initial move to kernel module land Signed-off-by: Chris Mason --- fs/btrfs/super.c | 205 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 fs/btrfs/super.c (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c new file mode 100644 index 000000000000..4ae76044aeab --- /dev/null +++ b/fs/btrfs/super.c @@ -0,0 +1,205 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" + +#define BTRFS_SUPER_MAGIC 0x9123682E +#if 0 +/* some random number */ + +static struct super_operations ramfs_ops; +static struct inode_operations ramfs_dir_inode_operations; + +static struct backing_dev_info ramfs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | + BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | + BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, +}; + +struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) +{ + struct inode * inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blocks = 0; + inode->i_mapping->a_ops = &ramfs_aops; + inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + inode->i_op = &ramfs_file_inode_operations; + inode->i_fop = &ramfs_file_operations; + break; + case S_IFDIR: + inode->i_op = &ramfs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." 
entry) */ + inc_nlink(inode); + break; + case S_IFLNK: + inode->i_op = &page_symlink_inode_operations; + break; + } + } + return inode; +} + +/* + * File creation. Allocate an inode, and we're done.. + */ +/* SMP-safe */ +static int +ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + struct inode * inode = ramfs_get_inode(dir->i_sb, mode, dev); + int error = -ENOSPC; + + if (inode) { + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + inode->i_mode |= S_ISGID; + } + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + error = 0; + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + } + return error; +} + +static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0); + if (!retval) + inc_nlink(dir); + return retval; +} + +static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +{ + return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); +} + +static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) +{ + struct inode *inode; + int error = -ENOSPC; + + inode = ramfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); + if (inode) { + int l = strlen(symname)+1; + error = page_symlink(inode, symname, l); + if (!error) { + if (dir->i_mode & S_ISGID) + inode->i_gid = dir->i_gid; + d_instantiate(dentry, inode); + dget(dentry); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + } else + iput(inode); + } + return error; +} + +static struct inode_operations ramfs_dir_inode_operations = { + .create = ramfs_create, + .lookup = simple_lookup, + .link = simple_link, + .unlink = simple_unlink, + .symlink = ramfs_symlink, + .mkdir = ramfs_mkdir, + .rmdir = simple_rmdir, + .mknod = ramfs_mknod, + .rename = simple_rename, +}; +#endif + +struct inode *btrfs_get_inode(struct super_block *sb, int mode, dev_t dev) +{ + struct inode * inode = new_inode(sb); + + 
if (inode) { + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + } + return inode; +} + +static struct super_operations btrfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, +}; + +static int btrfs_fill_super(struct super_block * sb, void * data, int silent) +{ + struct inode * inode; + struct dentry * root; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = BTRFS_SUPER_MAGIC; + sb->s_op = &btrfs_ops; + sb->s_time_gran = 1; + inode = btrfs_get_inode(sb, S_IFDIR | 0755, 0); + if (!inode) + return -ENOMEM; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return -ENOMEM; + } + sb->s_root = root; + return 0; +} + +static int btrfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, + btrfs_fill_super, mnt); +} + +static struct file_system_type btrfs_fs_type = { + .owner = THIS_MODULE, + .name = "btrfs", + .get_sb = btrfs_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_btrfs_fs(void) +{ + printk("btrfs loaded!\n"); + return register_filesystem(&btrfs_fs_type); +} + +static void __exit exit_btrfs_fs(void) +{ + unregister_filesystem(&btrfs_fs_type); + printk("btrfs unloaded\n"); +} + +module_init(init_btrfs_fs) +module_exit(exit_btrfs_fs) + +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From e20d96d64f9cf9288ffecc9ad4714e91c3b97ca8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Mar 2007 12:13:20 -0400 Subject: Mountable btrfs, with readdir Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 5 +- fs/btrfs/ctree.c | 273 ++++++++++++++++++++++++----------------------- fs/btrfs/ctree.h | 62 ++++++++--- fs/btrfs/dir-item.c | 17 ++- fs/btrfs/disk-io.c | 281 
+++++++++++++++---------------------------------- fs/btrfs/disk-io.h | 51 +++++---- fs/btrfs/extent-tree.c | 144 +++++++++++++------------ fs/btrfs/hash.c | 9 ++ fs/btrfs/inode-map.c | 8 +- fs/btrfs/print-tree.c | 20 ++-- fs/btrfs/print-tree.h | 2 +- fs/btrfs/root-tree.c | 4 +- fs/btrfs/super.c | 261 +++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/transaction.h | 4 +- 14 files changed, 651 insertions(+), 490 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 99e45a54ebd6..1a0fb7422393 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -2,7 +2,8 @@ ifneq ($(KERNELRELEASE),) # kbuild part of makefile obj-m := btrfs.o -btrfs-y := super.o +btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ + hash.o file-item.o inode-item.o inode-map.o disk-io.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ @@ -14,7 +15,7 @@ else KERNELDIR := /lib/modules/`uname -r`/build all:: - $(MAKE) -C $(KERNELDIR) M=`pwd` modules + $(MAKE) C=1 -C $(KERNELDIR) M=`pwd` modules clean:: rm *.o btrfs.ko endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9fbd07c37fde..e690e2bb47d2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -7,11 +7,11 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size); static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *dst, struct btrfs_buffer + *root, struct buffer_head *dst, struct buffer_head *src); static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_buffer *dst_buf, - struct btrfs_buffer *src_buf); + btrfs_root *root, struct buffer_head *dst_buf, + struct buffer_head *src_buf); static int del_ptr(struct btrfs_trans_handle *trans, 
struct btrfs_root *root, struct btrfs_path *path, int level, int slot); @@ -32,32 +32,34 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) } static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *buf, struct btrfs_buffer - *parent, int parent_slot, struct btrfs_buffer + *root, struct buffer_head *buf, struct buffer_head + *parent, int parent_slot, struct buffer_head **cow_ret) { - struct btrfs_buffer *cow; + struct buffer_head *cow; + struct btrfs_node *cow_node; - if (!list_empty(&buf->dirty)) { + if (!buffer_dirty(buf)) { *cow_ret = buf; return 0; } cow = btrfs_alloc_free_block(trans, root); - memcpy(&cow->node, &buf->node, root->blocksize); - btrfs_set_header_blocknr(&cow->node.header, cow->blocknr); + cow_node = btrfs_buffer_node(cow); + memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); + btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); *cow_ret = cow; btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; - cow->count++; + get_bh(cow); if (buf != root->commit_root) - btrfs_free_extent(trans, root, buf->blocknr, 1, 1); + btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); btrfs_block_release(root, buf); } else { - btrfs_set_node_blockptr(&parent->node, parent_slot, - cow->blocknr); - BUG_ON(list_empty(&parent->dirty)); - btrfs_free_extent(trans, root, buf->blocknr, 1, 1); + btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, + cow->b_blocknr); + BUG_ON(!buffer_dirty(parent)); + btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); return 0; @@ -119,12 +121,12 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, { int i; struct btrfs_node *parent = NULL; - struct btrfs_node *node = &path->nodes[level]->node; + struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); int parent_slot; u32 nritems = btrfs_header_nritems(&node->header); if (path->nodes[level + 1]) - 
parent = &path->nodes[level + 1]->node; + parent = btrfs_buffer_node(path->nodes[level + 1]); parent_slot = path->slots[level + 1]; BUG_ON(nritems == 0); if (parent) { @@ -148,13 +150,13 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { int i; - struct btrfs_leaf *leaf = &path->nodes[level]->leaf; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); struct btrfs_node *parent = NULL; int parent_slot; u32 nritems = btrfs_header_nritems(&leaf->header); if (path->nodes[level + 1]) - parent = &path->nodes[level + 1]->node; + parent = btrfs_buffer_node(path->nodes[level + 1]); parent_slot = path->slots[level + 1]; BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); @@ -250,11 +252,11 @@ static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) return -1; } -static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, - struct btrfs_buffer *parent_buf, +static struct buffer_head *read_node_slot(struct btrfs_root *root, + struct buffer_head *parent_buf, int slot) { - struct btrfs_node *node = &parent_buf->node; + struct btrfs_node *node = btrfs_buffer_node(parent_buf); if (slot < 0) return NULL; if (slot >= btrfs_header_nritems(&node->header)) @@ -265,10 +267,10 @@ static struct btrfs_buffer *read_node_slot(struct btrfs_root *root, static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *right_buf; - struct btrfs_buffer *mid_buf; - struct btrfs_buffer *left_buf; - struct btrfs_buffer *parent_buf = NULL; + struct buffer_head *right_buf; + struct buffer_head *mid_buf; + struct buffer_head *left_buf; + struct buffer_head *parent_buf = NULL; struct btrfs_node *right = NULL; struct btrfs_node *mid; struct btrfs_node *left = NULL; @@ -283,7 +285,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root return 0; mid_buf = path->nodes[level]; - mid = &mid_buf->node; + mid = 
btrfs_buffer_node(mid_buf); orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) @@ -295,8 +297,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * by promoting the node below to a root */ if (!parent_buf) { - struct btrfs_buffer *child; - u64 blocknr = mid_buf->blocknr; + struct buffer_head *child; + u64 blocknr = mid_buf->b_blocknr; if (btrfs_header_nritems(&mid->header) != 1) return 0; @@ -313,7 +315,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root clean_tree_block(trans, root, mid_buf); return btrfs_free_extent(trans, root, blocknr, 1, 1); } - parent = &parent_buf->node; + parent = btrfs_buffer_node(parent_buf); if (btrfs_header_nritems(&mid->header) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) @@ -326,7 +328,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (left_buf) { btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, &left_buf); - left = &left_buf->node; + left = btrfs_buffer_node(left_buf); orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(trans, root, left_buf, mid_buf); if (wret < 0) @@ -339,12 +341,12 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (right_buf) { btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, &right_buf); - right = &right_buf->node; + right = btrfs_buffer_node(right_buf); wret = push_node_left(trans, root, mid_buf, right_buf); if (wret < 0) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = right_buf->blocknr; + u64 blocknr = right_buf->b_blocknr; btrfs_block_release(root, right_buf); clean_tree_block(trans, root, right_buf); right_buf = NULL; @@ -360,7 +362,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&parent->ptrs[pslot + 1].key, &right->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&parent_buf->dirty)); + BUG_ON(!buffer_dirty(parent_buf)); } } if 
(btrfs_header_nritems(&mid->header) == 1) { @@ -381,7 +383,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = mid_buf->blocknr; + u64 blocknr = mid_buf->b_blocknr; btrfs_block_release(root, mid_buf); clean_tree_block(trans, root, mid_buf); mid_buf = NULL; @@ -396,13 +398,13 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root /* update the parent key to reflect our changes */ memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&parent_buf->dirty)); + BUG_ON(!buffer_dirty(parent_buf)); } /* update the path */ if (left_buf) { if (btrfs_header_nritems(&left->header) > orig_slot) { - left_buf->count++; // released below + get_bh(left_buf); path->nodes[level] = left_buf; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; @@ -415,8 +417,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } /* double check we haven't messed things up */ check_block(root, path, level); - if (orig_ptr != btrfs_node_blockptr(&path->nodes[level]->node, - path->slots[level])) + if (orig_ptr != + btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), + path->slots[level])) BUG(); if (right_buf) @@ -443,8 +446,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct btrfs_buffer *b; - struct btrfs_buffer *cow_buf; + struct buffer_head *b; + struct buffer_head *cow_buf; struct btrfs_node *c; int slot; int ret; @@ -452,18 +455,20 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root again: b = root->node; - b->count++; + get_bh(b); while (b) { - level = btrfs_header_level(&b->node.header); + c = btrfs_buffer_node(b); + level = btrfs_header_level(&c->header); if (cow) { int wret; - wret = 
btrfs_cow_block(trans, root, b, p->nodes[level + - 1], p->slots[level + 1], + wret = btrfs_cow_block(trans, root, b, + p->nodes[level + 1], + p->slots[level + 1], &cow_buf); b = cow_buf; } BUG_ON(!cow && ins_len); - c = &b->node; + c = btrfs_buffer_node(b); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) @@ -480,7 +485,7 @@ again: if (sret) return sret; b = p->nodes[level]; - c = &b->node; + c = btrfs_buffer_node(b); slot = p->slots[level]; } else if (ins_len < 0) { int sret = balance_level(trans, root, p, @@ -490,7 +495,7 @@ again: b = p->nodes[level]; if (!b) goto again; - c = &b->node; + c = btrfs_buffer_node(b); slot = p->slots[level]; BUG_ON(btrfs_header_nritems(&c->header) == 1); } @@ -505,11 +510,9 @@ again: if (sret) return sret; } - BUG_ON(root->node->count == 1); return ret; } } - BUG_ON(root->node->count == 1); return 1; } @@ -534,9 +537,9 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root int tslot = path->slots[i]; if (!path->nodes[i]) break; - t = &path->nodes[i]->node; + t = btrfs_buffer_node(path->nodes[i]); memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); - BUG_ON(list_empty(&path->nodes[i]->dirty)); + BUG_ON(!buffer_dirty(path->nodes[i])); if (tslot != 0) break; } @@ -551,11 +554,11 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root * error, and > 0 if there was no room in the left hand block. 
*/ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *dst_buf, struct - btrfs_buffer *src_buf) + *root, struct buffer_head *dst_buf, struct + buffer_head *src_buf) { - struct btrfs_node *src = &src_buf->node; - struct btrfs_node *dst = &dst_buf->node; + struct btrfs_node *src = btrfs_buffer_node(src_buf); + struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int src_nritems; int dst_nritems; @@ -580,8 +583,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(list_empty(&src_buf->dirty)); - BUG_ON(list_empty(&dst_buf->dirty)); + BUG_ON(!buffer_dirty(src_buf)); + BUG_ON(!buffer_dirty(dst_buf)); return ret; } @@ -595,11 +598,11 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root * this will only push up to 1/2 the contents of the left node over */ static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_buffer *dst_buf, - struct btrfs_buffer *src_buf) + btrfs_root *root, struct buffer_head *dst_buf, + struct buffer_head *src_buf) { - struct btrfs_node *src = &src_buf->node; - struct btrfs_node *dst = &dst_buf->node; + struct btrfs_node *src = btrfs_buffer_node(src_buf); + struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int max_push; int src_nritems; @@ -628,8 +631,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(list_empty(&src_buf->dirty)); - BUG_ON(list_empty(&dst_buf->dirty)); + BUG_ON(!buffer_dirty(src_buf)); + BUG_ON(!buffer_dirty(dst_buf)); return ret; } @@ -643,7 +646,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct static 
int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *t; + struct buffer_head *t; struct btrfs_node *lower; struct btrfs_node *c; struct btrfs_disk_key *lower_key; @@ -652,24 +655,24 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level-1] != root->node); t = btrfs_alloc_free_block(trans, root); - c = &t->node; + c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, t->blocknr); + btrfs_set_header_blocknr(&c->header, t->b_blocknr); btrfs_set_header_parentid(&c->header, - btrfs_header_parentid(&root->node->node.header)); - lower = &path->nodes[level-1]->node; + btrfs_header_parentid(btrfs_buffer_header(root->node))); + lower = btrfs_buffer_node(path->nodes[level-1]); if (btrfs_is_leaf(lower)) lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else lower_key = &lower->ptrs[0].key; memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->blocknr); + btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); root->node = t; - t->count++; + get_bh(t); path->nodes[level] = t; path->slots[level] = 0; return 0; @@ -692,7 +695,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root int nritems; BUG_ON(!path->nodes[level]); - lower = &path->nodes[level]->node; + lower = btrfs_buffer_node(path->nodes[level]); nritems = btrfs_header_nritems(&lower->header); if (slot > nritems) BUG(); @@ -705,7 +708,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, 
nritems + 1); - BUG_ON(list_empty(&path->nodes[level]->dirty)); + BUG_ON(!buffer_dirty(path->nodes[level])); return 0; } @@ -721,9 +724,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_buffer *t; + struct buffer_head *t; struct btrfs_node *c; - struct btrfs_buffer *split_buffer; + struct buffer_head *split_buffer; struct btrfs_node *split; int mid; int ret; @@ -731,7 +734,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root u32 c_nritems; t = path->nodes[level]; - c = &t->node; + c = btrfs_buffer_node(t); if (t == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); @@ -740,11 +743,11 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); split_buffer = btrfs_alloc_free_block(trans, root); - split = &split_buffer->node; + split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); - btrfs_set_header_blocknr(&split->header, split_buffer->blocknr); + btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); btrfs_set_header_parentid(&split->header, - btrfs_header_parentid(&root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(root->node))); mid = (c_nritems + 1) / 2; memcpy(split->ptrs, c->ptrs + mid, (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); @@ -752,9 +755,9 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&c->header, mid); ret = 0; - BUG_ON(list_empty(&t->dirty)); + BUG_ON(!buffer_dirty(t)); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - split_buffer->blocknr, path->slots[level + 1] + 1, + split_buffer->b_blocknr, path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; @@ -798,11 +801,12 
@@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *left_buf = path->nodes[0]; - struct btrfs_leaf *left = &left_buf->leaf; + struct buffer_head *left_buf = path->nodes[0]; + struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); struct btrfs_leaf *right; - struct btrfs_buffer *right_buf; - struct btrfs_buffer *upper; + struct buffer_head *right_buf; + struct buffer_head *upper; + struct btrfs_node *upper_node; int slot; int i; int free_space; @@ -817,12 +821,13 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } upper = path->nodes[1]; - if (slot >= btrfs_header_nritems(&upper->node.header) - 1) { + upper_node = btrfs_buffer_node(upper); + if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { return 1; } - right_buf = read_tree_block(root, btrfs_node_blockptr(&upper->node, - slot + 1)); - right = &right_buf->leaf; + right_buf = read_tree_block(root, + btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); + right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); @@ -830,7 +835,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } /* cow and double check */ btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf); - right = &right_buf->leaf; + right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, right_buf); @@ -881,11 +886,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); - BUG_ON(list_empty(&left_buf->dirty)); - 
BUG_ON(list_empty(&right_buf->dirty)); - memcpy(&upper->node.ptrs[slot + 1].key, + BUG_ON(!buffer_dirty(left_buf)); + BUG_ON(!buffer_dirty(right_buf)); + memcpy(&upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(list_empty(&upper->dirty)); + BUG_ON(!buffer_dirty(upper)); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -905,9 +910,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *right_buf = path->nodes[0]; - struct btrfs_leaf *right = &right_buf->leaf; - struct btrfs_buffer *t; + struct buffer_head *right_buf = path->nodes[0]; + struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); + struct buffer_head *t; struct btrfs_leaf *left; int slot; int i; @@ -926,9 +931,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root if (!path->nodes[1]) { return 1; } - t = read_tree_block(root, btrfs_node_blockptr(&path->nodes[1]->node, - slot - 1)); - left = &t->leaf; + t = read_tree_block(root, + btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); + left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); @@ -937,7 +942,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* cow and double check */ btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); - left = &t->leaf; + left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { btrfs_block_release(root, t); @@ -999,8 +1004,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_offset(right->items + i); } - BUG_ON(list_empty(&t->dirty)); - 
BUG_ON(list_empty(&right_buf->dirty)); + BUG_ON(!buffer_dirty(t)); + BUG_ON(!buffer_dirty(right_buf)); wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) @@ -1029,13 +1034,13 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct btrfs_buffer *l_buf; + struct buffer_head *l_buf; struct btrfs_leaf *l; u32 nritems; int mid; int slot; struct btrfs_leaf *right; - struct btrfs_buffer *right_buffer; + struct buffer_head *right_buffer; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1053,7 +1058,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return wret; } l_buf = path->nodes[0]; - l = &l_buf->leaf; + l = btrfs_buffer_leaf(l_buf); /* did the pushes work? */ if (btrfs_leaf_free_space(root, l) >= @@ -1071,7 +1076,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root right_buffer = btrfs_alloc_free_block(trans, root); BUG_ON(!right_buffer); BUG_ON(mid == nritems); - right = &right_buffer->leaf; + right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); if (mid <= slot) { /* FIXME, just alloc a new leaf here */ @@ -1085,10 +1090,10 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG(); } btrfs_set_header_nritems(&right->header, nritems - mid); - btrfs_set_header_blocknr(&right->header, right_buffer->blocknr); + btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, - btrfs_header_parentid(&root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(root->node))); data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(root, l); memcpy(right->items, l->items + mid, @@ -1107,11 +1112,11 @@ static int 
split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&l->header, mid); ret = 0; wret = insert_ptr(trans, root, path, &right->items[0].key, - right_buffer->blocknr, path->slots[1] + 1, 1); + right_buffer->b_blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - BUG_ON(list_empty(&right_buffer->dirty)); - BUG_ON(list_empty(&l_buf->dirty)); + BUG_ON(!buffer_dirty(right_buffer)); + BUG_ON(!buffer_dirty(l_buf)); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_block_release(root, path->nodes[0]); @@ -1136,7 +1141,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root int slot; int slot_orig; struct btrfs_leaf *leaf; - struct btrfs_buffer *leaf_buf; + struct buffer_head *leaf_buf; u32 nritems; unsigned int data_end; struct btrfs_disk_key disk_key; @@ -1156,7 +1161,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root slot_orig = path->slots[0]; leaf_buf = path->nodes[0]; - leaf = &leaf_buf->leaf; + leaf = btrfs_buffer_leaf(leaf_buf); nritems = btrfs_header_nritems(&leaf->header); data_end = leaf_data_end(root, leaf); @@ -1202,7 +1207,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - BUG_ON(list_empty(&leaf_buf->dirty)); + BUG_ON(!buffer_dirty(leaf_buf)); if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); check_leaf(root, path, 0); @@ -1225,7 +1230,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size); if (!ret) { - ptr = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], u8); + ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], u8); memcpy(ptr, data, data_size); } btrfs_release_path(root, &path); @@ -1243,12 +1249,12 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int 
level, int slot) { struct btrfs_node *node; - struct btrfs_buffer *parent = path->nodes[level]; + struct buffer_head *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; - node = &parent->node; + node = btrfs_buffer_node(parent); nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { memmove(node->ptrs + slot, node->ptrs + slot + 1, @@ -1257,16 +1263,17 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, nritems--; btrfs_set_header_nritems(&node->header, nritems); if (nritems == 0 && parent == root->node) { - BUG_ON(btrfs_header_level(&root->node->node.header) != 1); + struct btrfs_header *header = btrfs_buffer_header(root->node); + BUG_ON(btrfs_header_level(header) != 1); /* just turn the root into a leaf and break */ - btrfs_set_header_level(&root->node->node.header, 0); + btrfs_set_header_level(header, 0); } else if (slot == 0) { wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, level + 1); if (wret) ret = wret; } - BUG_ON(list_empty(&parent->dirty)); + BUG_ON(!buffer_dirty(parent)); return ret; } @@ -1279,7 +1286,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, { int slot; struct btrfs_leaf *leaf; - struct btrfs_buffer *leaf_buf; + struct buffer_head *leaf_buf; int doff; int dsize; int ret = 0; @@ -1287,7 +1294,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 nritems; leaf_buf = path->nodes[0]; - leaf = &leaf_buf->leaf; + leaf = btrfs_buffer_leaf(leaf_buf); slot = path->slots[0]; doff = btrfs_item_offset(leaf->items + slot); dsize = btrfs_item_size(leaf->items + slot); @@ -1313,14 +1320,13 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (nritems == 0) { if (leaf_buf == root->node) { btrfs_set_header_level(&leaf->header, 0); - BUG_ON(list_empty(&leaf_buf->dirty)); } else { clean_tree_block(trans, root, leaf_buf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = 
wret; wret = btrfs_free_extent(trans, root, - leaf_buf->blocknr, 1, 1); + leaf_buf->b_blocknr, 1, 1); if (wret) ret = wret; } @@ -1332,7 +1338,6 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } - BUG_ON(list_empty(&leaf_buf->dirty)); /* delete the leaf if it is mostly empty */ if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { @@ -1341,7 +1346,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to del_ptr below */ slot = path->slots[1]; - leaf_buf->count++; + get_bh(leaf_buf); wret = push_leaf_left(trans, root, path, 1); if (wret < 0) ret = wret; @@ -1352,7 +1357,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = leaf_buf->blocknr; + u64 blocknr = leaf_buf->b_blocknr; clean_tree_block(trans, root, leaf_buf); wret = del_ptr(trans, root, path, 1, slot); if (wret) @@ -1380,19 +1385,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int slot; int level = 1; u64 blocknr; - struct btrfs_buffer *c; - struct btrfs_buffer *next = NULL; + struct buffer_head *c; + struct btrfs_node *c_node; + struct buffer_head *next = NULL; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; slot = path->slots[level] + 1; c = path->nodes[level]; - if (slot >= btrfs_header_nritems(&c->node.header)) { + c_node = btrfs_buffer_node(c); + if (slot >= btrfs_header_nritems(&c_node->header)) { level++; continue; } - blocknr = btrfs_node_blockptr(&c->node, slot); + blocknr = btrfs_node_blockptr(c_node, slot); if (next) btrfs_block_release(root, next); next = read_tree_block(root, blocknr); @@ -1408,7 +1415,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (!level) break; next = read_tree_block(root, - btrfs_node_blockptr(&next->node, 0)); + btrfs_node_blockptr(btrfs_buffer_node(next), 0)); } return 0; } diff --git 
a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ae8518cb94bf..7748eecd9304 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,6 +1,9 @@ #ifndef __BTRFS__ #define __BTRFS__ +#include +#include + struct btrfs_trans_handle; #define BTRFS_MAGIC "_BtRfS_M" @@ -10,6 +13,12 @@ struct btrfs_trans_handle; #define BTRFS_INODE_MAP_OBJECTID 3 #define BTRFS_FS_TREE_OBJECTID 4 +/* + * we can actually store much bigger names, but lets not confuse the rest + * of linux + */ +#define BTRFS_NAME_LEN 255 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. The flags @@ -57,7 +66,7 @@ struct btrfs_header { #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) -struct btrfs_buffer; +struct buffer_head; /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -120,7 +129,7 @@ struct btrfs_node { * used while walking the tree. */ struct btrfs_path { - struct btrfs_buffer *nodes[BTRFS_MAX_LEVEL]; + struct buffer_head *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; }; @@ -211,17 +220,14 @@ struct btrfs_fs_info { struct btrfs_root *inode_root; struct btrfs_key current_insert; struct btrfs_key last_insert; - struct radix_tree_root cache_radix; struct radix_tree_root pinned_radix; - struct list_head trans; - struct list_head cache; u64 last_inode_alloc; u64 last_inode_alloc_dirid; u64 generation; - int cache_size; - int fp; struct btrfs_trans_handle *running_transaction; struct btrfs_super_block *disk_super; + struct buffer_head *sb_buffer; + struct super_block *sb; }; /* @@ -230,8 +236,8 @@ struct btrfs_fs_info { * only for the extent tree. 
*/ struct btrfs_root { - struct btrfs_buffer *node; - struct btrfs_buffer *commit_root; + struct buffer_head *node; + struct buffer_head *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -389,6 +395,29 @@ static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, i->compat_flags = cpu_to_le16(val); } +static inline u32 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) +{ + return le32_to_cpu(ts->sec); +} + +static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, + u32 val) +{ + ts->sec = cpu_to_le32(val); +} + +static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) +{ + return le32_to_cpu(ts->nsec); +} + +static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, + u32 val) +{ + ts->nsec = cpu_to_le32(val); +} + + static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { @@ -757,15 +786,20 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } +static inline struct btrfs_root *btrfs_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + /* helper function to cast into the data area of the leaf. 
*/ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); + struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root @@ -783,7 +817,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *snap); + *root, struct buffer_head *snap); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -800,8 +834,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, char *name, int name_len, u64 dir, u64 objectid, u8 type); int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, char *name, - int name_len, int mod); + *root, struct btrfs_path *path, u64 dir, + const char *name, int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, char *name, int name_len); int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 4d8083d92fa0..75d6e373e98d 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -18,12 +18,7 @@ int btrfs_insert_dir_item(struct 
btrfs_trans_handle *trans, struct btrfs_root key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - if (name_len == 1 && *name == '.') - key.offset = 1; - else if (name_len == 2 && name[0] == '.' && name[1] == '.') - key.offset = 2; - else - ret = btrfs_name_hash(name, name_len, &key.offset); + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); btrfs_init_path(&path); data_size = sizeof(*dir_item) + name_len; @@ -31,7 +26,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) goto out; - dir_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], struct btrfs_dir_item); btrfs_set_dir_objectid(dir_item, objectid); btrfs_set_dir_type(dir_item, type); @@ -45,8 +41,8 @@ out: } int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, char *name, - int name_len, int mod) + *root, struct btrfs_path *path, u64 dir, + const char *name, int name_len, int mod) { int ret; struct btrfs_key key; @@ -69,7 +65,8 @@ int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_dir_item *dir_item; char *name_ptr; - dir_item = btrfs_item_ptr(&path->nodes[0]->leaf, path->slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_dir_item); if (btrfs_dir_name_len(dir_item) != name_len) return 0; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 05637f9fd7c7..df2061a735cd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,165 +1,67 @@ -#define _XOPEN_SOURCE 500 -#include -#include -#include -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" -static int allocated_blocks = 0; -int cache_max = 10000; - -static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) 
+static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { - if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) + struct btrfs_node *node = btrfs_buffer_node(buf); + if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) BUG(); - if (root->node && btrfs_header_parentid(&buf->node.header) != - btrfs_header_parentid(&root->node->node.header)) + if (root->node && btrfs_header_parentid(&node->header) != + btrfs_header_parentid(btrfs_buffer_header(root->node))) BUG(); return 0; } -static int free_some_buffers(struct btrfs_root *root) +struct buffer_head *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { - struct list_head *node, *next; - struct btrfs_buffer *b; - if (root->fs_info->cache_size < cache_max) - return 0; - list_for_each_safe(node, next, &root->fs_info->cache) { - b = list_entry(node, struct btrfs_buffer, cache); - if (b->count == 1) { - BUG_ON(!list_empty(&b->dirty)); - list_del_init(&b->cache); - btrfs_block_release(root, b); - if (root->fs_info->cache_size < cache_max) - break; - } - } - return 0; + return sb_getblk(root->fs_info->sb, blocknr); } -struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr) { - struct btrfs_buffer *buf; - int ret; - - buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize); - if (!buf) - return buf; - allocated_blocks++; - buf->blocknr = blocknr; - buf->count = 2; - INIT_LIST_HEAD(&buf->dirty); - free_some_buffers(root); - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf); - radix_tree_preload_end(); - list_add_tail(&buf->cache, &root->fs_info->cache); - root->fs_info->cache_size++; - if (ret) { - free(buf); - return NULL; - } - return buf; + return sb_getblk(root->fs_info->sb, blocknr); } -struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) 
{ - struct btrfs_buffer *buf; - buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); - if (buf) { - buf->count++; - } else { - buf = alloc_tree_block(root, blocknr); - if (!buf) { - BUG(); - return NULL; - } - } - return buf; -} - -struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) -{ - loff_t offset = blocknr * root->blocksize; - struct btrfs_buffer *buf; - int ret; + struct buffer_head *buf = sb_bread(root->fs_info->sb, blocknr); - buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); - if (buf) { - buf->count++; - } else { - buf = alloc_tree_block(root, blocknr); - if (!buf) - return NULL; - ret = pread(root->fs_info->fp, &buf->node, root->blocksize, - offset); - if (ret != root->blocksize) { - free(buf); - return NULL; - } - } + if (!buf) + return buf; if (check_tree_block(root, buf)) BUG(); return buf; } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - if (!list_empty(&buf->dirty)) - return 0; - list_add_tail(&buf->dirty, &root->fs_info->trans); - buf->count++; + mark_buffer_dirty(buf); return 0; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - if (!list_empty(&buf->dirty)) { - list_del_init(&buf->dirty); - btrfs_block_release(root, buf); - } + clear_buffer_dirty(buf); return 0; } int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - u64 blocknr = buf->blocknr; - loff_t offset = blocknr * root->blocksize; - int ret; - - if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) - BUG(); - ret = pwrite(root->fs_info->fp, &buf->node, root->blocksize, offset); - if (ret != root->blocksize) - return ret; + mark_buffer_dirty(buf); return 0; } static int __commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_buffer *b; 
- int ret = 0; - int wret; - while(!list_empty(&root->fs_info->trans)) { - b = list_entry(root->fs_info->trans.next, struct btrfs_buffer, - dirty); - list_del_init(&b->dirty); - wret = write_tree_block(trans, root, b); - if (wret) - ret = wret; - btrfs_block_release(root, b); - } - return ret; + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + return 0; } static int commit_tree_roots(struct btrfs_trans_handle *trans, @@ -172,17 +74,17 @@ static int commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *inode_root = fs_info->inode_root; btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->blocknr); + inode_root->node->b_blocknr); ret = btrfs_update_root(trans, tree_root, &inode_root->root_key, &inode_root->root_item); BUG_ON(ret); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->blocknr) + if (old_extent_block == extent_root->node->b_blocknr) break; btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->blocknr); + extent_root->node->b_blocknr); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -195,7 +97,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; - struct btrfs_buffer *snap = root->commit_root; + struct buffer_head *snap = root->commit_root; struct btrfs_key snap_key; if (root->commit_root == root->node) @@ -204,7 +106,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; - btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); + btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); BUG_ON(ret); @@ -220,7 +122,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct 
btrfs_finish_extent_commit(trans, root->fs_info->tree_root); root->commit_root = root->node; - root->node->count++; + get_bh(root->node); ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); @@ -234,7 +136,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct static int __setup_root(struct btrfs_super_block *super, struct btrfs_root *root, struct btrfs_fs_info *fs_info, - u64 objectid, int fp) + u64 objectid) { root->node = NULL; root->commit_root = NULL; @@ -250,11 +152,11 @@ static int find_and_setup_root(struct btrfs_super_block *super, struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, - struct btrfs_root *root, int fp) + struct btrfs_root *root) { int ret; - __setup_root(super, root, fs_info, objectid, fp); + __setup_root(super, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -265,32 +167,26 @@ static int find_and_setup_root(struct btrfs_super_block *super, return 0; } -struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) +struct btrfs_root *open_ctree(struct super_block *sb, + struct buffer_head *sb_buffer, + struct btrfs_super_block *disk_super) { - int fp; - - fp = open(filename, O_CREAT | O_RDWR, 0600); - if (fp < 0) { - return NULL; - } - return open_ctree_fd(fp, super); -} - -struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) -{ - struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); + struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), + 
GFP_NOFS); + struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), + GFP_NOFS); int ret; - INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); + /* FIXME: don't be stupid */ + if (!btrfs_super_root(disk_super)) + return NULL; INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); - INIT_LIST_HEAD(&fs_info->trans); - INIT_LIST_HEAD(&fs_info->cache); - fs_info->cache_size = 0; - fs_info->fp = fp; fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -298,36 +194,31 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; - fs_info->disk_super = super; + fs_info->disk_super = disk_super; + fs_info->sb_buffer = sb_buffer; + fs_info->sb = sb; memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); - ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET); - if (ret == 0 || btrfs_super_root(super) == 0) { - BUG(); - return NULL; - } - BUG_ON(ret < 0); - - __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp); - tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); + __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + tree_root->node = read_tree_block(tree_root, + btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_INODE_MAP_OBJECTID, inode_root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = 
find_and_setup_root(super, tree_root, fs_info, - BTRFS_FS_TREE_OBJECTID, root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_FS_TREE_OBJECTID, root); BUG_ON(ret); root->commit_root = root->node; - root->node->count++; + get_bh(root->node); root->ref_cows = 1; root->fs_info->generation = root->root_key.offset + 1; return root; @@ -336,8 +227,11 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { + return 0; +#if 0 int ret; - btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr); + btrfs_set_super_root(s, root->fs_info->tree_root->node->b_blocknr); + ret = pwrite(root->fs_info->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { @@ -345,35 +239,38 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } return 0; +#endif } static int drop_cache(struct btrfs_root *root) { + return 0; +#if 0 while(!list_empty(&root->fs_info->cache)) { - struct btrfs_buffer *b = list_entry(root->fs_info->cache.next, - struct btrfs_buffer, + struct buffer_head *b = list_entry(root->fs_info->cache.next, + struct buffer_head, cache); list_del_init(&b->cache); btrfs_block_release(root, b); } return 0; +#endif } -int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) + +int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; trans = root->fs_info->running_transaction; - btrfs_commit_transaction(trans, root, s); + btrfs_commit_transaction(trans, root, root->fs_info->disk_super); ret = commit_tree_roots(trans, root->fs_info); BUG_ON(ret); ret = __commit_transaction(trans, root); BUG_ON(ret); - write_ctree_super(trans, root, s); + write_ctree_super(trans, root, root->fs_info->disk_super); drop_cache(root); - BUG_ON(!list_empty(&root->fs_info->trans)); - close(root->fs_info->fp); if (root->node) btrfs_block_release(root, 
root->node); if (root->fs_info->extent_root->node) @@ -386,29 +283,17 @@ int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) btrfs_block_release(root->fs_info->tree_root, root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); - free(root); - printf("on close %d blocks are allocated\n", allocated_blocks); + btrfs_block_release(root, root->fs_info->sb_buffer); + kfree(root->fs_info->extent_root); + kfree(root->fs_info->inode_root); + kfree(root->fs_info->tree_root); + kfree(root->fs_info); + kfree(root); return 0; } -void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) +void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - buf->count--; - if (buf->count < 0) - BUG(); - if (buf->count == 0) { - BUG_ON(!list_empty(&buf->cache)); - BUG_ON(!list_empty(&buf->dirty)); - if (!radix_tree_lookup(&root->fs_info->cache_radix, - buf->blocknr)) - BUG(); - radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr); - memset(buf, 0, sizeof(*buf)); - free(buf); - BUG_ON(allocated_blocks == 0); - allocated_blocks--; - BUG_ON(root->fs_info->cache_size == 0); - root->fs_info->cache_size--; - } + brelse(buf); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index d888cf5c350b..7f4bb729b734 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,36 +1,41 @@ #ifndef __DISKIO__ #define __DISKIO__ -#include "list.h" -struct btrfs_buffer { - u64 blocknr; - int count; - struct list_head dirty; - struct list_head cache; - union { - struct btrfs_node node; - struct btrfs_leaf leaf; - }; -}; +#include -struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); -struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr); +#define BTRFS_SUPER_INFO_OFFSET (16 * 1024) + +static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) +{ + return (struct btrfs_node *)bh->b_data; +} + +static inline struct btrfs_leaf *btrfs_buffer_leaf(struct buffer_head 
*bh) +{ + return (struct btrfs_leaf *)bh->b_data; +} + +static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) +{ + return &((struct btrfs_node *)bh->b_data)->header; +} + +struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr); int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); + struct buffer_head *buf); int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf); + struct buffer_head *buf); int clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_buffer *buf); + struct btrfs_root *root, struct buffer_head *buf); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s); -struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *s); -struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super); -int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s); -void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf); +struct btrfs_root *open_ctree(struct super_block *sb, + struct buffer_head *sb_buffer, + struct btrfs_super_block *disk_super); +int close_ctree(struct btrfs_root *root); +void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s); int mkfs(int fd, u64 num_blocks, u32 blocksize); - -#define BTRFS_SUPER_INFO_OFFSET (16 * 1024) - #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 53a7550b5c1e..e3af2c035687 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10,9 +10,8 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root search_end, struct btrfs_key *ins); static int 
finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root - *extent_root); - +static int del_pending_extents(struct btrfs_trans_handle *trans, struct + btrfs_root *extent_root); /* * pending extents are blocks that we're trying to allocate in the extent * map while trying to grow the map because of other allocations. To avoid @@ -21,6 +20,7 @@ static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root * manner for deletes. */ #define CTREE_EXTENT_PENDING_DEL 0 +#define CTREE_EXTENT_PINNED 1 static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr) @@ -45,15 +45,14 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root if (ret != 0) BUG(); BUG_ON(ret != 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); - BUG_ON(list_empty(&path.nodes[0]->dirty)); btrfs_release_path(root->fs_info->extent_root, &path); finish_current_insert(trans, root->fs_info->extent_root); - run_pending(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); return 0; } @@ -74,7 +73,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root 0, 0); if (ret != 0) BUG(); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); btrfs_release_path(root->fs_info->extent_root, &path); @@ -82,18 +81,20 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { u64 blocknr; + struct btrfs_node *buf_node; int i; if (!root->ref_cows) return 
0; - if (btrfs_is_leaf(&buf->node)) + buf_node = btrfs_buffer_node(buf); + if (btrfs_is_leaf(buf_node)) return 0; - for (i = 0; i < btrfs_header_nritems(&buf->node.header); i++) { - blocknr = btrfs_node_blockptr(&buf->node, i); + for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { + blocknr = btrfs_node_blockptr(buf_node, i); inc_block_ref(trans, root, blocknr); } return 0; @@ -108,9 +109,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int i; while(1) { - ret = radix_tree_gang_lookup(&root->fs_info->pinned_radix, + ret = radix_tree_gang_lookup_tag(&root->fs_info->pinned_radix, (void **)gang, 0, - ARRAY_SIZE(gang)); + ARRAY_SIZE(gang), + CTREE_EXTENT_PINNED); if (!ret) break; if (!first) @@ -137,7 +139,7 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, - btrfs_header_parentid(&extent_root->node->node.header)); + btrfs_header_parentid(btrfs_buffer_header(extent_root->node))); ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); @@ -156,11 +158,24 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct return 0; } +static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) +{ + int err; + err = radix_tree_insert(&root->fs_info->pinned_radix, + blocknr, (void *)blocknr); + BUG_ON(err); + if (err) + return err; + radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, + tag); + return 0; +} + /* * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 blocknr, u64 num_blocks) { struct btrfs_path path; struct btrfs_key key; @@ -171,7 +186,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key ins; u32 refs; - BUG_ON(pin && num_blocks != 1); key.objectid = blocknr; key.flags = 0; 
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -186,26 +200,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root printk("failed to find %Lu\n", key.objectid); BUG(); } - ei = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], + ei = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_extent_item); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); if (refs == 0) { u64 super_blocks_used; - if (pin) { - int err; - radix_tree_preload(GFP_KERNEL); - err = radix_tree_insert(&info->pinned_radix, - blocknr, (void *)blocknr); - BUG_ON(err); - radix_tree_preload_end(); - } super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, &path); - if (!pin && extent_root->fs_info->last_insert.objectid > + if (extent_root->fs_info->last_insert.objectid > blocknr) extent_root->fs_info->last_insert.objectid = blocknr; if (ret) @@ -224,39 +230,32 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root) { int ret; - struct btrfs_buffer *gang[4]; + int wret; + int err = 0; + unsigned long gang[4]; int i; + struct radix_tree_root *radix = &extent_root->fs_info->pinned_radix; while(1) { ret = radix_tree_gang_lookup_tag( - &extent_root->fs_info->cache_radix, + &extent_root->fs_info->pinned_radix, (void **)gang, 0, ARRAY_SIZE(gang), CTREE_EXTENT_PENDING_DEL); if (!ret) break; for (i = 0; i < ret; i++) { - ret = __free_extent(trans, extent_root, - gang[i]->blocknr, 1, 1); - radix_tree_tag_clear(&extent_root->fs_info->cache_radix, - gang[i]->blocknr, + radix_tree_tag_set(radix, gang[i], CTREE_EXTENT_PINNED); + radix_tree_tag_clear(radix, gang[i], CTREE_EXTENT_PENDING_DEL); - btrfs_block_release(extent_root, gang[i]); + wret = __free_extent(trans, extent_root, gang[i], 1); + if (wret) + err = wret; } } - return 0; + return err; } 
-static int run_pending(struct btrfs_trans_handle *trans, struct btrfs_root - *extent_root) -{ - while(radix_tree_tagged(&extent_root->fs_info->cache_radix, - CTREE_EXTENT_PENDING_DEL)) - del_pending_extents(trans, extent_root); - return 0; -} - - /* * remove an extent from the root, returns 0 on success */ @@ -264,18 +263,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_buffer *t; + struct buffer_head *t; int pending_ret; int ret; if (root == extent_root) { t = find_tree_block(root, blocknr); - radix_tree_tag_set(&root->fs_info->cache_radix, blocknr, - CTREE_EXTENT_PENDING_DEL); + pin_down_block(root, blocknr, CTREE_EXTENT_PENDING_DEL); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin); - pending_ret = run_pending(trans, root->fs_info->extent_root); + if (pin) { + ret = pin_down_block(root, blocknr, CTREE_EXTENT_PINNED); + BUG_ON(ret); + } + ret = __free_extent(trans, root, blocknr, num_blocks); + pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? 
ret : pending_ret; } @@ -296,14 +298,16 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u64 hole_size = 0; int slot = 0; - u64 last_block; + u64 last_block = 0; u64 test_block; int start_found; struct btrfs_leaf *l; struct btrfs_root * root = orig_root->fs_info->extent_root; int total_needed = num_blocks; + int level; - total_needed += (btrfs_header_level(&root->node->node.header) + 1) * 3; + level = btrfs_header_level(btrfs_buffer_header(root->node)); + total_needed += (level + 1) * 3; if (root->fs_info->last_insert.objectid > search_start) search_start = root->fs_info->last_insert.objectid; @@ -323,7 +327,7 @@ check_failed: path.slots[0]--; while (1) { - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { ret = btrfs_next_leaf(root, &path); @@ -429,7 +433,7 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root sizeof(extent_item)); finish_current_insert(trans, extent_root); - pending_ret = run_pending(trans, extent_root); + pending_ret = del_pending_extents(trans, extent_root); if (ret) return ret; if (pending_ret) @@ -441,16 +445,15 @@ static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root * helper function to allocate a block for a given tree * returns the tree buffer or NULL. 
*/ -struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_key ins; int ret; - struct btrfs_buffer *buf; + struct buffer_head *buf; ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1, - btrfs_header_parentid(&root->node->node.header), - &ins); + btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins); if (ret) { BUG(); return NULL; @@ -467,13 +470,13 @@ struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct btrfs_buffer *next; - struct btrfs_buffer *cur; + struct buffer_head *next; + struct buffer_head *cur; u64 blocknr; int ret; u32 refs; - ret = lookup_block_ref(trans, root, path->nodes[*level]->blocknr, + ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, &refs); BUG_ON(ret); if (refs > 1) @@ -484,9 +487,10 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root while(*level > 0) { cur = path->nodes[*level]; if (path->slots[*level] >= - btrfs_header_nritems(&cur->node.header)) + btrfs_header_nritems(btrfs_buffer_header(cur))) break; - blocknr = btrfs_node_blockptr(&cur->node, path->slots[*level]); + blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), + path->slots[*level]); ret = lookup_block_ref(trans, root, blocknr, &refs); if (refs != 1 || *level == 1) { path->slots[*level]++; @@ -499,12 +503,12 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(&next->node.header); + *level = btrfs_header_level(btrfs_buffer_header(next)); path->slots[*level] = 0; } out: - ret = btrfs_free_extent(trans, root, path->nodes[*level]->blocknr, 1, - 1); + ret = 
btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, + 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -525,14 +529,14 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root int ret; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < - btrfs_header_nritems(&path->nodes[i]->node.header)- 1) { + if (slot < btrfs_header_nritems( + btrfs_buffer_header(path->nodes[i])) - 1) { path->slots[i]++; *level = i; return 0; } else { ret = btrfs_free_extent(trans, root, - path->nodes[*level]->blocknr, + path->nodes[*level]->b_blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; @@ -549,7 +553,7 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root * decremented. */ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_buffer *snap) + *root, struct buffer_head *snap) { int ret = 0; int wret; @@ -560,7 +564,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_init_path(&path); - level = btrfs_header_level(&snap->node.header); + level = btrfs_header_level(btrfs_buffer_header(snap)); orig_level = level; path.nodes[level] = snap; path.slots[level] = 0; diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 6c2a71a46c7d..22519b8e0cf2 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -10,6 +10,7 @@ * License. */ +#include #define DELTA 0x9E3779B9 static void TEA_transform(__u32 buf[2], __u32 const in[]) @@ -63,6 +64,14 @@ int btrfs_name_hash(const char *name, int len, u64 *hash_result) const char *p; __u32 in[8], buf[2]; + if (len == 1 && *name == '.') { + *hash_result = 1; + return 0; + } else if (len == 2 && name[0] == '.' 
&& name[1] == '.') { + *hash_result = 2; + return 0; + } + /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; buf[1] = 0xefcdab89; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c7fda3bf7b21..c45aec258bd5 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -15,7 +15,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int ret; u64 hole_size = 0; int slot = 0; - u64 last_ino; + u64 last_ino = 0; int start_found; struct btrfs_leaf *l; struct btrfs_root *root = fs_root->fs_info->inode_root; @@ -40,7 +40,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, path.slots[0]--; while (1) { - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { ret = btrfs_next_leaf(root, &path); @@ -105,8 +105,8 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, if (ret) goto out; - inode_item = btrfs_item_ptr(&path.nodes[0]->leaf, path.slots[0], - struct btrfs_inode_map_item); + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); out: btrfs_release_path(inode_root, &path); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index aa2d3fac8804..c8ee938c1251 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -17,7 +17,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printk("leaf %Lu total ptrs %d free space %d\n", btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(root, l)); - fflush(stdout); for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); @@ -67,10 +66,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) btrfs_leaf_data(l) + btrfs_item_offset(item)); break; }; - fflush(stdout); } } -void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) + +void 
btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t) { int i; u32 nr; @@ -78,16 +77,16 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) if (!t) return; - c = &t->node; + c = btrfs_buffer_node(t); nr = btrfs_header_nritems(&c->header); if (btrfs_is_leaf(c)) { btrfs_print_leaf(root, (struct btrfs_leaf *)c); return; } - printk("node %Lu level %d total ptrs %d free spc %u\n", t->blocknr, - btrfs_header_level(&c->header), nr, - (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); - fflush(stdout); + printk("node %Lu level %d total ptrs %d free spc %u\n", + btrfs_header_blocknr(&c->header), + btrfs_header_level(&c->header), nr, + (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { printk("\tkey %d (%Lu %u %Lu) block %Lu\n", i, @@ -95,12 +94,11 @@ void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t) c->ptrs[i].key.flags, c->ptrs[i].key.offset, btrfs_node_blockptr(c, i)); - fflush(stdout); } for (i = 0; i < nr; i++) { - struct btrfs_buffer *next_buf = read_tree_block(root, + struct buffer_head *next_buf = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct btrfs_node *next = &next_buf->node; + struct btrfs_node *next = btrfs_buffer_node(next_buf); if (btrfs_is_leaf(next) && btrfs_header_level(&c->header) != 1) BUG(); diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 0882ca904eca..396041a05cfa 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,5 +1,5 @@ #ifndef __PRINT_TREE_ #define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); -void btrfs_print_tree(struct btrfs_root *root, struct btrfs_buffer *t); +void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t); #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 52c83be4b307..a4554c007ef7 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -21,7 +21,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, if (ret < 0) goto out; 
BUG_ON(ret == 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); BUG_ON(path.slots[0] == 0); slot = path.slots[0] - 1; if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { @@ -51,7 +51,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) goto out; BUG_ON(ret != 0); - l = &path.nodes[0]->leaf; + l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, sizeof(*item)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4ae76044aeab..ccc056aad692 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -8,13 +9,18 @@ #include #include #include "ctree.h" +#include "disk-io.h" #define BTRFS_SUPER_MAGIC 0x9123682E + +static struct inode_operations btrfs_dir_inode_operations; +static struct super_operations btrfs_super_ops; +static struct file_operations btrfs_dir_file_operations; + #if 0 /* some random number */ static struct super_operations ramfs_ops; -static struct inode_operations ramfs_dir_inode_operations; static struct backing_dev_info ramfs_backing_dev_info = { .ra_pages = 0, /* No readahead */ @@ -129,46 +135,243 @@ static struct inode_operations ramfs_dir_inode_operations = { }; #endif -struct inode *btrfs_get_inode(struct super_block *sb, int mode, dev_t dev) +static void btrfs_read_locked_inode(struct inode *inode) { - struct inode * inode = new_inode(sb); + struct btrfs_path path; + struct btrfs_inode_item *inode_item; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; +printk("read locked inode %lu\n", inode->i_ino); + btrfs_init_path(&path); + ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); + if (ret) { + make_bad_inode(inode); + return; + } + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_inode_item); - if (inode) { - inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = 
current->fsgid; - inode->i_blocks = 0; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +printk("found locked inode %lu\n", inode->i_ino); + inode->i_mode = btrfs_inode_mode(inode_item); + inode->i_nlink = btrfs_inode_nlink(inode_item); + inode->i_uid = btrfs_inode_uid(inode_item); + inode->i_gid = btrfs_inode_gid(inode_item); + inode->i_size = btrfs_inode_size(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); + inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); + inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); + inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); + inode->i_blocks = btrfs_inode_nblocks(inode_item); + inode->i_generation = btrfs_inode_generation(inode_item); +printk("about to release\n"); + btrfs_release_path(root, &path); + switch (inode->i_mode & S_IFMT) { +#if 0 + default: + init_special_inode(inode, inode->i_mode, + btrfs_inode_rdev(inode_item)); + break; +#endif + case S_IFREG: +printk("inode %lu now a file\n", inode->i_ino); + break; + case S_IFDIR: +printk("inode %lu now a directory\n", inode->i_ino); + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + break; + case S_IFLNK: +printk("inode %lu now a link\n", inode->i_ino); + // inode->i_op = &page_symlink_inode_operations; + break; } - return inode; +printk("returning!\n"); + return; } -static struct super_operations btrfs_ops = { - .statfs = simple_statfs, - .drop_inode = generic_delete_inode, -}; +static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + ino_t *ino) +{ + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct btrfs_dir_item *di; + struct btrfs_path path; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + int ret; + + btrfs_init_path(&path); + ret = 
btrfs_lookup_dir_item(NULL, root, &path, dir->i_ino, name, + namelen, 0); + if (ret) { + *ino = 0; + goto out; + } + di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_dir_item); + *ino = btrfs_dir_objectid(di); +out: + btrfs_release_path(root, &path); + return ret; +} + +static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode * inode; + ino_t ino; + int ret; + + if (dentry->d_name.len > BTRFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + ret = btrfs_inode_by_name(dir, dentry, &ino); + if (ret < 0) + return ERR_PTR(ret); + inode = NULL; + if (ino) { +printk("lookup on %.*s returns %lu\n", dentry->d_name.len, dentry->d_name.name, ino); + inode = iget(dir->i_sb, ino); + if (!inode) + return ERR_PTR(-EACCES); + } + return d_splice_alias(inode, dentry); +} + +static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_item *item; + struct btrfs_dir_item *di; + struct btrfs_key key; + struct btrfs_path path; + int ret; + u32 nritems; + struct btrfs_leaf *leaf; + int slot; + int advance; + unsigned char d_type = DT_UNKNOWN; + int over; + + key.objectid = inode->i_ino; +printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos); + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + key.offset = filp->f_pos; + btrfs_init_path(&path); + ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + if (ret < 0) { + goto err; + } +printk("first ret %d\n", ret); + advance = filp->f_pos > 0 && ret != 0; + while(1) { + leaf = btrfs_buffer_leaf(path.nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path.slots[0]; +printk("leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); + if (advance) { +printk("advancing!\n"); + if (slot == nritems -1) { + ret = btrfs_next_leaf(root, &path); + if (ret) 
+ break; + leaf = btrfs_buffer_leaf(path.nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path.slots[0]; +printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); + } else { + slot++; + path.slots[0]++; + } + } + advance = 1; + item = leaf->items + slot; +printk("item key %Lu %u %Lu\n", btrfs_disk_key_objectid(&item->key), + btrfs_disk_key_flags(&item->key), btrfs_disk_key_offset(&item->key)); + if (btrfs_disk_key_objectid(&item->key) != key.objectid) + break; + if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) + continue; + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); +printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di), + (const char *)(di + 1), btrfs_dir_objectid(di)); + over = filldir(dirent, (const char *)(di + 1), + btrfs_dir_name_len(di), + btrfs_disk_key_offset(&item->key), + btrfs_dir_objectid(di), d_type); + if (over) + break; + filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; + } +printk("filldir all done\n"); + ret = 0; +err: + btrfs_release_path(root, &path); + return ret; +} + +static void btrfs_put_super (struct super_block * sb) +{ + struct btrfs_root *root = btrfs_sb(sb); + int ret; + + ret = close_ctree(root); + if (ret) { + printk("close ctree returns %d\n", ret); + } + sb->s_fs_info = NULL; +} static int btrfs_fill_super(struct super_block * sb, void * data, int silent) { struct inode * inode; - struct dentry * root; + struct dentry * root_dentry; + struct btrfs_super_block *disk_super; + struct buffer_head *bh; + struct btrfs_root *root; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = BTRFS_SUPER_MAGIC; - sb->s_op = &btrfs_ops; + sb->s_op = &btrfs_super_ops; sb->s_time_gran = 1; - inode = btrfs_get_inode(sb, S_IFDIR | 0755, 0); + + bh = sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); + if (!bh) { + printk("btrfs: unable to read on disk super\n"); + return -EIO; + } + 
disk_super = (struct btrfs_super_block *)bh->b_data; + root = open_ctree(sb, bh, disk_super); + sb->s_fs_info = root; + if (!root) { + printk("btrfs: open_ctree failed\n"); + return -EIO; + } + printk("read in super total blocks %Lu root %Lu\n", + btrfs_super_total_blocks(disk_super), + btrfs_super_root_dir(disk_super)); + + inode = iget_locked(sb, btrfs_super_root_dir(disk_super)); if (!inode) return -ENOMEM; + if (inode->i_state & I_NEW) { + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } - root = d_alloc_root(inode); - if (!root) { + root_dentry = d_alloc_root(inode); + if (!root_dentry) { iput(inode); return -ENOMEM; } - sb->s_root = root; + sb->s_root = root_dentry; + return 0; } @@ -187,6 +390,24 @@ static struct file_system_type btrfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +static struct super_operations btrfs_super_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .put_super = btrfs_put_super, + .read_inode = btrfs_read_locked_inode, +}; + +static struct inode_operations btrfs_dir_inode_operations = { + .lookup = btrfs_lookup, +}; + +static struct file_operations btrfs_dir_file_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = btrfs_readdir, +}; + + static int __init init_btrfs_fs(void) { printk("btrfs loaded!\n"); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 3adb6e69fe43..9ab27b7491c5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -10,7 +10,7 @@ struct btrfs_trans_handle { static inline struct btrfs_trans_handle * btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { - struct btrfs_trans_handle *h = malloc(sizeof(*h)); + struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); h->transid = root->root_key.offset; h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -21,7 +21,7 @@ static inline void btrfs_free_transaction(struct btrfs_root *root, struct btrfs_trans_handle *handle) { memset(handle, 0, 
sizeof(*handle)); - free(handle); + kfree(handle); } #endif -- cgit v1.2.3 From 79154b1b5bcf87903db7ff16a30b360b78d6fe4f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Mar 2007 15:59:16 -0400 Subject: Btrfs: transaction rework Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.h | 4 +- fs/btrfs/disk-io.c | 115 +++------------------------- fs/btrfs/disk-io.h | 8 +- fs/btrfs/super.c | 18 ----- fs/btrfs/transaction.c | 199 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.h | 35 +++++---- 7 files changed, 235 insertions(+), 146 deletions(-) create mode 100644 fs/btrfs/transaction.c (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 1a0fb7422393..be7d74cdca04 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -3,7 +3,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ - hash.o file-item.o inode-item.o inode-map.o disk-io.o + hash.o file-item.o inode-item.o inode-map.o disk-io.o transaction.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7748eecd9304..4c19a3f12afd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -5,6 +5,7 @@ #include struct btrfs_trans_handle; +struct btrfs_transaction; #define BTRFS_MAGIC "_BtRfS_M" @@ -224,10 +225,11 @@ struct btrfs_fs_info { u64 last_inode_alloc; u64 last_inode_alloc_dirid; u64 generation; - struct btrfs_trans_handle *running_transaction; + struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; struct buffer_head *sb_buffer; struct super_block *sb; + struct mutex trans_mutex; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index df2061a735cd..9cacca0c525c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -50,89 +50,6 @@ int clean_tree_block(struct btrfs_trans_handle 
*trans, struct btrfs_root *root, return 0; } -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) -{ - mark_buffer_dirty(buf); - return 0; -} - -static int __commit_transaction(struct btrfs_trans_handle *trans, struct - btrfs_root *root) -{ - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); - return 0; -} - -static int commit_tree_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - int ret; - u64 old_extent_block; - struct btrfs_root *tree_root = fs_info->tree_root; - struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *inode_root = fs_info->inode_root; - - btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->b_blocknr); - ret = btrfs_update_root(trans, tree_root, - &inode_root->root_key, - &inode_root->root_item); - BUG_ON(ret); - while(1) { - old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->b_blocknr) - break; - btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->b_blocknr); - ret = btrfs_update_root(trans, tree_root, - &extent_root->root_key, - &extent_root->root_item); - BUG_ON(ret); - } - return 0; -} - -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_super_block *s) -{ - int ret = 0; - struct buffer_head *snap = root->commit_root; - struct btrfs_key snap_key; - - if (root->commit_root == root->node) - return 0; - - memcpy(&snap_key, &root->root_key, sizeof(snap_key)); - root->root_key.offset++; - - btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, - &root->root_key, &root->root_item); - BUG_ON(ret); - - ret = commit_tree_roots(trans, root->fs_info); - BUG_ON(ret); - - ret = __commit_transaction(trans, root); - BUG_ON(ret); - - write_ctree_super(trans, root, s); - btrfs_finish_extent_commit(trans, 
root->fs_info->extent_root); - btrfs_finish_extent_commit(trans, root->fs_info->tree_root); - - root->commit_root = root->node; - get_bh(root->node); - ret = btrfs_drop_snapshot(trans, root, snap); - BUG_ON(ret); - - ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); - BUG_ON(ret); - root->fs_info->generation = root->root_key.offset + 1; - - return ret; -} - static int __setup_root(struct btrfs_super_block *super, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -197,6 +114,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->disk_super = disk_super; fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; + mutex_init(&fs_info->trans_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); @@ -225,7 +143,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, } int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_super_block *s) + *root) { return 0; #if 0 @@ -242,34 +160,19 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root #endif } -static int drop_cache(struct btrfs_root *root) -{ - return 0; -#if 0 - while(!list_empty(&root->fs_info->cache)) { - struct buffer_head *b = list_entry(root->fs_info->cache.next, - struct buffer_head, - cache); - list_del_init(&b->cache); - btrfs_block_release(root, b); - } - return 0; -#endif -} - int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; - trans = root->fs_info->running_transaction; - btrfs_commit_transaction(trans, root, root->fs_info->disk_super); - ret = commit_tree_roots(trans, root->fs_info); - BUG_ON(ret); - ret = __commit_transaction(trans, root); + trans = btrfs_start_transaction(root, 1); + btrfs_commit_transaction(trans, root); + /* run commit again to drop the original snapshot */ + trans = btrfs_start_transaction(root, 1); + btrfs_commit_transaction(trans, root); + ret = 
btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); - write_ctree_super(trans, root, root->fs_info->disk_super); - drop_cache(root); + write_ctree_super(NULL, root); if (root->node) btrfs_block_release(root, root->node); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 7f4bb729b734..099f7eea0ec7 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -28,14 +28,14 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_super_block *s); +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); struct btrfs_root *open_ctree(struct super_block *sb, struct buffer_head *sb_buffer, struct btrfs_super_block *disk_super); int close_ctree(struct btrfs_root *root); void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); -int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_super_block *s); +int write_ctree_super(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int mkfs(int fd, u64 num_blocks, u32 blocksize); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ccc056aad692..620430825840 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -141,7 +141,6 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = btrfs_sb(inode->i_sb); int ret; -printk("read locked inode %lu\n", inode->i_ino); btrfs_init_path(&path); ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { @@ -152,7 +151,6 @@ printk("read locked inode %lu\n", inode->i_ino); path.slots[0], struct btrfs_inode_item); -printk("found locked inode %lu\n", inode->i_ino); inode->i_mode = btrfs_inode_mode(inode_item); inode->i_nlink = 
btrfs_inode_nlink(inode_item); inode->i_uid = btrfs_inode_uid(inode_item); @@ -166,7 +164,6 @@ printk("found locked inode %lu\n", inode->i_ino); inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); -printk("about to release\n"); btrfs_release_path(root, &path); switch (inode->i_mode & S_IFMT) { #if 0 @@ -176,19 +173,15 @@ printk("about to release\n"); break; #endif case S_IFREG: -printk("inode %lu now a file\n", inode->i_ino); break; case S_IFDIR: -printk("inode %lu now a directory\n", inode->i_ino); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; break; case S_IFLNK: -printk("inode %lu now a link\n", inode->i_ino); // inode->i_op = &page_symlink_inode_operations; break; } -printk("returning!\n"); return; } @@ -232,7 +225,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); inode = NULL; if (ino) { -printk("lookup on %.*s returns %lu\n", dentry->d_name.len, dentry->d_name.name, ino); inode = iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); @@ -257,7 +249,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int over; key.objectid = inode->i_ino; -printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos); key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); key.offset = filp->f_pos; @@ -266,15 +257,12 @@ printk("readdir on dir %Lu pos %Lu\n", key.objectid, filp->f_pos); if (ret < 0) { goto err; } -printk("first ret %d\n", ret); advance = filp->f_pos > 0 && ret != 0; while(1) { leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; -printk("leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); if (advance) { -printk("advancing!\n"); if (slot == nritems -1) { ret = btrfs_next_leaf(root, &path); if (ret) @@ -282,7 +270,6 @@ 
printk("advancing!\n"); leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; -printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slot); } else { slot++; path.slots[0]++; @@ -290,15 +277,11 @@ printk("2leaf %Lu nritems %lu slot %d\n", path.nodes[0]->b_blocknr, nritems, slo } advance = 1; item = leaf->items + slot; -printk("item key %Lu %u %Lu\n", btrfs_disk_key_objectid(&item->key), - btrfs_disk_key_flags(&item->key), btrfs_disk_key_offset(&item->key)); if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) continue; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); -printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di), - (const char *)(di + 1), btrfs_dir_objectid(di)); over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), @@ -307,7 +290,6 @@ printk("filldir name %.*s, objectid %Lu\n", btrfs_dir_name_len(di), break; filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; } -printk("filldir all done\n"); ret = 0; err: btrfs_release_path(root, &path); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c new file mode 100644 index 000000000000..8dc1c170f10f --- /dev/null +++ b/fs/btrfs/transaction.c @@ -0,0 +1,199 @@ +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" + + +static void put_transaction(struct btrfs_transaction *transaction) +{ + transaction->use_count--; + if (transaction->use_count == 0) + kfree(transaction); +} + +static int join_transaction(struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans; + cur_trans = root->fs_info->running_transaction; + if (!cur_trans) { + cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); + BUG_ON(!cur_trans); + root->fs_info->running_transaction = cur_trans; + cur_trans->num_writers = 0; + cur_trans->transid = root->root_key.offset + 1; + 
init_waitqueue_head(&cur_trans->writer_wait); + init_waitqueue_head(&cur_trans->commit_wait); + cur_trans->in_commit = 0; + cur_trans->use_count = 0; + cur_trans->commit_done = 0; + } + cur_trans->num_writers++; + return 0; +} + +struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, + int num_blocks) +{ + struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); + int ret; + + mutex_lock(&root->fs_info->trans_mutex); + ret = join_transaction(root); + BUG_ON(ret); + h->transid = root->fs_info->running_transaction->transid; + h->transaction = root->fs_info->running_transaction; + h->blocks_reserved = num_blocks; + h->blocks_used = 0; + root->fs_info->running_transaction->use_count++; + mutex_unlock(&root->fs_info->trans_mutex); + return h; +} + +int btrfs_end_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans; + mutex_lock(&root->fs_info->trans_mutex); + cur_trans = root->fs_info->running_transaction; + WARN_ON(cur_trans->num_writers <= 1); + if (waitqueue_active(&cur_trans->writer_wait)) + wake_up(&cur_trans->writer_wait); + cur_trans->num_writers--; + put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); + kfree(trans); + return 0; +} + + +int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + return 0; +} + +int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + u64 old_extent_block; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *inode_root = fs_info->inode_root; + + btrfs_set_root_blocknr(&inode_root->root_item, + inode_root->node->b_blocknr); + ret = btrfs_update_root(trans, tree_root, + &inode_root->root_key, + &inode_root->root_item); + BUG_ON(ret); + 
while(1) { + old_extent_block = btrfs_root_blocknr(&extent_root->root_item); + if (old_extent_block == extent_root->node->b_blocknr) + break; + btrfs_set_root_blocknr(&extent_root->root_item, + extent_root->node->b_blocknr); + ret = btrfs_update_root(trans, tree_root, + &extent_root->root_key, + &extent_root->root_item); + BUG_ON(ret); + } + return 0; +} + +static int wait_for_commit(struct btrfs_root *root, + struct btrfs_transaction *commit) +{ + DEFINE_WAIT(wait); + commit->use_count++; + while(!commit->commit_done) { + prepare_to_wait(&commit->commit_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (commit->commit_done) + break; + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); + } + finish_wait(&commit->commit_wait, &wait); + return 0; +} + +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret = 0; + struct buffer_head *snap = root->commit_root; + struct btrfs_key snap_key; + struct btrfs_transaction *cur_trans; + DEFINE_WAIT(wait); + + mutex_lock(&root->fs_info->trans_mutex); + if (trans->transaction->in_commit) { + cur_trans = trans->transaction; + trans->transaction->use_count++; + btrfs_end_transaction(trans, root); + ret = wait_for_commit(root, cur_trans); + BUG_ON(ret); + put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); + return 0; + } + while (trans->transaction->num_writers > 1) { + prepare_to_wait(&trans->transaction->writer_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (trans->transaction->num_writers <= 1) + break; + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); + } + finish_wait(&trans->transaction->writer_wait, &wait); + + cur_trans = root->fs_info->running_transaction; + root->fs_info->running_transaction = NULL; + mutex_unlock(&root->fs_info->trans_mutex); + + memcpy(&snap_key, &root->root_key, sizeof(snap_key)); + root->root_key.offset++; + + if 
(btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { + btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + BUG_ON(ret); + } + + ret = btrfs_commit_tree_roots(trans, root); + BUG_ON(ret); + + ret = btrfs_write_and_wait_transaction(trans, root); + BUG_ON(ret); + + write_ctree_super(trans, root); + btrfs_finish_extent_commit(trans, root->fs_info->extent_root); + btrfs_finish_extent_commit(trans, root->fs_info->tree_root); + put_transaction(cur_trans); + kfree(trans); + + if (root->node != root->commit_root) { + trans = btrfs_start_transaction(root, 1); + root->commit_root = root->node; + get_bh(root->node); + ret = btrfs_drop_snapshot(trans, root, snap); + BUG_ON(ret); + + ret = btrfs_del_root(trans, root->fs_info->tree_root, + &snap_key); + BUG_ON(ret); + root->fs_info->generation = root->root_key.offset + 1; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + } + + return ret; +} + diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 9ab27b7491c5..7e6c08a0accd 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,27 +1,30 @@ #ifndef __TRANSACTION__ #define __TRANSACTION__ +struct btrfs_transaction { + u64 transid; + unsigned long num_writers; + int in_commit; + int use_count; + int commit_done; + wait_queue_head_t writer_wait; + wait_queue_head_t commit_wait; +}; + struct btrfs_trans_handle { u64 transid; unsigned long blocks_reserved; unsigned long blocks_used; + struct btrfs_transaction *transaction; }; -static inline struct btrfs_trans_handle * -btrfs_start_transaction(struct btrfs_root *root, int num_blocks) -{ - struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); - h->transid = root->root_key.offset; - h->blocks_reserved = num_blocks; - h->blocks_used = 0; - return h; -} - -static inline void btrfs_free_transaction(struct btrfs_root *root, - struct btrfs_trans_handle *handle) -{ - 
memset(handle, 0, sizeof(*handle)); - kfree(handle); -} +int btrfs_end_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, + int num_blocks); +int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif -- cgit v1.2.3 From d571976292839cec05a2820b08f7629b145ed157 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 10:01:08 -0400 Subject: btrfs_create, btrfs_write_super, btrfs_sync_fs Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.c | 49 +++++++++++-------- fs/btrfs/ctree.h | 2 +- fs/btrfs/dir-item.c | 3 +- fs/btrfs/disk-io.c | 23 ++++----- fs/btrfs/extent-tree.c | 26 ++++++---- fs/btrfs/inode-map.c | 1 + fs/btrfs/root-tree.c | 1 + fs/btrfs/super.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.c | 13 +++-- 10 files changed, 201 insertions(+), 47 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index c5d67bbd4d90..4b5bd05de94f 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -3,6 +3,7 @@ * Fix ENOSPC handling * make a real mkfs and superblock * Do checksumming +* Use a real check instead of mark_buffer_dirty * Define FS objects in terms of different item types * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e690e2bb47d2..052434a328ec 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -39,7 +39,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *cow; struct btrfs_node *cow_node; - if (!buffer_dirty(buf)) { + if (buffer_dirty(buf)) { *cow_ret = buf; return 0; } @@ -48,6 +48,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(cow_node, 
btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); *cow_ret = cow; + mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; @@ -58,7 +59,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, cow->b_blocknr); - BUG_ON(!buffer_dirty(parent)); + mark_buffer_dirty(parent); btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); @@ -362,7 +363,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&parent->ptrs[pslot + 1].key, &right->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(!buffer_dirty(parent_buf)); + mark_buffer_dirty(parent_buf); } } if (btrfs_header_nritems(&mid->header) == 1) { @@ -398,7 +399,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root /* update the parent key to reflect our changes */ memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(!buffer_dirty(parent_buf)); + mark_buffer_dirty(parent_buf); } /* update the path */ @@ -539,7 +540,7 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root break; t = btrfs_buffer_node(path->nodes[i]); memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); - BUG_ON(!buffer_dirty(path->nodes[i])); + mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; } @@ -583,8 +584,8 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(!buffer_dirty(src_buf)); - BUG_ON(!buffer_dirty(dst_buf)); + mark_buffer_dirty(src_buf); + mark_buffer_dirty(dst_buf); return ret; } @@ -631,8 +632,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct btrfs_set_header_nritems(&src->header, 
src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - BUG_ON(!buffer_dirty(src_buf)); - BUG_ON(!buffer_dirty(dst_buf)); + mark_buffer_dirty(src_buf); + mark_buffer_dirty(dst_buf); return ret; } @@ -669,6 +670,9 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root lower_key = &lower->ptrs[0].key; memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); + + mark_buffer_dirty(t); + /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); root->node = t; @@ -708,7 +712,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - BUG_ON(!buffer_dirty(path->nodes[level])); + mark_buffer_dirty(path->nodes[level]); return 0; } @@ -755,7 +759,8 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&c->header, mid); ret = 0; - BUG_ON(!buffer_dirty(t)); + mark_buffer_dirty(t); + mark_buffer_dirty(split_buffer); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, split_buffer->b_blocknr, path->slots[level + 1] + 1, level + 1); @@ -886,11 +891,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); - BUG_ON(!buffer_dirty(left_buf)); - BUG_ON(!buffer_dirty(right_buf)); + mark_buffer_dirty(left_buf); + mark_buffer_dirty(right_buf); memcpy(&upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); - BUG_ON(!buffer_dirty(upper)); + mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -1004,8 +1009,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct 
btrfs_root push_space = btrfs_item_offset(right->items + i); } - BUG_ON(!buffer_dirty(t)); - BUG_ON(!buffer_dirty(right_buf)); + mark_buffer_dirty(t); + mark_buffer_dirty(right_buf); wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) @@ -1115,8 +1120,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root right_buffer->b_blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - BUG_ON(!buffer_dirty(right_buffer)); - BUG_ON(!buffer_dirty(l_buf)); + mark_buffer_dirty(right_buffer); + mark_buffer_dirty(l_buf); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_block_release(root, path->nodes[0]); @@ -1202,12 +1207,12 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_item_offset(leaf->items + slot, data_end - data_size); btrfs_set_item_size(leaf->items + slot, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); + mark_buffer_dirty(leaf_buf); ret = 0; if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - BUG_ON(!buffer_dirty(leaf_buf)); if (btrfs_leaf_free_space(root, leaf) < 0) BUG(); check_leaf(root, path, 0); @@ -1233,6 +1238,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], u8); memcpy(ptr, data, data_size); + mark_buffer_dirty(path.nodes[0]); } btrfs_release_path(root, &path); return ret; @@ -1273,7 +1279,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } - BUG_ON(!buffer_dirty(parent)); + mark_buffer_dirty(parent); return ret; } @@ -1368,8 +1374,11 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } else { + mark_buffer_dirty(leaf_buf); btrfs_block_release(root, leaf_buf); } + } else { + mark_buffer_dirty(leaf_buf); } } return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4c19a3f12afd..983e3cc9ae9f 100644 --- 
a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -833,7 +833,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, char *name, int name_len, u64 dir, u64 + *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type); int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 75d6e373e98d..f81cbcc83b66 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -5,7 +5,7 @@ #include "transaction.h" int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, char *name, int name_len, u64 dir, u64 + *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type) { int ret = 0; @@ -35,6 +35,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); memcpy(name_ptr, name, name_len); + mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9cacca0c525c..8e1dcda0839c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -145,19 +145,20 @@ struct btrfs_root *open_ctree(struct super_block *sb, int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return 0; -#if 0 - int ret; - btrfs_set_super_root(s, root->fs_info->tree_root->node->b_blocknr); - - ret = pwrite(root->fs_info->fp, s, sizeof(*s), - BTRFS_SUPER_INFO_OFFSET); - if (ret != sizeof(*s)) { - fprintf(stderr, "failed to write new super block err %d\n", ret); - return ret; + struct buffer_head *bh = root->fs_info->sb_buffer; + btrfs_set_super_root(root->fs_info->disk_super, + 
root->fs_info->tree_root->node->b_blocknr); + lock_buffer(bh); + clear_buffer_dirty(bh); + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(WRITE, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + WARN_ON(1); + return -EIO; } return 0; -#endif } int close_ctree(struct btrfs_root *root) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e3af2c035687..2818f1c57170 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -49,6 +49,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); + mark_buffer_dirty(path.nodes[0]); btrfs_release_path(root->fs_info->extent_root, &path); finish_current_insert(trans, root->fs_info->extent_root); @@ -103,7 +104,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - unsigned long gang[8]; + struct buffer_head *gang[8]; u64 first = 0; int ret; int i; @@ -116,13 +117,15 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct if (!ret) break; if (!first) - first = gang[0]; + first = gang[0]->b_blocknr; for (i = 0; i < ret; i++) { radix_tree_delete(&root->fs_info->pinned_radix, - gang[i]); + gang[i]->b_blocknr); + brelse(gang[i]); } } - root->fs_info->last_insert.objectid = first; + if (root->fs_info->last_insert.objectid > first) + root->fs_info->last_insert.objectid = first; root->fs_info->last_insert.offset = 0; return 0; } @@ -161,8 +164,10 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) { int err; + struct buffer_head *bh = sb_getblk(root->fs_info->sb, blocknr); + BUG_ON(!bh); err = radix_tree_insert(&root->fs_info->pinned_radix, - blocknr, (void *)blocknr); + blocknr, bh); 
BUG_ON(err); if (err) return err; @@ -217,6 +222,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) BUG(); } + mark_buffer_dirty(path.nodes[0]); btrfs_release_path(extent_root, &path); finish_current_insert(trans, extent_root); return ret; @@ -232,7 +238,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int ret; int wret; int err = 0; - unsigned long gang[4]; + struct buffer_head *gang[4]; int i; struct radix_tree_root *radix = &extent_root->fs_info->pinned_radix; @@ -245,10 +251,12 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_tag_set(radix, gang[i], CTREE_EXTENT_PINNED); - radix_tree_tag_clear(radix, gang[i], + radix_tree_tag_set(radix, gang[i]->b_blocknr, + CTREE_EXTENT_PINNED); + radix_tree_tag_clear(radix, gang[i]->b_blocknr, CTREE_EXTENT_PENDING_DEL); - wret = __free_extent(trans, extent_root, gang[i], 1); + wret = __free_extent(trans, extent_root, + gang[i]->b_blocknr, 1); if (wret) err = wret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index c45aec258bd5..ad2d375b830d 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -108,6 +108,7 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); + mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(inode_root, &path); return ret; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a4554c007ef7..a821b5d1e233 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -55,6 +55,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root slot = path.slots[0]; memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, sizeof(*item)); + mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git 
a/fs/btrfs/super.c b/fs/btrfs/super.c index 620430825840..6080a8133d71 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -10,6 +10,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "transaction.h" #define BTRFS_SUPER_MAGIC 0x9123682E @@ -357,6 +358,131 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return 0; } +static void fill_inode_item(struct btrfs_inode_item *item, + struct inode *inode) +{ + btrfs_set_inode_uid(item, inode->i_uid); + btrfs_set_inode_gid(item, inode->i_gid); + btrfs_set_inode_size(item, inode->i_size); + btrfs_set_inode_mode(item, inode->i_mode); + btrfs_set_inode_nlink(item, inode->i_nlink); + btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); + btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); + btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); + btrfs_set_inode_nblocks(item, inode->i_blocks); + btrfs_set_inode_generation(item, inode->i_generation); +} + +static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, + struct inode *dir, int mode) +{ + struct inode *inode; + struct btrfs_inode_item inode_item; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_key key; + int ret; + u64 objectid; + + inode = new_inode(dir->i_sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + BUG_ON(ret); + + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_mode = mode; + inode->i_ino = objectid; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + fill_inode_item(&inode_item, inode); + + + key.objectid = objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + ret = 
btrfs_insert_inode_map(trans, root, objectid, &key); + BUG_ON(ret); + + ret = btrfs_insert_inode(trans, root, objectid, &inode_item); + BUG_ON(ret); + + insert_inode_hash(inode); + // FIXME mark_inode_dirty(inode) + return inode; +} + +static int btrfs_add_link(struct btrfs_trans_handle *trans, + struct dentry *dentry, struct inode *inode) +{ + int ret; + ret = btrfs_insert_dir_item(trans, btrfs_sb(inode->i_sb), + dentry->d_name.name, dentry->d_name.len, + dentry->d_parent->d_inode->i_ino, + inode->i_ino, 0); + BUG_ON(ret); + return ret; +} + +static int btrfs_add_nondir(struct btrfs_trans_handle *trans, + struct dentry *dentry, struct inode *inode) +{ + int err = btrfs_add_link(trans, dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + inode_dec_link_count(inode); + iput(inode); + return err; +} + +static int btrfs_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct inode *inode; + int err; + + trans = btrfs_start_transaction(root, 1); + inode = btrfs_new_inode(trans, dir, mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + return err; + // FIXME mark the inode dirty + err = btrfs_add_nondir(trans, dentry, inode); + dir->i_sb->s_dirt = 1; + btrfs_end_transaction(trans, root); + return err; +} + +static void btrfs_write_super(struct super_block *sb) +{ + sb->s_dirt = 0; +printk("btrfs write_super!\n"); +} + +static int btrfs_sync_fs(struct super_block *sb, int wait) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root; + int ret; + sb->s_dirt = 0; + root = btrfs_sb(sb); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); + sb->s_dirt = 0; + BUG_ON(ret); +printk("btrfs sync_fs\n"); + return 0; +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -377,10 +503,13 @@ static struct 
super_operations btrfs_super_ops = { .drop_inode = generic_delete_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, + .write_super = btrfs_write_super, + .sync_fs = btrfs_sync_fs, }; static struct inode_operations btrfs_dir_inode_operations = { .lookup = btrfs_lookup, + .create = btrfs_create, }; static struct file_operations btrfs_dir_file_operations = { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8dc1c170f10f..4903b47c9780 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -25,7 +25,7 @@ static int join_transaction(struct btrfs_root *root) init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); cur_trans->in_commit = 0; - cur_trans->use_count = 0; + cur_trans->use_count = 1; cur_trans->commit_done = 0; } cur_trans->num_writers++; @@ -56,7 +56,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; - WARN_ON(cur_trans->num_writers <= 1); + WARN_ON(cur_trans->num_writers < 1); if (waitqueue_active(&cur_trans->writer_wait)) wake_up(&cur_trans->writer_wait); cur_trans->num_writers--; @@ -155,10 +155,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; - mutex_unlock(&root->fs_info->trans_mutex); - memcpy(&snap_key, &root->root_key, sizeof(snap_key)); - root->root_key.offset++; + if (root->node != root->commit_root) { + memcpy(&snap_key, &root->root_key, sizeof(snap_key)); + root->root_key.offset++; + } + + mutex_unlock(&root->fs_info->trans_mutex); if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); -- cgit v1.2.3 From df2ce34c8801af8e00c7f82435fcb1e4b45a2759 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 11:00:45 -0400 Subject: 
Btrfs: properly set new buffers for new blocks up to date Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++++-- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/super.c | 3 +++ 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e1dcda0839c..2f1eb4ae4e1d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -10,8 +10,13 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) BUG(); if (root->node && btrfs_header_parentid(&node->header) != - btrfs_header_parentid(btrfs_buffer_header(root->node))) - BUG(); + btrfs_header_parentid(btrfs_buffer_header(root->node))) { + printk("block %Lu parentids don't match buf %Lu, root %Lu\n", + buf->b_blocknr, + btrfs_header_parentid(&node->header), + btrfs_header_parentid(btrfs_buffer_header(root->node))); + WARN_ON(1); + } return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2818f1c57170..c86f0e6152f2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -467,7 +467,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return NULL; } buf = find_tree_block(root, ins.objectid); - dirty_tree_block(trans, root, buf); + set_buffer_uptodate(buf); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6080a8133d71..8970e9a82156 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -473,7 +473,10 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_trans_handle *trans; struct btrfs_root *root; int ret; + sb->s_dirt = 0; + return 0; + root = btrfs_sb(sb); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); -- cgit v1.2.3 From 7f5c15160e1436a53d01f9190db11c2a3a4d788a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 15:56:19 -0400 Subject: Add generation number to btrfs_header, readdir fixes, hash collision 
fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 8 +++++++- fs/btrfs/ctree.h | 14 +++++++++++++- fs/btrfs/dir-item.c | 4 ++-- fs/btrfs/disk-io.c | 6 +----- fs/btrfs/super.c | 14 +++++++++----- 5 files changed, 32 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 052434a328ec..b33a6bfaf327 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,6 +1,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "transaction.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); @@ -39,7 +40,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *cow; struct btrfs_node *cow_node; - if (buffer_dirty(buf)) { + if (btrfs_header_generation(btrfs_buffer_header(buf)) == + trans->transid) { *cow_ret = buf; return 0; } @@ -47,6 +49,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root cow_node = btrfs_buffer_node(cow); memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); + btrfs_set_header_generation(&cow_node->header, trans->transid); *cow_ret = cow; mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); @@ -661,6 +664,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); btrfs_set_header_blocknr(&c->header, t->b_blocknr); + btrfs_set_header_generation(&c->header, trans->transid); btrfs_set_header_parentid(&c->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); lower = btrfs_buffer_node(path->nodes[level-1]); @@ -750,6 +754,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_blocknr(&split->header, 
split_buffer->b_blocknr); + btrfs_set_header_generation(&split->header, trans->transid); btrfs_set_header_parentid(&split->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); mid = (c_nritems + 1) / 2; @@ -1096,6 +1101,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root } btrfs_set_header_nritems(&right->header, nritems - mid); btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); + btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 983e3cc9ae9f..96cec6352f12 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -52,6 +52,7 @@ struct btrfs_key { struct btrfs_header { u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ + __le64 generation; __le64 parentid; /* objectid of the tree root */ __le32 csum; __le32 ham; @@ -600,6 +601,17 @@ static inline void btrfs_set_header_blocknr(struct btrfs_header *h, u64 blocknr) h->blocknr = cpu_to_le64(blocknr); } +static inline u64 btrfs_header_generation(struct btrfs_header *h) +{ + return le64_to_cpu(h->generation); +} + +static inline void btrfs_set_header_generation(struct btrfs_header *h, + u64 val) +{ + h->generation = cpu_to_le64(val); +} + static inline u64 btrfs_header_parentid(struct btrfs_header *h) { return le64_to_cpu(h->parentid); @@ -839,7 +851,7 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, const char *name, int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, - char *name, int name_len); + const char *name, int name_len); int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, u64 dirid, u64 *objectid); diff --git a/fs/btrfs/dir-item.c 
b/fs/btrfs/dir-item.c index f81cbcc83b66..c18f81797a88 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -60,8 +60,8 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root } int btrfs_match_dir_item_name(struct btrfs_root *root, - struct btrfs_path *path, char - *name, int name_len) + struct btrfs_path *path, + const char *name, int name_len) { struct btrfs_dir_item *dir_item; char *name_ptr; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2f1eb4ae4e1d..3dea757d4cbb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -11,11 +11,7 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) BUG(); if (root->node && btrfs_header_parentid(&node->header) != btrfs_header_parentid(btrfs_buffer_header(root->node))) { - printk("block %Lu parentids don't match buf %Lu, root %Lu\n", - buf->b_blocknr, - btrfs_header_parentid(&node->header), - btrfs_header_parentid(btrfs_buffer_header(root->node))); - WARN_ON(1); + BUG(); } return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8970e9a82156..a2db05594695 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -199,7 +199,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, btrfs_init_path(&path); ret = btrfs_lookup_dir_item(NULL, root, &path, dir->i_ino, name, namelen, 0); - if (ret) { + if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) { *ino = 0; goto out; } @@ -247,7 +247,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int slot; int advance; unsigned char d_type = DT_UNKNOWN; - int over; + int over = 0; key.objectid = inode->i_ino; key.flags = 0; @@ -258,7 +258,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (ret < 0) { goto err; } - advance = filp->f_pos > 0 && ret != 0; + advance = 0; while(1) { leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); @@ -282,13 +282,17 @@ static int 
btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) continue; + if (btrfs_disk_key_offset(&item->key) < filp->f_pos) + continue; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), btrfs_dir_objectid(di), d_type); - if (over) + if (over) { + filp->f_pos = btrfs_disk_key_offset(&item->key); break; + } filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; } ret = 0; @@ -425,7 +429,6 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, inode->i_ino, 0); - BUG_ON(ret); return ret; } @@ -466,6 +469,7 @@ static void btrfs_write_super(struct super_block *sb) { sb->s_dirt = 0; printk("btrfs write_super!\n"); + filemap_flush(sb->s_bdev->bd_inode->i_mapping); } static int btrfs_sync_fs(struct super_block *sb, int wait) -- cgit v1.2.3 From d561c025ee3a0aee53a225bedce0ee35fe12f335 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 19:47:49 -0400 Subject: Btrfs: very minimal locking Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 5 +++-- fs/btrfs/super.c | 26 +++++++++++++++++--------- 4 files changed, 22 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 96cec6352f12..68cafae6a850 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -231,6 +231,7 @@ struct btrfs_fs_info { struct buffer_head *sb_buffer; struct super_block *sb; struct mutex trans_mutex; + struct mutex fs_mutex; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3dea757d4cbb..e32ddff55b0e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -116,6 +116,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; 
mutex_init(&fs_info->trans_mutex); + mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c86f0e6152f2..9583a9ae8b79 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -168,9 +168,10 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) BUG_ON(!bh); err = radix_tree_insert(&root->fs_info->pinned_radix, blocknr, bh); - BUG_ON(err); - if (err) + if (err && err != -EEXIST) { + BUG(); return err; + } radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, tag); return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a2db05594695..0ca1080e0198 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -453,25 +453,21 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); inode = btrfs_new_inode(trans, dir, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) - return err; + goto out_unlock; // FIXME mark the inode dirty err = btrfs_add_nondir(trans, dentry, inode); dir->i_sb->s_dirt = 1; btrfs_end_transaction(trans, root); +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); return err; } -static void btrfs_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; -printk("btrfs write_super!\n"); - filemap_flush(sb->s_bdev->bd_inode->i_mapping); -} - static int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; @@ -479,17 +475,29 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) int ret; sb->s_dirt = 0; - return 0; + if (!wait) { + filemap_flush(sb->s_bdev->bd_inode->i_mapping); + return 0; + } + filemap_write_and_wait(sb->s_bdev->bd_inode->i_mapping); root = btrfs_sb(sb); + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = 
btrfs_commit_transaction(trans, root); sb->s_dirt = 0; BUG_ON(ret); printk("btrfs sync_fs\n"); + mutex_unlock(&root->fs_info->fs_mutex); return 0; } +static void btrfs_write_super(struct super_block *sb) +{ + btrfs_sync_fs(sb, 1); +} + + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { -- cgit v1.2.3 From 78fae27ebf5bd35fb9b2e4213e486635eacfc0ad Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 25 Mar 2007 11:35:08 -0400 Subject: Btrfs: leak fixes, pinning fixes Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 30 +++++++++++++++++++++--------- fs/btrfs/super.c | 1 + fs/btrfs/transaction.c | 32 ++++++++++++++++++-------------- 3 files changed, 40 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9583a9ae8b79..369b960fce45 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -165,13 +165,23 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) { int err; struct buffer_head *bh = sb_getblk(root->fs_info->sb, blocknr); + struct btrfs_header *header; BUG_ON(!bh); + + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + root->fs_info->running_transaction->transid) { + return 0; + } + err = radix_tree_insert(&root->fs_info->pinned_radix, blocknr, bh); if (err && err != -EEXIST) { BUG(); return err; } + if (err == -EEXIST) + brelse(bh); radix_tree_tag_set(&root->fs_info->pinned_radix, blocknr, tag); return 0; @@ -181,7 +191,7 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int tag) * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks) + *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_path path; struct btrfs_key key; @@ -213,12 +223,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, 
struct btrfs_root btrfs_set_extent_refs(ei, refs); if (refs == 0) { u64 super_blocks_used; + + if (pin) { + ret = pin_down_block(root, blocknr, + CTREE_EXTENT_PINNED); + BUG_ON(ret); + } + super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, &path); - if (extent_root->fs_info->last_insert.objectid > - blocknr) + if (extent_root->fs_info->last_insert.objectid > blocknr) extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); @@ -257,7 +273,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct radix_tree_tag_clear(radix, gang[i]->b_blocknr, CTREE_EXTENT_PENDING_DEL); wret = __free_extent(trans, extent_root, - gang[i]->b_blocknr, 1); + gang[i]->b_blocknr, 1, 0); if (wret) err = wret; } @@ -281,11 +297,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root pin_down_block(root, blocknr, CTREE_EXTENT_PENDING_DEL); return 0; } - if (pin) { - ret = pin_down_block(root, blocknr, CTREE_EXTENT_PINNED); - BUG_ON(ret); - } - ret = __free_extent(trans, root, blocknr, num_blocks); + ret = __free_extent(trans, root, blocknr, num_blocks, pin); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? 
ret : pending_ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0ca1080e0198..094a66c267b4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -146,6 +146,7 @@ static void btrfs_read_locked_inode(struct inode *inode) ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { make_bad_inode(inode); + btrfs_release_path(root, &path); return; } inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4903b47c9780..46a596e345f0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -4,12 +4,15 @@ #include "disk-io.h" #include "transaction.h" - +static int total_trans = 0; static void put_transaction(struct btrfs_transaction *transaction) { transaction->use_count--; - if (transaction->use_count == 0) + if (transaction->use_count == 0) { + WARN_ON(total_trans == 0); + total_trans--; kfree(transaction); + } } static int join_transaction(struct btrfs_root *root) @@ -18,6 +21,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans = root->fs_info->running_transaction; if (!cur_trans) { cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); + total_trans++; BUG_ON(!cur_trans); root->fs_info->running_transaction = cur_trans; cur_trans->num_writers = 0; @@ -108,7 +112,6 @@ static int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { DEFINE_WAIT(wait); - commit->use_count++; while(!commit->commit_done) { prepare_to_wait(&commit->commit_wait, &wait, TASK_UNINTERRUPTIBLE); @@ -126,7 +129,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret = 0; - struct buffer_head *snap = root->commit_root; + struct buffer_head *snap; struct btrfs_key snap_key; struct btrfs_transaction *cur_trans; DEFINE_WAIT(wait); @@ -153,15 +156,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } finish_wait(&trans->transaction->writer_wait, &wait); - cur_trans = 
root->fs_info->running_transaction; - root->fs_info->running_transaction = NULL; - if (root->node != root->commit_root) { memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; } - mutex_unlock(&root->fs_info->trans_mutex); if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); @@ -173,17 +172,24 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); + cur_trans = root->fs_info->running_transaction; + root->fs_info->running_transaction = NULL; + mutex_unlock(&root->fs_info->trans_mutex); + ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); - btrfs_finish_extent_commit(trans, root->fs_info->extent_root); - btrfs_finish_extent_commit(trans, root->fs_info->tree_root); + btrfs_finish_extent_commit(trans, root); + mutex_lock(&root->fs_info->trans_mutex); + put_transaction(cur_trans); put_transaction(cur_trans); + mutex_unlock(&root->fs_info->trans_mutex); kfree(trans); if (root->node != root->commit_root) { trans = btrfs_start_transaction(root, 1); + snap = root->commit_root; root->commit_root = root->node; get_bh(root->node); ret = btrfs_drop_snapshot(trans, root, snap); @@ -191,10 +197,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); - BUG_ON(ret); - root->fs_info->generation = root->root_key.offset + 1; - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); + BUG_ON(ret); root->fs_info->generation = root->root_key.offset + 1; ret = btrfs_end_transaction(trans, root); BUG_ON(ret); + printk("at free, total trans %d\n", total_trans); } return ret; -- cgit v1.2.3 From 134e97313491c3a3d6bc3eca3b7c9c64408cbd08 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 25 Mar 2007 13:44:56 -0400 Subject: Btrfs: unlink and delete_inode Signed-off-by: Chris Mason --- 
fs/btrfs/super.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 114 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 094a66c267b4..f96bd92155cc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -187,6 +187,104 @@ static void btrfs_read_locked_inode(struct inode *inode) return; } +static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct btrfs_path path; + struct btrfs_root *root; + struct btrfs_trans_handle *trans; + const char *name = dentry->d_name.name; + int name_len = dentry->d_name.len; + int ret; + u64 objectid; + struct btrfs_dir_item *di; + + btrfs_init_path(&path); + root = btrfs_sb(dir->i_sb); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + + ret = btrfs_lookup_dir_item(trans, root, &path, dir->i_ino, + name, name_len, -1); + if (ret < 0) + goto err; + if (ret > 0) { + ret = -ENOENT; + goto err; + } + di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_dir_item); + objectid = btrfs_dir_objectid(di); + + ret = btrfs_del_item(trans, root, &path); + BUG_ON(ret); + dentry->d_inode->i_ctime = dir->i_ctime; +err: + btrfs_release_path(root, &path); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret == 0) + inode_dec_link_count(dentry->d_inode); + return ret; +} + +static int btrfs_free_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + u64 objectid = inode->i_ino; + struct btrfs_path path; + struct btrfs_inode_map_item *map; + struct btrfs_key stat_data_key; + int ret; + clear_inode(inode); + btrfs_init_path(&path); + ret = btrfs_lookup_inode_map(trans, root, &path, objectid, -1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + btrfs_release_path(root, &path); + goto error; + } + map = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_inode_map_item); + 
btrfs_disk_key_to_cpu(&stat_data_key, &map->key); + ret = btrfs_del_item(trans, root->fs_info->inode_root, &path); + BUG_ON(ret); + btrfs_release_path(root, &path); + btrfs_init_path(&path); + + ret = btrfs_lookup_inode(trans, root, &path, objectid, -1); + BUG_ON(ret); + ret = btrfs_del_item(trans, root, &path); + BUG_ON(ret); + btrfs_release_path(root, &path); +error: + return ret; +} + +static void btrfs_delete_inode(struct inode *inode) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) { + goto no_delete; + } + inode->i_size = 0; + if (inode->i_blocks) + WARN_ON(1); + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_free_inode(trans, root, inode); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return; +no_delete: + clear_inode(inode); +} + + static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ino_t *ino) { @@ -272,6 +370,13 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; +#if 0 + page_cache_readahead( + inode->i_sb->s_bdev->bd_inode->i_mapping, + &filp->f_ra, filp, + path.nodes[0]->b_blocknr >> + (PAGE_CACHE_SHIFT - inode->i_blkbits), 1); +#endif } else { slot++; path.slots[0]++; @@ -441,8 +546,6 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, d_instantiate(dentry, inode); return 0; } - inode_dec_link_count(inode); - iput(inode); return err; } @@ -453,6 +556,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct btrfs_root *root = btrfs_sb(dir->i_sb); struct inode *inode; int err; + int drop_inode = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -462,10 +566,16 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto 
out_unlock; // FIXME mark the inode dirty err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; dir->i_sb->s_dirt = 1; btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } return err; } @@ -516,7 +626,7 @@ static struct file_system_type btrfs_fs_type = { static struct super_operations btrfs_super_ops = { .statfs = simple_statfs, - .drop_inode = generic_delete_inode, + .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, .write_super = btrfs_write_super, @@ -526,6 +636,7 @@ static struct super_operations btrfs_super_ops = { static struct inode_operations btrfs_dir_inode_operations = { .lookup = btrfs_lookup, .create = btrfs_create, + .unlink = btrfs_unlink, }; static struct file_operations btrfs_dir_file_operations = { -- cgit v1.2.3 From dcea79152cfb7cf244bdd9c23f4291831ffd6adf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 25 Mar 2007 14:03:08 -0400 Subject: Btrfs: very simple readdir readahead Signed-off-by: Chris Mason --- fs/btrfs/super.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f96bd92155cc..e76fcd80db6a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -332,6 +332,34 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } +static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) +{ + struct buffer_head *bh; + struct btrfs_node *node; + int i; + int nritems; + u64 objectid; + u64 item_objectid; + u64 blocknr; + int slot; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); + nritems = btrfs_header_nritems(&node->header); + for (i = slot; i < 
nritems; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + bh = sb_getblk(root->fs_info->sb, blocknr); + ll_rw_block(READ, 1, &bh); + brelse(bh); + } + +} static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -358,6 +386,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err; } advance = 0; + reada_leaves(root, &path); while(1) { leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); @@ -370,13 +399,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; -#if 0 - page_cache_readahead( - inode->i_sb->s_bdev->bd_inode->i_mapping, - &filp->f_ra, filp, - path.nodes[0]->b_blocknr >> - (PAGE_CACHE_SHIFT - inode->i_blkbits), 1); -#endif + if (path.nodes[1] && path.slots[1] == 0) + reada_leaves(root, &path); } else { slot++; path.slots[0]++; -- cgit v1.2.3 From f7922033efe957f79ae57f6026e93c8148e7f7ed Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 25 Mar 2007 20:17:36 -0400 Subject: btrfs_mkdir Signed-off-by: Chris Mason --- fs/btrfs/super.c | 184 ++++++++++++++++++++----------------------------------- 1 file changed, 65 insertions(+), 119 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e76fcd80db6a..820d21ee4afd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -18,124 +18,6 @@ static struct inode_operations btrfs_dir_inode_operations; static struct super_operations btrfs_super_ops; static struct file_operations btrfs_dir_file_operations; -#if 0 -/* some random number */ - -static struct super_operations ramfs_ops; - -static struct backing_dev_info ramfs_backing_dev_info = { - .ra_pages = 0, /* No readahead */ - 
.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | - BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | - BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP, -}; - -struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) -{ - struct inode * inode = new_inode(sb); - - if (inode) { - inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_blocks = 0; - inode->i_mapping->a_ops = &ramfs_aops; - inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - switch (mode & S_IFMT) { - default: - init_special_inode(inode, mode, dev); - break; - case S_IFREG: - inode->i_op = &ramfs_file_inode_operations; - inode->i_fop = &ramfs_file_operations; - break; - case S_IFDIR: - inode->i_op = &ramfs_dir_inode_operations; - inode->i_fop = &simple_dir_operations; - - /* directory inodes start off with i_nlink == 2 (for "." entry) */ - inc_nlink(inode); - break; - case S_IFLNK: - inode->i_op = &page_symlink_inode_operations; - break; - } - } - return inode; -} - -/* - * File creation. Allocate an inode, and we're done.. 
- */ -/* SMP-safe */ -static int -ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) -{ - struct inode * inode = ramfs_get_inode(dir->i_sb, mode, dev); - int error = -ENOSPC; - - if (inode) { - if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - inode->i_mode |= S_ISGID; - } - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ - error = 0; - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - } - return error; -} - -static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) -{ - int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0); - if (!retval) - inc_nlink(dir); - return retval; -} - -static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) -{ - return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); -} - -static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname) -{ - struct inode *inode; - int error = -ENOSPC; - - inode = ramfs_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); - if (inode) { - int l = strlen(symname)+1; - error = page_symlink(inode, symname, l); - if (!error) { - if (dir->i_mode & S_ISGID) - inode->i_gid = dir->i_gid; - d_instantiate(dentry, inode); - dget(dentry); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - } else - iput(inode); - } - return error; -} - -static struct inode_operations ramfs_dir_inode_operations = { - .create = ramfs_create, - .lookup = simple_lookup, - .link = simple_link, - .unlink = simple_unlink, - .symlink = ramfs_symlink, - .mkdir = ramfs_mkdir, - .rmdir = simple_rmdir, - .mknod = ramfs_mknod, - .rename = simple_rename, -}; -#endif - static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path path; @@ -533,9 +415,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_ino = objectid; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + /* FIXME do this on link */ + if 
(mode & S_IFDIR) + inode->i_size = 3; fill_inode_item(&inode_item, inode); - key.objectid = objectid; key.flags = 0; key.offset = 0; @@ -603,6 +487,67 @@ out_unlock: return err; } +static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) +{ + struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; + char buf[2]; + buf[0] = '.'; + buf[1] = '.'; + + ret = btrfs_insert_dir_item(trans, root, buf, 1, inode->i_ino, + inode->i_ino, 1); + if (ret) + goto error; + ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino, + dir->i_ino, 1); +error: + return ret; +} + +static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *inode; + struct btrfs_trans_handle *trans; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + int err = 0; + int drop_on_err = 0; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out_unlock; + } + inode = btrfs_new_inode(trans, dir, S_IFDIR | mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_fail; + } + drop_on_err = 1; + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + + err = btrfs_make_empty_dir(trans, inode, dir); + if (err) + goto out_fail; + err = btrfs_add_link(trans, dentry, inode); + if (err) + goto out_fail; + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + drop_on_err = 0; + +out_fail: + btrfs_end_transaction(trans, root); +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + if (drop_on_err) + iput(inode); + return err; +} + static int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; @@ -661,6 +606,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .lookup = btrfs_lookup, .create = btrfs_create, .unlink = btrfs_unlink, + .mkdir = btrfs_mkdir, }; static struct file_operations btrfs_dir_file_operations = { -- cgit v1.2.3 From 
4730a4bc5bf3861d2cb40999374b3ac0ee2a1e0d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Mar 2007 12:00:39 -0400 Subject: btrfs_dirty_inode Signed-off-by: Chris Mason --- fs/btrfs/super.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 820d21ee4afd..905b093a85fe 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -242,6 +242,7 @@ static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) } } + static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -392,6 +393,51 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_inode_generation(item, inode->i_generation); } +static int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_inode_item *inode_item; + struct btrfs_path path; + int ret; + + btrfs_init_path(&path); + + ret = btrfs_lookup_inode(trans, root, &path, inode->i_ino, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto failed; + } + + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_inode_item); + + fill_inode_item(inode_item, inode); + mark_buffer_dirty(path.nodes[0]); +failed: + btrfs_release_path(root, &path); + return 0; +} + +static int btrfs_write_inode(struct inode *inode, int wait) +{ + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_trans_handle *trans; + int ret; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_update_inode(trans, root, inode); + if (wait) + btrfs_commit_transaction(trans, root); + else + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct inode *dir, int mode) 
{ @@ -415,9 +461,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_ino = objectid; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - /* FIXME do this on link */ - if (mode & S_IFDIR) - inode->i_size = 3; fill_inode_item(&inode_item, inode); key.objectid = objectid; @@ -431,7 +474,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BUG_ON(ret); insert_inode_hash(inode); - // FIXME mark_inode_dirty(inode) return inode; } @@ -443,6 +485,12 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, inode->i_ino, 0); + if (ret == 0) { + dentry->d_parent->d_inode->i_size += dentry->d_name.len; + ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), + dentry->d_parent->d_inode); + } + return ret; } @@ -502,6 +550,10 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, goto error; ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino, dir->i_ino, 1); + if (ret) + goto error; + inode->i_size = 3; + ret = btrfs_update_inode(trans, root, inode); error: return ret; } @@ -536,7 +588,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; d_instantiate(dentry, inode); - mark_inode_dirty(inode); drop_on_err = 0; out_fail: @@ -600,6 +651,7 @@ static struct super_operations btrfs_super_ops = { .read_inode = btrfs_read_locked_inode, .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, + .write_inode = btrfs_write_inode, }; static struct inode_operations btrfs_dir_inode_operations = { -- cgit v1.2.3 From dee26a9f7aab7ffe1193cd1415b23a69426acc9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Mar 2007 16:00:06 -0400 Subject: btrfs_get_block, file read/write Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 26 ++++++- fs/btrfs/extent-tree.c | 4 +- fs/btrfs/file-item.c | 52 +++++++++++++- 
fs/btrfs/super.c | 188 ++++++++++++++++++++++++++++++++++++++++--------- 5 files changed, 230 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 454028b6bd46..2ca301b289a6 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,7 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter +* Make directory hashing work on 32 bit * Do actual block accounting * Check compat and incompat flags on the inode * Add virtual filesystems, mountable snapshots diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0aa1052d9f67..7b7120d3ab42 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -198,7 +198,7 @@ struct btrfs_file_extent_item { __le64 disk_blocknr; __le64 disk_num_blocks; /* - * the logical offset in file bytes (no csums) + * the logical offset in file blocks (no csums) * this extent record is for. This allows a file extent to point * into the middle of an existing extent on disk, sharing it * between two snapshots (useful if some bytes in the middle of the @@ -812,12 +812,19 @@ static inline struct btrfs_root *btrfs_sb(struct super_block *sb) ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) +/* extent-item.c */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root + *root, u64 num_blocks, u64 search_start, u64 + search_end, u64 owner, struct btrfs_key *ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct + btrfs_root *root); +/* ctree.c */ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, 
struct btrfs_path *p, int ins_len, int cow); @@ -834,8 +841,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *snap); -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct - btrfs_root *root); +/* root-item.c */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key); int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root @@ -846,6 +852,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item); int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); +/* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type); @@ -854,6 +861,7 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root const char *name, int name_len, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len); +/* inode-map.c */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, u64 dirid, u64 *objectid); @@ -863,9 +871,21 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, int mod); +/* inode-item.c */ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, struct btrfs_inode_item *inode_item); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, int mod); + +/* file-item.c */ +int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root 
*root, + u64 objectid, u64 offset, + u64 num_blocks, u64 hint_block, + u64 *result); +int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid, + u64 blocknr, u64 num_blocks, int mod); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b14104276eea..82f6e9eed1d0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -403,7 +403,7 @@ error: * * returns 0 if everything worked, non-zero otherwise. */ -static int alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_blocks, u64 search_start, u64 search_end, u64 owner, struct btrfs_key *ins) { @@ -458,7 +458,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int ret; struct buffer_head *buf; - ret = alloc_extent(trans, root, 1, 0, (unsigned long)-1, + ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1, btrfs_header_parentid(btrfs_buffer_header(root->node)), &ins); if (ret) { BUG(); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 8e1e5b4ccfaf..4a0367d702b7 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,9 +1,57 @@ #include #include "ctree.h" +#include "disk-io.h" #include "transaction.h" -int btrfs_create_file(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 dirid, u64 *objectid) +int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 offset, + u64 num_blocks, u64 hint_block, + u64 *result) { + struct btrfs_key ins; + int ret = 0; + struct btrfs_file_extent_item *item; + struct btrfs_key file_key; + struct btrfs_path path; + + btrfs_init_path(&path); + ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, + (u64)-1, objectid, &ins); + BUG_ON(ret); + file_key.objectid = objectid; + file_key.offset = offset; + file_key.flags = 0; + btrfs_set_key_type(&file_key, 
BTRFS_EXTENT_DATA_KEY); + + ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + sizeof(*item)); + item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_disk_blocknr(item, ins.objectid); + btrfs_set_file_extent_disk_num_blocks(item, ins.offset); + btrfs_set_file_extent_offset(item, 0); + btrfs_set_file_extent_num_blocks(item, ins.offset); + mark_buffer_dirty(path.nodes[0]); + *result = ins.objectid; + btrfs_release_path(root, &path); return 0; } + +int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid, + u64 blocknr, u64 num_blocks, int mod) +{ + int ret; + struct btrfs_key file_key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + + file_key.objectid = objectid; + file_key.offset = blocknr; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); + return ret; +} diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 905b093a85fe..2c2883f2856d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -17,6 +18,9 @@ static struct inode_operations btrfs_dir_inode_operations; static struct super_operations btrfs_super_ops; static struct file_operations btrfs_dir_file_operations; +static struct inode_operations btrfs_file_inode_operations; +static struct address_space_operations btrfs_aops; +static struct file_operations btrfs_file_operations; static void btrfs_read_locked_inode(struct inode *inode) { @@ -57,6 +61,9 @@ static void btrfs_read_locked_inode(struct inode *inode) break; #endif case S_IFREG: + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; break; case S_IFDIR: inode->i_op = &btrfs_dir_inode_operations; @@ 
-214,35 +221,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return d_splice_alias(inode, dentry); } -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) -{ - struct buffer_head *bh; - struct btrfs_node *node; - int i; - int nritems; - u64 objectid; - u64 item_objectid; - u64 blocknr; - int slot; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); - nritems = btrfs_header_nritems(&node->header); - for (i = slot; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - bh = sb_getblk(root->fs_info->sb, blocknr); - ll_rw_block(READ, 1, &bh); - brelse(bh); - } - -} - static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -269,21 +247,18 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err; } advance = 0; - reada_leaves(root, &path); while(1) { leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; - if (advance) { - if (slot == nritems -1) { + if (advance || slot >= nritems) { + if (slot >= nritems -1) { ret = btrfs_next_leaf(root, &path); if (ret) break; leaf = btrfs_buffer_leaf(path.nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path.slots[0]; - if (path.nodes[1] && path.slots[1] == 0) - reada_leaves(root, &path); } else { slot++; path.slots[0]++; @@ -297,6 +272,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; + + advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), @@ -524,6 +501,11 @@ static int 
btrfs_create(struct inode *dir, struct dentry *dentry, err = btrfs_add_nondir(trans, dentry, inode); if (err) drop_inode = 1; + else { + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + } dir->i_sb->s_dirt = 1; btrfs_end_transaction(trans, root); out_unlock: @@ -623,11 +605,124 @@ printk("btrfs sync_fs\n"); return 0; } +static int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int ret; + int err = 0; + u64 blocknr; + u64 extent_start = 0; + u64 extent_end = 0; + u64 objectid = inode->i_ino; + struct btrfs_path path; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_trans_handle *trans = NULL; + struct btrfs_file_extent_item *item; + struct btrfs_leaf *leaf; + struct btrfs_disk_key *found_key; + + btrfs_init_path(&path); + mutex_lock(&root->fs_info->fs_mutex); + if (create) + trans = btrfs_start_transaction(root, 1); + + + ret = btrfs_lookup_file_extent(trans, root, &path, + inode->i_ino, iblock, 1, 0); + if (ret < 0) { + btrfs_release_path(root, &path); + err = ret; + goto out; + } + + if (ret != 0) { + if (path.slots[0] == 0) { + btrfs_release_path(root, &path); + goto allocate; + } + path.slots[0]--; + } + + item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_file_extent_item); + leaf = btrfs_buffer_leaf(path.nodes[0]); + blocknr = btrfs_file_extent_disk_blocknr(item); + blocknr += btrfs_file_extent_offset(item); + + /* exact match found, use it */ + if (ret == 0) { + err = 0; + map_bh(result, inode->i_sb, blocknr); + btrfs_release_path(root, &path); + goto out; + } + + /* are we inside the extent that was found? 
*/ + found_key = &leaf->items[path.slots[0]].key; + if (btrfs_disk_key_objectid(found_key) != objectid || + btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) { + extent_end = 0; + extent_start = 0; + btrfs_release_path(root, &path); + goto allocate; + } + + extent_start = btrfs_disk_key_offset(&leaf->items[path.slots[0]].key); + extent_start += btrfs_file_extent_offset(item); + extent_end = extent_start + btrfs_file_extent_num_blocks(item); + btrfs_release_path(root, &path); + if (iblock >= extent_start && iblock < extent_end) { + err = 0; + map_bh(result, inode->i_sb, blocknr + iblock - extent_start); + goto out; + } +allocate: + /* ok, create a new extent */ + if (!create) { + err = 0; + goto out; + } + ret = btrfs_alloc_file_extent(trans, root, objectid, iblock, + 1, extent_end, &blocknr); + if (ret) { + err = ret; + goto out; + } + map_bh(result, inode->i_sb, blocknr); + +out: + if (trans) + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return err; +} + +static int btrfs_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return block_prepare_write(page, from, to, btrfs_get_block); +} + static void btrfs_write_super(struct super_block *sb) { btrfs_sync_fs(sb, 1); } +static int btrfs_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, btrfs_get_block); +} + +static int btrfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, btrfs_get_block); +} + +static int btrfs_writepage(struct page *page, struct writeback_control *wbc) +{ + return block_write_full_page(page, btrfs_get_block, wbc); +} static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) @@ -667,6 +762,31 @@ static struct file_operations btrfs_dir_file_operations = { .readdir = btrfs_readdir, }; +static struct 
address_space_operations btrfs_aops = { + .readpage = btrfs_readpage, + .readpages = btrfs_readpages, + .writepage = btrfs_writepage, + .sync_page = block_sync_page, + .prepare_write = btrfs_prepare_write, + .commit_write = generic_commit_write, +}; + +static struct inode_operations btrfs_file_inode_operations = { + .truncate = NULL, +}; + +static struct file_operations btrfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .mmap = generic_file_mmap, + .open = generic_file_open, + .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, +}; static int __init init_btrfs_fs(void) { -- cgit v1.2.3 From 6407bf6d7c449cbfb0a39d985194e265eda3baf4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 06:33:00 -0400 Subject: Btrfs: reference counts on data extents Signed-off-by: Chris Mason --- fs/btrfs/TODO | 3 ++ fs/btrfs/ctree.h | 9 +++--- fs/btrfs/extent-tree.c | 87 ++++++++++++++++++++++++++++++++++++++++---------- fs/btrfs/inode-map.c | 1 + fs/btrfs/super.c | 6 ++-- 5 files changed, 82 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 2ca301b289a6..7aec75e9a37e 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,7 +8,10 @@ * Add block mapping tree (simple dm layer) * Add simple tree locking (semaphore per tree) * Make allocator smarter +* make level a field in header +* add a block group to struct inode * Make directory hashing work on 32 bit +* Make sure nobh stuff is working properly for cows * Do actual block accounting * Check compat and incompat flags on the inode * Add virtual filesystems, mountable snapshots diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7b7120d3ab42..1a98952e0faf 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -9,10 +9,11 @@ struct btrfs_transaction; #define 
BTRFS_MAGIC "_BtRfS_M" -#define BTRFS_ROOT_TREE_OBJECTID 1 -#define BTRFS_EXTENT_TREE_OBJECTID 2 -#define BTRFS_INODE_MAP_OBJECTID 3 -#define BTRFS_FS_TREE_OBJECTID 4 +#define BTRFS_ROOT_TREE_OBJECTID 1ULL +#define BTRFS_EXTENT_TREE_OBJECTID 2ULL +#define BTRFS_INODE_MAP_OBJECTID 3ULL +#define BTRFS_FS_TREE_OBJECTID 4ULL +#define BTRFS_FIRST_FREE_OBJECTID 5ULL /* * we can actually store much bigger names, but lets not confuse the rest diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 82f6e9eed1d0..4d4fc48c0a31 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -13,7 +13,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr) + *root, u64 blocknr, u64 num_blocks) { struct btrfs_path path; int ret; @@ -29,7 +29,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = 1; + key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, 0, 1); if (ret != 0) @@ -48,7 +48,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root } static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u32 *refs) + *root, u64 blocknr, u64 num_blocks, u32 *refs) { struct btrfs_path path; int ret; @@ -57,7 +57,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item *item; btrfs_init_path(&path); key.objectid = blocknr; - key.offset = 1; + key.offset = num_blocks; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, @@ -76,17 +76,34 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, { u64 blocknr; struct btrfs_node 
*buf_node; + struct btrfs_leaf *buf_leaf; + struct btrfs_disk_key *key; + struct btrfs_file_extent_item *fi; int i; + int leaf; + int ret; if (!root->ref_cows) return 0; buf_node = btrfs_buffer_node(buf); - if (btrfs_is_leaf(buf_node)) - return 0; - + leaf = btrfs_is_leaf(buf_node); + buf_leaf = btrfs_buffer_leaf(buf); for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { - blocknr = btrfs_node_blockptr(buf_node, i); - inc_block_ref(trans, root, blocknr); + if (leaf) { + key = &buf_leaf->items[i].key; + if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf_leaf, i, + struct btrfs_file_extent_item); + ret = inc_block_ref(trans, root, + btrfs_file_extent_disk_blocknr(fi), + btrfs_file_extent_disk_num_blocks(fi)); + BUG_ON(ret); + } else { + blocknr = btrfs_node_blockptr(buf_node, i); + ret = inc_block_ref(trans, root, blocknr, 1); + BUG_ON(ret); + } } return 0; } @@ -469,6 +486,37 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } +static int drop_leaf_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct buffer_head *cur) +{ + struct btrfs_disk_key *key; + struct btrfs_leaf *leaf; + struct btrfs_file_extent_item *fi; + int i; + int nritems; + int ret; + + BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur))); + leaf = btrfs_buffer_leaf(cur); + nritems = btrfs_header_nritems(&leaf->header); + for (i = 0; i < nritems; i++) { + key = &leaf->items[i].key; + if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + /* + * FIXME make sure to insert a trans record that + * repeats the snapshot del on crash + */ + ret = btrfs_free_extent(trans, root, + btrfs_file_extent_disk_blocknr(fi), + btrfs_file_extent_disk_num_blocks(fi), + 0); + BUG_ON(ret); + } + return 0; +} + /* * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. 
@@ -483,28 +531,33 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root u32 refs; ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, - &refs); + 1, &refs); BUG_ON(ret); if (refs > 1) goto out; /* * walk down to the last node level and free all the leaves */ - while(*level > 0) { + while(*level >= 0) { cur = path->nodes[*level]; if (path->slots[*level] >= btrfs_header_nritems(btrfs_buffer_header(cur))) break; + if (*level == 0) { + ret = drop_leaf_ref(trans, root, cur); + BUG_ON(ret); + break; + } blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), path->slots[*level]); - ret = lookup_block_ref(trans, root, blocknr, &refs); - if (refs != 1 || *level == 1) { + ret = lookup_block_ref(trans, root, blocknr, 1, &refs); + BUG_ON(ret); + if (refs != 1) { path->slots[*level]++; ret = btrfs_free_extent(trans, root, blocknr, 1, 1); BUG_ON(ret); continue; } - BUG_ON(ret); next = read_tree_block(root, blocknr); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); @@ -513,8 +566,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[*level] = 0; } out: - ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, - 1, 1); + ret = btrfs_free_extent(trans, root, + path->nodes[*level]->b_blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -544,10 +597,10 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, 1, 1); + BUG_ON(ret); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; - BUG_ON(ret); } } return 1; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index ad2d375b830d..1b2c5e043508 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -25,6 +25,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, if 
(fs_root->fs_info->last_inode_alloc_dirid == dirid) search_start = fs_root->fs_info->last_inode_alloc; + search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; search_key.flags = 0; btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2c2883f2856d..487da5a213c0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -700,7 +700,7 @@ out: static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - return block_prepare_write(page, from, to, btrfs_get_block); + return nobh_prepare_write(page, from, to, btrfs_get_block); } static void btrfs_write_super(struct super_block *sb) @@ -721,7 +721,7 @@ static int btrfs_readpages(struct file *file, struct address_space *mapping, static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - return block_write_full_page(page, btrfs_get_block, wbc); + return nobh_writepage(page, btrfs_get_block, wbc); } static int btrfs_get_sb(struct file_system_type *fs_type, @@ -768,7 +768,7 @@ static struct address_space_operations btrfs_aops = { .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, - .commit_write = generic_commit_write, + .commit_write = nobh_commit_write, }; static struct inode_operations btrfs_file_inode_operations = { -- cgit v1.2.3 From f4b9aa8d3b877d0a6044a6d6d9a44b29cab9e265 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 11:05:53 -0400 Subject: btrfs_truncate Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 25 ++++++------ fs/btrfs/super.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 114 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4d4fc48c0a31..176c6dc534ba 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -171,20 +171,21 @@ static int pin_down_block(struct 
btrfs_root *root, u64 blocknr, int pending) struct btrfs_header *header; struct buffer_head *bh; - bh = sb_find_get_block(root->fs_info->sb, blocknr); - if (bh) { - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - root->fs_info->running_transaction->transid) { + if (!pending) { + bh = sb_find_get_block(root->fs_info->sb, blocknr); + if (bh) { + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + root->fs_info->running_transaction->transid) { + brelse(bh); + return 0; + } brelse(bh); - return 0; } - brelse(bh); - } - if (pending) - err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); - else err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); + } else { + err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); + } BUG_ON(err); return 0; } @@ -223,6 +224,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); + mark_buffer_dirty(path.nodes[0]); if (refs == 0) { u64 super_blocks_used; @@ -240,7 +242,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) BUG(); } - mark_buffer_dirty(path.nodes[0]); btrfs_release_path(extent_root, &path); finish_current_insert(trans, extent_root); return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 487da5a213c0..fd3d9d616ff9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -28,11 +28,15 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = btrfs_sb(inode->i_sb); int ret; + btrfs_init_path(&path); + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { - make_bad_inode(inode); btrfs_release_path(root, &path); + mutex_unlock(&root->fs_info->fs_mutex); + make_bad_inode(inode); return; } inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), @@ -53,6 +57,7 
@@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); btrfs_release_path(root, &path); + mutex_unlock(&root->fs_info->fs_mutex); switch (inode->i_mode & S_IFMT) { #if 0 default: @@ -151,20 +156,85 @@ error: return ret; } +static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + int ret; + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_disk_key *found_key; + struct btrfs_leaf *leaf; + struct btrfs_file_extent_item *fi; + u64 extent_start; + u64 extent_num_blocks; + + /* FIXME, add redo link to tree so we don't leak on crash */ + key.objectid = inode->i_ino; + key.offset = (u64)-1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + while(1) { + btrfs_init_path(&path); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + if (ret < 0) { + btrfs_release_path(root, &path); + goto error; + } + if (ret > 0) { + BUG_ON(path.slots[0] == 0); + path.slots[0]--; + } + leaf = btrfs_buffer_leaf(path.nodes[0]); + found_key = &leaf->items[path.slots[0]].key; + if (btrfs_disk_key_objectid(found_key) != inode->i_ino) + break; + if (btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) + break; + if (btrfs_disk_key_offset(found_key) < inode->i_size) + break; + /* FIXME: add extent truncation */ + if (btrfs_disk_key_offset(found_key) < inode->i_size) + break; + fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_file_extent_item); + extent_start = btrfs_file_extent_disk_blocknr(fi); + extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); + key.offset = btrfs_disk_key_offset(found_key) - 1; + ret = btrfs_del_item(trans, root, &path); + BUG_ON(ret); + inode->i_blocks -= btrfs_file_extent_num_blocks(fi) >> 9; + btrfs_release_path(root, &path); + ret = btrfs_free_extent(trans, root, extent_start, + extent_num_blocks, 0); + 
BUG_ON(ret); + if (btrfs_disk_key_offset(found_key) == 0) + break; + } + btrfs_release_path(root, &path); + ret = 0; +error: + return ret; +} + static void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; + truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) { goto no_delete; } inode->i_size = 0; - if (inode->i_blocks) - WARN_ON(1); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + if (S_ISREG(inode->i_mode)) { + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); + } btrfs_free_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -173,7 +243,6 @@ no_delete: clear_inode(inode); } - static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ino_t *ino) { @@ -688,6 +757,8 @@ allocate: err = ret; goto out; } + inode->i_blocks += inode->i_sb->s_blocksize >> 9; + set_buffer_new(result); map_bh(result, inode->i_sb, blocknr); out: @@ -724,6 +795,30 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) return nobh_writepage(page, btrfs_get_block, wbc); } +static void btrfs_truncate(struct inode *inode) +{ + struct btrfs_root *root = btrfs_sb(inode->i_sb); + int ret; + struct btrfs_trans_handle *trans; + + if (!S_ISREG(inode->i_mode)) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + + nobh_truncate_page(inode->i_mapping, inode->i_size); + + /* FIXME, add redo link to tree so we don't leak on crash */ + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + mark_inode_dirty(inode); +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -772,7 
+867,7 @@ static struct address_space_operations btrfs_aops = { }; static struct inode_operations btrfs_file_inode_operations = { - .truncate = NULL, + .truncate = btrfs_truncate, }; static struct file_operations btrfs_file_operations = { -- cgit v1.2.3 From 9773a788681db1f5c2701b7433737fdca61a14ba Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 11:26:26 -0400 Subject: Btrfs: byte offsets for file keys Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file-item.c | 5 +++-- fs/btrfs/super.c | 7 +++++-- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c3fa12a6b59e..1897f3a65b4f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -899,5 +899,5 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, - u64 blocknr, u64 num_blocks, int mod); + u64 blocknr, int mod); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 09de270c5a52..5230a44cb19b 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -26,6 +26,7 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, &path, &file_key, sizeof(*item)); + BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_disk_blocknr(item, ins.objectid); @@ -42,7 +43,7 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, - u64 blocknr, u64 num_blocks, int mod) + u64 offset, int mod) { int ret; struct btrfs_key file_key; @@ -50,7 +51,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int cow = mod != 0; file_key.objectid = objectid; - file_key.offset = blocknr; + 
file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fd3d9d616ff9..f2f08189903d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -697,7 +697,8 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, ret = btrfs_lookup_file_extent(trans, root, &path, - inode->i_ino, iblock, 1, 0); + inode->i_ino, + iblock << inode->i_blkbits, 0); if (ret < 0) { btrfs_release_path(root, &path); err = ret; @@ -737,6 +738,7 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, } extent_start = btrfs_disk_key_offset(&leaf->items[path.slots[0]].key); + extent_start = extent_start >> inode->i_blkbits; extent_start += btrfs_file_extent_offset(item); extent_end = extent_start + btrfs_file_extent_num_blocks(item); btrfs_release_path(root, &path); @@ -751,7 +753,8 @@ allocate: err = 0; goto out; } - ret = btrfs_alloc_file_extent(trans, root, objectid, iblock, + ret = btrfs_alloc_file_extent(trans, root, objectid, + iblock << inode->i_blkbits, 1, extent_end, &blocknr); if (ret) { err = ret; -- cgit v1.2.3 From 5f443fd280c76a2170259cebaa4108c9daad6e1b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Mar 2007 13:42:32 -0400 Subject: btrfs_rmdir Signed-off-by: Chris Mason --- fs/btrfs/super.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 93 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f2f08189903d..67659b6ce962 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -81,11 +81,12 @@ static void btrfs_read_locked_inode(struct inode *inode) return; } -static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, + struct dentry *dentry) { struct btrfs_path path; - struct 
btrfs_root *root; - struct btrfs_trans_handle *trans; const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; int ret; @@ -93,10 +94,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) struct btrfs_dir_item *di; btrfs_init_path(&path); - root = btrfs_sb(dir->i_sb); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_lookup_dir_item(trans, root, &path, dir->i_ino, name, name_len, -1); if (ret < 0) @@ -114,13 +111,98 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_ctime = dir->i_ctime; err: btrfs_release_path(root, &path); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); if (ret == 0) inode_dec_link_count(dentry->d_inode); return ret; } +static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct btrfs_root *root; + struct btrfs_trans_handle *trans; + int ret; + + root = btrfs_sb(dir->i_sb); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_unlink_trans(trans, root, dir, dentry); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + int err; + int ret; + struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_path path; + struct btrfs_key key; + struct btrfs_trans_handle *trans; + struct btrfs_disk_key *found_key; + struct btrfs_leaf *leaf; + + btrfs_init_path(&path); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + key.objectid = inode->i_ino; + key.offset = (u64)-1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + if (ret < 0) { + err = ret; + goto out; + } + + BUG_ON(ret == 0); + BUG_ON(path.slots[0] == 0); + path.slots[0]--; + leaf = btrfs_buffer_leaf(path.nodes[0]); + 
found_key = &leaf->items[path.slots[0]].key; + if (btrfs_disk_key_objectid(found_key) != inode->i_ino) { + err = -ENOENT; + goto out; + } + if (btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || + btrfs_disk_key_offset(found_key) != 2) { + err = -ENOTEMPTY; + goto out; + } + ret = btrfs_del_item(trans, root, &path); + BUG_ON(ret); + btrfs_release_path(root, &path); + key.offset = 1; + ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + if (ret < 0) { + err = ret; + goto out; + } + if (ret > 0) { + err = -ENOTEMPTY; + goto out; + } + ret = btrfs_del_item(trans, root, &path); + if (ret) { + err = ret; + goto out; + } + btrfs_release_path(root, &path); + + /* now the directory is empty */ + err = btrfs_unlink_trans(trans, root, dir, dentry); + if (!err) { + inode->i_size = 0; + } +out: + mutex_unlock(&root->fs_info->fs_mutex); + ret = btrfs_end_transaction(trans, root); + if (ret && !err) + err = ret; + return err; +} + static int btrfs_free_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -191,9 +273,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, break; if (btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) break; - if (btrfs_disk_key_offset(found_key) < inode->i_size) - break; - /* FIXME: add extent truncation */ if (btrfs_disk_key_offset(found_key) < inode->i_size) break; fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), @@ -209,7 +288,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, ret = btrfs_free_extent(trans, root, extent_start, extent_num_blocks, 0); BUG_ON(ret); - if (btrfs_disk_key_offset(found_key) == 0) + if (key.offset + 1 == 0) break; } btrfs_release_path(root, &path); @@ -852,6 +931,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .create = btrfs_create, .unlink = btrfs_unlink, .mkdir = btrfs_mkdir, + .rmdir = btrfs_rmdir, }; static struct file_operations btrfs_dir_file_operations = { -- cgit v1.2.3 From 
d98237b3ede7ab98892f7fa62201a13694c526e2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Mar 2007 13:57:48 -0400 Subject: Btrfs: use a btree inode instead of sb_getblk Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 174 +++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/disk-io.h | 5 +- fs/btrfs/extent-tree.c | 8 +-- fs/btrfs/super.c | 8 ++- fs/btrfs/transaction.c | 2 +- 6 files changed, 174 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1897f3a65b4f..b2ebed756c13 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -233,6 +233,7 @@ struct btrfs_fs_info { struct btrfs_super_block *disk_super; struct buffer_head *sb_buffer; struct super_block *sb; + struct inode *btree_inode; struct mutex trans_mutex; struct mutex fs_mutex; }; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 758a62aba063..970103f2cacd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,14 +1,17 @@ #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" + static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); - if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) + if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { BUG(); + } if (root->node && btrfs_header_parentid(&node->header) != btrfs_header_parentid(btrfs_buffer_header(root->node))) { BUG(); @@ -16,25 +19,154 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) return 0; } -struct buffer_head *alloc_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) +{ + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; + int blockbits = root->fs_info->sb->s_blocksize_bits; + unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); + struct page *page; + struct 
buffer_head *bh; + struct buffer_head *head; + struct buffer_head *ret = NULL; + + page = find_lock_page(mapping, index); + if (!page) + return NULL; + + if (!page_has_buffers(page)) + goto out_unlock; + + head = page_buffers(page); + bh = head; + do { + if (buffer_mapped(bh) && bh->b_blocknr == blocknr) { + ret = bh; + get_bh(bh); + goto out_unlock; + } + bh = bh->b_this_page; + } while (bh != head); +out_unlock: + unlock_page(page); + page_cache_release(page); + return ret; +} + +struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr) +{ + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; + int blockbits = root->fs_info->sb->s_blocksize_bits; + unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); + struct page *page; + struct buffer_head *bh; + struct buffer_head *head; + struct buffer_head *ret = NULL; + u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + page = grab_cache_page(mapping, index); + if (!page) + return NULL; + + wait_on_page_writeback(page); + if (!page_has_buffers(page)) + create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); + head = page_buffers(page); + bh = head; + do { + if (!buffer_mapped(bh)) { + bh->b_bdev = root->fs_info->sb->s_bdev; + bh->b_blocknr = first_block; + set_buffer_mapped(bh); + } + if (bh->b_blocknr == blocknr) { + ret = bh; + get_bh(bh); + goto out_unlock; + } + bh = bh->b_this_page; + first_block++; + } while (bh != head); +out_unlock: + unlock_page(page); + page_cache_release(page); + return ret; +} + +static sector_t max_block(struct block_device *bdev) +{ + sector_t retval = ~((sector_t)0); + loff_t sz = i_size_read(bdev->bd_inode); + + if (sz) { + unsigned int size = block_size(bdev); + unsigned int sizebits = blksize_bits(size); + retval = (sz >> sizebits); + } + return retval; +} + +static int btree_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + if (iblock >= max_block(inode->i_sb->s_bdev)) { 
+ if (create) + return -EIO; + + /* + * for reads, we're just trying to fill a partial page. + * return a hole, they will have to call get_block again + * before they can fill it, and they will get -EIO at that + * time + */ + return 0; + } + bh->b_bdev = inode->i_sb->s_bdev; + bh->b_blocknr = iblock; + set_buffer_mapped(bh); + return 0; +} + +static int btree_writepage(struct page *page, struct writeback_control *wbc) { - return sb_getblk(root->fs_info->sb, blocknr); + return block_write_full_page(page, btree_get_block, wbc); } -struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr) +static int btree_readpage(struct file * file, struct page * page) { - return sb_getblk(root->fs_info->sb, blocknr); + return block_read_full_page(page, btree_get_block); } +static struct address_space_operations btree_aops = { + .readpage = btree_readpage, + .writepage = btree_writepage, + .sync_page = block_sync_page, +}; + struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *buf = sb_bread(root->fs_info->sb, blocknr); + struct buffer_head *bh = NULL; - if (!buf) - return buf; - if (check_tree_block(root, buf)) + bh = btrfs_find_create_tree_block(root, blocknr); + if (!bh) + return bh; + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + goto fail; + } else { + unlock_buffer(bh); + } + if (check_tree_block(root, bh)) BUG(); - return buf; + return bh; +fail: + brelse(bh); + return NULL; + } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -101,11 +233,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); int ret; - /* FIXME: don't be stupid */ if (!btrfs_super_root(disk_super)) return NULL; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + sb_set_blocksize(sb, sb_buffer->b_size); fs_info->running_transaction = 
NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -114,14 +246,30 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; fs_info->disk_super = disk_super; - fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; + fs_info->btree_inode = new_inode(sb); + fs_info->btree_inode->i_ino = 1; + fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; + fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); + mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + + fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); + + if (!fs_info->sb_buffer) + return NULL; + + brelse(sb_buffer); + sb_buffer = NULL; + disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + fs_info->disk_super = disk_super; + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -137,7 +285,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = find_and_setup_root(disk_super, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); BUG_ON(ret); - root->commit_root = root->node; get_bh(root->node); root->ref_cows = 1; @@ -191,6 +338,7 @@ int close_ctree(struct btrfs_root *root) root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); + iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 099f7eea0ec7..c2c38bda704d 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,7 +21,8 @@ static inline struct btrfs_header 
*btrfs_buffer_header(struct buffer_head *bh) } struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); -struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr); +struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr); int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -37,5 +38,5 @@ int close_ctree(struct btrfs_root *root); void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int mkfs(int fd, u64 num_blocks, u32 blocksize); +struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 176c6dc534ba..b2ae8e768b6c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -172,8 +172,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) struct buffer_head *bh; if (!pending) { - bh = sb_find_get_block(root->fs_info->sb, blocknr); - if (bh) { + bh = btrfs_find_tree_block(root, blocknr); + if (bh && buffer_uptodate(bh)) { header = btrfs_buffer_header(bh); if (btrfs_header_generation(header) == root->fs_info->running_transaction->transid) { @@ -291,12 +291,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; - struct buffer_head *t; int pending_ret; int ret; if (root == extent_root) { - t = find_tree_block(root, blocknr); pin_down_block(root, blocknr, 1); return 0; } @@ -482,7 +480,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG(); return NULL; } - buf = find_tree_block(root, ins.objectid); + buf = btrfs_find_create_tree_block(root, ins.objectid); 
set_buffer_uptodate(buf); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67659b6ce962..8f07f462236a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -474,6 +474,8 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) disk_super = (struct btrfs_super_block *)bh->b_data; root = open_ctree(sb, bh, disk_super); sb->s_fs_info = root; + disk_super = root->fs_info->disk_super; + if (!root) { printk("btrfs: open_ctree failed\n"); return -EIO; @@ -734,15 +736,15 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_trans_handle *trans; struct btrfs_root *root; int ret; + root = btrfs_sb(sb); sb->s_dirt = 0; if (!wait) { - filemap_flush(sb->s_bdev->bd_inode->i_mapping); + filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } - filemap_write_and_wait(sb->s_bdev->bd_inode->i_mapping); + filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - root = btrfs_sb(sb); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 46a596e345f0..b20fb53a0d27 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -74,7 +74,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); return 0; } -- cgit v1.2.3 From 87cbda5c1f54aba67b1e318a4a4824c9e9f16324 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Mar 2007 19:44:27 -0400 Subject: Btrfs: sha256 csums on metadata Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 8 +++++-- fs/btrfs/disk-io.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/super.c | 4 ++-- 3 files changed, 67 insertions(+), 7 deletions(-) (limited to 
'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b2ebed756c13..6ff87f44c5d8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -51,11 +51,11 @@ struct btrfs_key { * every tree block (leaf or node) starts with this header. */ struct btrfs_header { + __le32 csum[8]; u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; __le64 parentid; /* objectid of the tree root */ - __le32 csum; __le32 ham; __le16 nritems; __le16 flags; @@ -75,9 +75,10 @@ struct buffer_head; * it currently lacks any block count etc etc */ struct btrfs_super_block { + __le32 csum[8]; + /* the first 3 fields must match struct btrfs_header */ u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* this block number */ - __le32 csum; __le64 magic; __le32 blocksize; __le64 generation; @@ -217,6 +218,7 @@ struct btrfs_inode_map_item { struct btrfs_disk_key key; } __attribute__ ((__packed__)); +struct crypto_hash; struct btrfs_fs_info { struct btrfs_root *fs_root; struct btrfs_root *extent_root; @@ -236,6 +238,8 @@ struct btrfs_fs_info { struct inode *btree_inode; struct mutex trans_mutex; struct mutex fs_mutex; + struct crypto_hash *hash_tfm; + spinlock_t hash_lock; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 970103f2cacd..2afb7922b062 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -126,8 +128,51 @@ static int btree_get_block(struct inode *inode, sector_t iblock, return 0; } +static int csum_tree_block(struct btrfs_root * root, struct buffer_head *bh, + int verify) +{ + struct btrfs_node *node = btrfs_buffer_node(bh); + struct scatterlist sg; + struct crypto_hash *tfm = root->fs_info->hash_tfm; + struct hash_desc desc; + int ret; + char result[32]; + + desc.tfm = tfm; + desc.flags = 0; + sg_init_one(&sg, bh->b_data + 32, bh->b_size - 32); + 
spin_lock(&root->fs_info->hash_lock); + ret = crypto_hash_digest(&desc, &sg, bh->b_size - 32, result); + spin_unlock(&root->fs_info->hash_lock); + if (ret) { + printk("sha256 digest failed\n"); + } + if (verify) { + if (memcmp(node->header.csum, result, sizeof(result))) + printk("csum verify failed on %Lu\n", bh->b_blocknr); + return -EINVAL; + } else + memcpy(node->header.csum, result, sizeof(node->header.csum)); + return 0; +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { + struct buffer_head *bh; + struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); + struct buffer_head *head; + + if (!page_has_buffers(page)) { + create_empty_buffers(page, root->fs_info->sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + head = page_buffers(page); + bh = head; + do { + if (buffer_dirty(bh)) + csum_tree_block(root, bh, 0); + bh = bh->b_this_page; + } while (bh != head); return block_write_full_page(page, btree_get_block, wbc); } @@ -157,6 +202,7 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto fail; + csum_tree_block(root, bh, 1); } else { unlock_buffer(bh); } @@ -233,8 +279,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); int ret; - if (!btrfs_super_root(disk_super)) + if (!btrfs_super_root(disk_super)) { return NULL; + } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); sb_set_blocksize(sb, sb_buffer->b_size); @@ -252,6 +299,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); + fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); + if (!fs_info->hash_tfm) { + printk("failed to allocate sha256 hash\n"); + return NULL; + } + spin_lock_init(&fs_info->hash_lock); 
mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); @@ -262,9 +315,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); - if (!fs_info->sb_buffer) + if (!fs_info->sb_buffer) { +printk("failed2\n"); return NULL; - + } brelse(sb_buffer); sb_buffer = NULL; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; @@ -300,6 +354,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root root->fs_info->tree_root->node->b_blocknr); lock_buffer(bh); clear_buffer_dirty(bh); + csum_tree_block(root, bh, 0); bh->b_end_io = end_buffer_write_sync; get_bh(bh); submit_bh(WRITE, bh); @@ -338,6 +393,7 @@ int close_ctree(struct btrfs_root *root) root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); + crypto_free_hash(root->fs_info->hash_tfm); iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8f07f462236a..151c1002d74e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -473,13 +473,13 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) } disk_super = (struct btrfs_super_block *)bh->b_data; root = open_ctree(sb, bh, disk_super); - sb->s_fs_info = root; - disk_super = root->fs_info->disk_super; if (!root) { printk("btrfs: open_ctree failed\n"); return -EIO; } + sb->s_fs_info = root; + disk_super = root->fs_info->disk_super; printk("read in super total blocks %Lu root %Lu\n", btrfs_super_total_blocks(disk_super), btrfs_super_root_dir(disk_super)); -- cgit v1.2.3 From 75dfe3960e602e63ea42ac7a2a0520832b189ffa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 29 Mar 2007 11:56:46 -0400 Subject: btrfs_file_write -- first pass Signed-off-by: Chris Mason --- fs/btrfs/super.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 
213 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 151c1002d74e..7914b31f5bcd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -755,7 +757,7 @@ printk("btrfs sync_fs\n"); return 0; } -static int btrfs_get_block(struct inode *inode, sector_t iblock, +static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { int ret; @@ -772,7 +774,6 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, struct btrfs_disk_key *found_key; btrfs_init_path(&path); - mutex_lock(&root->fs_info->fs_mutex); if (create) trans = btrfs_start_transaction(root, 1); @@ -848,6 +849,16 @@ allocate: out: if (trans) btrfs_end_transaction(trans, root); + return err; +} + +static int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int err; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_get_block_lock(inode, iblock, result, create); mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -855,8 +866,15 @@ out: static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { + WARN_ON(1); return nobh_prepare_write(page, from, to, btrfs_get_block); } +static int btrfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + WARN_ON(1); + return nobh_commit_write(file, page, from, to); +} static void btrfs_write_super(struct super_block *sb) { @@ -903,6 +921,196 @@ static void btrfs_truncate(struct inode *inode) mark_inode_dirty(inode); } +static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, + struct page **prepared_pages, + const char __user * buf) +{ + long page_fault = 0; + int i; + int offset = pos & (PAGE_CACHE_SIZE - 1); + + for (i = 0; i 
< num_pages && write_bytes > 0; i++, offset = 0) { + size_t count = min_t(size_t, + PAGE_CACHE_SIZE - offset, write_bytes); + struct page *page = prepared_pages[i]; + fault_in_pages_readable(buf, count); + + /* Copy data from userspace to the current page */ + kmap(page); + page_fault = __copy_from_user(page_address(page) + offset, + buf, count); + /* Flush processor's dcache for this page */ + flush_dcache_page(page); + kunmap(page); + buf += count; + write_bytes -= count; + + if (page_fault) + break; + } + return page_fault ? -EFAULT : 0; +} + +static void btrfs_drop_pages(struct page **pages, size_t num_pages) +{ + size_t i; + for (i = 0; i < num_pages; i++) { + if (!pages[i]) + break; + unlock_page(pages[i]); + mark_page_accessed(pages[i]); + page_cache_release(pages[i]); + } +} +static int dirty_and_release_pages(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct file *file, + struct page **pages, + size_t num_pages, + loff_t pos, + size_t write_bytes) +{ + int i; + int offset; + int err = 0; + int ret; + int this_write; + + for (i = 0; i < num_pages; i++) { + offset = pos & (PAGE_CACHE_SIZE -1); + this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + ret = nobh_commit_write(file, pages[i], offset, + offset + this_write); + pos += this_write; + if (ret) { + err = ret; + goto failed; + } + WARN_ON(this_write > write_bytes); + write_bytes -= this_write; + } +failed: + return err; +} + +static int prepare_pages(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct file *file, + struct page **pages, + size_t num_pages, + loff_t pos, + size_t write_bytes) +{ + int i; + unsigned long index = pos >> PAGE_CACHE_SHIFT; + struct inode *inode = file->f_path.dentry->d_inode; + int offset; + int err = 0; + int ret; + int this_write; + loff_t isize = i_size_read(inode); + + memset(pages, 0, num_pages * sizeof(struct page *)); + + for (i = 0; i < num_pages; i++) { + pages[i] = grab_cache_page(inode->i_mapping, index + i); + if 
(!pages[i]) { + err = -ENOMEM; + goto failed_release; + } + offset = pos & (PAGE_CACHE_SIZE -1); + this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + ret = nobh_prepare_write(pages[i], offset, + offset + this_write, + btrfs_get_block_lock); + pos += this_write; + if (ret) { + err = ret; + goto failed_truncate; + } + WARN_ON(this_write > write_bytes); + write_bytes -= this_write; + } + return 0; + +failed_release: + btrfs_drop_pages(pages, num_pages); + return err; + +failed_truncate: + btrfs_drop_pages(pages, num_pages); + if (pos > isize) + vmtruncate(inode, isize); + return err; +} + +static ssize_t btrfs_file_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + loff_t pos; + size_t num_written = 0; + int err = 0; + int ret = 0; + struct btrfs_trans_handle *trans; + struct inode *inode = file->f_path.dentry->d_inode; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct page *pages[1]; + + if (file->f_flags & O_DIRECT) + return -EINVAL; + pos = *ppos; + + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + current->backing_dev_info = inode->i_mapping->backing_dev_info; + err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + if (err) + goto out; + if (count == 0) + goto out; + err = remove_suid(file->f_path.dentry); + if (err) + goto out; + file_update_time(file); + mutex_lock(&inode->i_mutex); + while(count > 0) { + size_t offset = pos & (PAGE_CACHE_SIZE - 1); + size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); + size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + + ret = prepare_pages(trans, root, file, pages, num_pages, + pos, write_bytes); + BUG_ON(ret); + ret = btrfs_copy_from_user(pos, num_pages, + write_bytes, pages, buf); + BUG_ON(ret); + + mutex_unlock(&root->fs_info->fs_mutex); + + ret = dirty_and_release_pages(trans, root, file, pages, + num_pages, pos, write_bytes); + 
BUG_ON(ret); + btrfs_drop_pages(pages, num_pages); + + ret = btrfs_end_transaction(trans, root); + + buf += write_bytes; + count -= write_bytes; + pos += write_bytes; + num_written += write_bytes; + + balance_dirty_pages_ratelimited(inode->i_mapping); + cond_resched(); + } + mutex_unlock(&inode->i_mutex); +out: + *ppos = pos; + current->backing_dev_info = NULL; + return num_written ? num_written : err; +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -948,7 +1156,7 @@ static struct address_space_operations btrfs_aops = { .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, - .commit_write = nobh_commit_write, + .commit_write = btrfs_commit_write, }; static struct inode_operations btrfs_file_inode_operations = { @@ -958,14 +1166,10 @@ static struct inode_operations btrfs_file_inode_operations = { static struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .aio_read = generic_file_aio_read, + .write = btrfs_file_write, .mmap = generic_file_mmap, .open = generic_file_open, - .sendfile = generic_file_sendfile, - .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, }; static int __init init_btrfs_fs(void) -- cgit v1.2.3 From f254e52c1ce550fdaa0d31f5e068f0d67c2485d4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 29 Mar 2007 15:15:27 -0400 Subject: Btrfs: verify csums on read Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 9 ++- fs/btrfs/ctree.h | 47 ++++++++----- fs/btrfs/disk-io.c | 37 ++++++++--- fs/btrfs/disk-io.h | 2 + fs/btrfs/file-item.c | 60 +++++++++++++++++ fs/btrfs/super.c | 184 +++++++++++++++++++++++++++++++++++++++++++-------- 6 files changed, 279 insertions(+), 60 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c 
b/fs/btrfs/ctree.c index 48c611948d11..1eb333e2e18b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -109,14 +109,14 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.flags > k2->flags) - return 1; - if (k1.flags < k2->flags) - return -1; if (k1.offset > k2->offset) return 1; if (k1.offset < k2->offset) return -1; + if (k1.flags > k2->flags) + return 1; + if (k1.flags < k2->flags) + return -1; return 0; } @@ -1165,7 +1165,6 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root BUG(); ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); if (ret == 0) { - btrfs_release_path(root, path); return -EEXIST; } if (ret < 0) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6ff87f44c5d8..df1a025a771c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -21,6 +21,9 @@ struct btrfs_transaction; */ #define BTRFS_NAME_LEN 255 +/* 32 bytes in various csum fields */ +#define BTRFS_CSUM_SIZE 32 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. The flags @@ -37,21 +40,21 @@ struct btrfs_transaction; */ struct btrfs_disk_key { __le64 objectid; - __le32 flags; __le64 offset; + __le32 flags; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u32 flags; u64 offset; + u32 flags; } __attribute__ ((__packed__)); /* * every tree block (leaf or node) starts with this header. 
*/ struct btrfs_header { - __le32 csum[8]; + u8 csum[BTRFS_CSUM_SIZE]; u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; @@ -75,7 +78,7 @@ struct buffer_head; * it currently lacks any block count etc etc */ struct btrfs_super_block { - __le32 csum[8]; + u8 csum[BTRFS_CSUM_SIZE]; /* the first 3 fields must match struct btrfs_header */ u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* this block number */ @@ -147,7 +150,7 @@ struct btrfs_extent_item { } __attribute__ ((__packed__)); struct btrfs_inode_timespec { - __le32 sec; + __le64 sec; __le32 nsec; } __attribute__ ((__packed__)); @@ -214,6 +217,10 @@ struct btrfs_file_extent_item { __le64 num_blocks; } __attribute__ ((__packed__)); +struct btrfs_csum_item { + u8 csum[BTRFS_CSUM_SIZE]; +} __attribute__ ((__packed__)); + struct btrfs_inode_map_item { struct btrfs_disk_key key; } __attribute__ ((__packed__)); @@ -283,27 +290,32 @@ struct btrfs_root { * a (hopefully) huge chunk of disk */ #define BTRFS_EXTENT_DATA_KEY 4 +/* + * csum items have the checksums for data in the extents + */ +#define BTRFS_CSUM_ITEM_KEY 5 + /* * root items point to tree roots. There are typically in the root * tree used by the super block to find all the other trees */ -#define BTRFS_ROOT_ITEM_KEY 5 +#define BTRFS_ROOT_ITEM_KEY 6 /* * extent items are in the extent map tree. These record which blocks * are used, and how many references there are to each block */ -#define BTRFS_EXTENT_ITEM_KEY 6 +#define BTRFS_EXTENT_ITEM_KEY 7 /* * the inode map records which inode numbers are in use and where * they actually live on disk */ -#define BTRFS_INODE_MAP_ITEM_KEY 7 +#define BTRFS_INODE_MAP_ITEM_KEY 8 /* * string items are for debugging. 
They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 8 +#define BTRFS_STRING_ITEM_KEY 9 static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -407,15 +419,15 @@ static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, i->compat_flags = cpu_to_le16(val); } -static inline u32 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) +static inline u64 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) { - return le32_to_cpu(ts->sec); + return le64_to_cpu(ts->sec); } static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, - u32 val) + u64 val) { - ts->sec = cpu_to_le32(val); + ts->sec = cpu_to_le64(val); } static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) @@ -429,8 +441,6 @@ static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, ts->nsec = cpu_to_le32(val); } - - static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) { return le64_to_cpu(ei->owner); @@ -905,4 +915,11 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, u64 blocknr, int mod); +int btrfs_csum_file_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 offset, + char *data, size_t len); +int btrfs_csum_verify_file_block(struct btrfs_root *root, + u64 objectid, u64 offset, + char *data, size_t len); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f2c1dd40c087..f5db2b936502 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -127,31 +127,46 @@ static int btree_get_block(struct inode *inode, sector_t iblock, return 0; } -static int csum_tree_block(struct btrfs_root * root, struct buffer_head *bh, - int verify) +int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, + char *result) { - struct btrfs_node *node = btrfs_buffer_node(bh); struct scatterlist sg; struct crypto_hash *tfm = root->fs_info->hash_tfm; struct hash_desc 
desc; int ret; - char result[32]; desc.tfm = tfm; desc.flags = 0; - sg_init_one(&sg, bh->b_data + 32, bh->b_size - 32); + sg_init_one(&sg, data, len); spin_lock(&root->fs_info->hash_lock); - ret = crypto_hash_digest(&desc, &sg, bh->b_size - 32, result); + ret = crypto_hash_digest(&desc, &sg, len, result); spin_unlock(&root->fs_info->hash_lock); if (ret) { printk("sha256 digest failed\n"); } + return ret; +} +static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, + int verify) +{ + char result[BTRFS_CSUM_SIZE]; + int ret; + struct btrfs_node *node; + + ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, + bh->b_size - BTRFS_CSUM_SIZE, result); + if (ret) + return ret; if (verify) { - if (memcmp(node->header.csum, result, sizeof(result))) - printk("csum verify failed on %Lu\n", bh->b_blocknr); - return -EINVAL; - } else - memcpy(node->header.csum, result, sizeof(node->header.csum)); + if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) { + printk("checksum verify failed on %lu\n", + bh->b_blocknr); + return 1; + } + } else { + node = btrfs_buffer_node(bh); + memcpy(&node->header.csum, result, BTRFS_CSUM_SIZE); + } return 0; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c2c38bda704d..f6998e2192ce 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,4 +39,6 @@ void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); +int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, + char *result); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 5230a44cb19b..2d2c23ca7cbf 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -57,3 +57,63 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); return ret; } + +int 
btrfs_csum_file_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 offset, + char *data, size_t len) +{ + int ret; + struct btrfs_key file_key; + struct btrfs_path path; + struct btrfs_csum_item *item; + + btrfs_init_path(&path); + file_key.objectid = objectid; + file_key.offset = offset; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + BTRFS_CSUM_SIZE); + if (ret != 0 && ret != -EEXIST) + goto fail; + item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_csum_item); + ret = 0; + ret = btrfs_csum_data(root, data, len, item->csum); + mark_buffer_dirty(path.nodes[0]); +fail: + btrfs_release_path(root, &path); + return ret; +} + +int btrfs_csum_verify_file_block(struct btrfs_root *root, + u64 objectid, u64 offset, + char *data, size_t len) +{ + int ret; + struct btrfs_key file_key; + struct btrfs_path path; + struct btrfs_csum_item *item; + char result[BTRFS_CSUM_SIZE]; + + btrfs_init_path(&path); + file_key.objectid = objectid; + file_key.offset = offset; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + ret = btrfs_search_slot(NULL, root, &file_key, &path, 0, 0); + if (ret) + goto fail; + item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + struct btrfs_csum_item); + ret = 0; + ret = btrfs_csum_data(root, data, len, result); + WARN_ON(ret); + if (memcmp(result, item->csum, BTRFS_CSUM_SIZE)) + ret = 1; +fail: + btrfs_release_path(root, &path); + return ret; +} + diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7914b31f5bcd..04428137d75f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -249,15 +249,16 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; - struct btrfs_file_extent_item *fi; - u64 extent_start; - u64 extent_num_blocks; + struct 
btrfs_file_extent_item *fi = NULL; + u64 extent_start = 0; + u64 extent_num_blocks = 0; + int found_extent; /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY); while(1) { btrfs_init_path(&path); ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); @@ -273,25 +274,32 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_key = &leaf->items[path.slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; - if (btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) + if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && + btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) break; if (btrfs_disk_key_offset(found_key) < inode->i_size) break; - fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], - struct btrfs_file_extent_item); - extent_start = btrfs_file_extent_disk_blocknr(fi); - extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); - key.offset = btrfs_disk_key_offset(found_key) - 1; + if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { + fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), + path.slots[0], + struct btrfs_file_extent_item); + extent_start = btrfs_file_extent_disk_blocknr(fi); + extent_num_blocks = + btrfs_file_extent_disk_num_blocks(fi); + inode->i_blocks -= + btrfs_file_extent_num_blocks(fi) >> 9; + found_extent = 1; + } else { + found_extent = 0; + } ret = btrfs_del_item(trans, root, &path); BUG_ON(ret); - inode->i_blocks -= btrfs_file_extent_num_blocks(fi) >> 9; btrfs_release_path(root, &path); - ret = btrfs_free_extent(trans, root, extent_start, - extent_num_blocks, 0); - BUG_ON(ret); - if (key.offset + 1 == 0) - break; + if (found_extent) { + ret = btrfs_free_extent(trans, root, extent_start, + extent_num_blocks, 0); + BUG_ON(ret); + } } btrfs_release_path(root, &path); ret = 0; 
@@ -975,10 +983,24 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, int err = 0; int ret; int this_write; + struct inode *inode = file->f_path.dentry->d_inode; for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + /* FIXME, one block at a time */ + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_csum_file_block(trans, root, inode->i_ino, + pages[i]->index << PAGE_CACHE_SHIFT, + kmap(pages[i]), PAGE_CACHE_SIZE); + kunmap(pages[i]); + SetPageChecked(pages[i]); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + ret = nobh_commit_write(file, pages[i], offset, offset + this_write); pos += this_write; @@ -1022,7 +1044,7 @@ static int prepare_pages(struct btrfs_trans_handle *trans, this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); ret = nobh_prepare_write(pages[i], offset, offset + this_write, - btrfs_get_block_lock); + btrfs_get_block); pos += this_write; if (ret) { err = ret; @@ -1051,7 +1073,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t num_written = 0; int err = 0; int ret = 0; - struct btrfs_trans_handle *trans; struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = btrfs_sb(inode->i_sb); struct page *pages[1]; @@ -1077,25 +1098,18 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - - ret = prepare_pages(trans, root, file, pages, num_pages, + ret = prepare_pages(NULL, root, file, pages, num_pages, pos, write_bytes); BUG_ON(ret); ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - - ret = 
dirty_and_release_pages(trans, root, file, pages, + ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, pos, write_bytes); BUG_ON(ret); btrfs_drop_pages(pages, num_pages); - ret = btrfs_end_transaction(trans, root); - buf += write_bytes; count -= write_bytes; pos += write_bytes; @@ -1111,6 +1125,118 @@ out: return num_written ? num_written : err; } +static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, + unsigned long offset, unsigned long size) +{ + char *kaddr; + unsigned long left, count = desc->count; + + if (size > count) + size = count; + + if (!PageChecked(page)) { + /* FIXME, do it per block */ + struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); + int ret = btrfs_csum_verify_file_block(root, + page->mapping->host->i_ino, + page->index << PAGE_CACHE_SHIFT, + kmap(page), PAGE_CACHE_SIZE); + if (ret) { + printk("failed to verify ino %lu page %lu\n", + page->mapping->host->i_ino, + page->index); + memset(page_address(page), 0, PAGE_CACHE_SIZE); + } + SetPageChecked(page); + kunmap(page); + } + /* + * Faults on the destination of a read are common, so do it before + * taking the kmap. 
+ */ + if (!fault_in_pages_writeable(desc->arg.buf, size)) { + kaddr = kmap_atomic(page, KM_USER0); + left = __copy_to_user_inatomic(desc->arg.buf, + kaddr + offset, size); + kunmap_atomic(kaddr, KM_USER0); + if (left == 0) + goto success; + } + + /* Do it the slow way */ + kaddr = kmap(page); + left = __copy_to_user(desc->arg.buf, kaddr + offset, size); + kunmap(page); + + if (left) { + size -= left; + desc->error = -EFAULT; + } +success: + desc->count = count - size; + desc->written += size; + desc->arg.buf += size; + return size; +} + +/** + * btrfs_file_aio_read - filesystem read routine + * @iocb: kernel I/O control block + * @iov: io vector request + * @nr_segs: number of segments in the iovec + * @pos: current file position + */ +static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *filp = iocb->ki_filp; + ssize_t retval; + unsigned long seg; + size_t count; + loff_t *ppos = &iocb->ki_pos; + + count = 0; + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. 
+ */ + count += iv->iov_len; + if (unlikely((ssize_t)(count|iv->iov_len) < 0)) + return -EINVAL; + if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) + continue; + if (seg == 0) + return -EFAULT; + nr_segs = seg; + count -= iv->iov_len; /* This segment is no good */ + break; + } + retval = 0; + if (count) { + for (seg = 0; seg < nr_segs; seg++) { + read_descriptor_t desc; + + desc.written = 0; + desc.arg.buf = iov[seg].iov_base; + desc.count = iov[seg].iov_len; + if (desc.count == 0) + continue; + desc.error = 0; + do_generic_file_read(filp, ppos, &desc, + btrfs_read_actor); + retval += desc.written; + if (desc.error) { + retval = retval ?: desc.error; + break; + } + } + } + return retval; +} + static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -1166,7 +1292,7 @@ static struct inode_operations btrfs_file_inode_operations = { static struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .aio_read = generic_file_aio_read, + .aio_read = btrfs_file_aio_read, .write = btrfs_file_write, .mmap = generic_file_mmap, .open = generic_file_open, -- cgit v1.2.3 From 22b0ebda6c63a1ad66b6a9e806bd226a4a03a049 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Mar 2007 08:47:31 -0400 Subject: Btrfs: hunting slab corruption Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 ++ fs/btrfs/dir-item.c | 2 ++ fs/btrfs/disk-io.c | 16 +++++++++++++--- fs/btrfs/super.c | 7 ++++++- fs/btrfs/transaction.c | 6 ++++-- 5 files changed, 27 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1eb333e2e18b..b7f3fcb72c62 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -457,6 +457,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int ret; int level; + WARN_ON(p->nodes[0] != NULL); + WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: b = root->node; 
get_bh(b); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index c18f81797a88..18d3e168ad0a 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -35,6 +35,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); memcpy(name_ptr, name, name_len); + if (name_ptr + name_len > path.nodes[0]->b_data + 4096) + WARN_ON(1); mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f5db2b936502..6c010463b9db 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -50,6 +51,8 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) } while (bh != head); out_unlock: unlock_page(page); + if (ret) + touch_buffer(ret); page_cache_release(page); return ret; } @@ -65,6 +68,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, struct buffer_head *head; struct buffer_head *ret = NULL; u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + page = grab_cache_page(mapping, index); if (!page) return NULL; @@ -89,6 +93,8 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, } while (bh != head); out_unlock: unlock_page(page); + if (ret) + touch_buffer(ret); page_cache_release(page); return ret; } @@ -139,7 +145,7 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, desc.flags = 0; sg_init_one(&sg, data, len); spin_lock(&root->fs_info->hash_lock); - ret = crypto_hash_digest(&desc, &sg, len, result); + ret = crypto_hash_digest(&desc, &sg, 1, result); spin_unlock(&root->fs_info->hash_lock); if (ret) { printk("sha256 digest failed\n"); @@ -153,6 +159,7 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int ret; struct btrfs_node *node; + return 0; ret = 
btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, bh->b_size - BTRFS_CSUM_SIZE, result); if (ret) @@ -165,17 +172,17 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, } } else { node = btrfs_buffer_node(bh); - memcpy(&node->header.csum, result, BTRFS_CSUM_SIZE); + memcpy(node->header.csum, result, BTRFS_CSUM_SIZE); } return 0; } static int btree_writepage(struct page *page, struct writeback_control *wbc) { +#if 0 struct buffer_head *bh; struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); struct buffer_head *head; - if (!page_has_buffers(page)) { create_empty_buffers(page, root->fs_info->sb->s_blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -187,6 +194,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) csum_tree_block(root, bh, 0); bh = bh->b_this_page; } while (bh != head); +#endif return block_write_full_page(page, btree_get_block, wbc); } @@ -312,6 +320,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + insert_inode_hash(fs_info->btree_inode); + mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 04428137d75f..ab5419eea93a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -361,13 +361,16 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode; + struct btrfs_root *root = btrfs_sb(dir->i_sb); ino_t ino; int ret; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &ino); + mutex_unlock(&root->fs_info->fs_mutex); if (ret < 0) return ERR_PTR(ret); inode = NULL; @@ -395,6 +398,7 @@ static int 
btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) unsigned char d_type = DT_UNKNOWN; int over = 0; + mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); @@ -446,6 +450,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) ret = 0; err: btrfs_release_path(root, &path); + mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -667,8 +672,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; } dir->i_sb->s_dirt = 1; - btrfs_end_transaction(trans, root); out_unlock: + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); if (drop_inode) { inode_dec_link_count(inode); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b20fb53a0d27..a146463c5049 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -197,8 +197,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); - BUG_ON(ret); root->fs_info->generation = root->root_key.offset + 1; ret = btrfs_end_transaction(trans, root); BUG_ON(ret); - printk("at free, total trans %d\n", total_trans); + BUG_ON(ret); + root->fs_info->generation = root->root_key.offset + 1; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); } return ret; -- cgit v1.2.3 From d6025579531b7ea170ba283b171ff7a6bf7d0e12 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Mar 2007 14:27:56 -0400 Subject: Btrfs: corruption hunt continues Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 9 ++- fs/btrfs/ctree.c | 201 ++++++++++++++++++++++++++++--------------------- fs/btrfs/ctree.h | 32 ++++++++ fs/btrfs/dir-item.c | 6 +- fs/btrfs/disk-io.c | 20 ++++- fs/btrfs/extent-tree.c | 8 +- fs/btrfs/file-item.c | 4 +- fs/btrfs/inode-map.c | 2 +- fs/btrfs/root-tree.c | 7 +- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 1 + 11 files changed, 187 
insertions(+), 105 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 43807cc3cce7..845422368f48 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -16,12 +16,14 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) bits = radix_tree_lookup(radix, slot); if (!bits) { - bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOIO); + bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOFS); if (!bits) return -ENOMEM; memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); bits[0] = slot; + radix_tree_preload(GFP_NOFS); ret = radix_tree_insert(radix, slot, bits); + radix_tree_preload_end(); if (ret) return ret; } @@ -59,7 +61,7 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (!bits) return 0; clear_bit(bit_slot, bits + 1); - +#if 0 for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { if (bits[i]) { empty = 0; @@ -69,8 +71,11 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (empty) { bits = radix_tree_delete(radix, slot); + synchronize_rcu(); BUG_ON(!bits); + kfree(bits); } +#endif return 0; } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b7f3fcb72c62..a0dfa2d6cb9b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -51,7 +51,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); btrfs_set_header_generation(&cow_node->header, trans->transid); *cow_ret = cow; - mark_buffer_dirty(cow); + btrfs_mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; @@ -62,7 +62,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, cow->b_blocknr); - mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(parent); btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); @@ -312,11 +312,12 @@ 
static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(!child); root->node = child; path->nodes[level] = NULL; + clean_tree_block(trans, root, mid_buf); + wait_on_buffer(mid_buf); /* once for the path */ btrfs_block_release(root, mid_buf); /* once for the root ptr */ btrfs_block_release(root, mid_buf); - clean_tree_block(trans, root, mid_buf); return btrfs_free_extent(trans, root, blocknr, 1, 1); } parent = btrfs_buffer_node(parent_buf); @@ -351,8 +352,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = right_buf->b_blocknr; - btrfs_block_release(root, right_buf); clean_tree_block(trans, root, right_buf); + wait_on_buffer(right_buf); + btrfs_block_release(root, right_buf); right_buf = NULL; right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + @@ -363,10 +365,11 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } else { - memcpy(&parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - mark_buffer_dirty(parent_buf); + btrfs_memcpy(root, parent, + &parent->ptrs[pslot + 1].key, + &right->ptrs[0].key, + sizeof(struct btrfs_disk_key)); + btrfs_mark_buffer_dirty(parent_buf); } } if (btrfs_header_nritems(&mid->header) == 1) { @@ -388,8 +391,9 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ u64 blocknr = mid_buf->b_blocknr; - btrfs_block_release(root, mid_buf); clean_tree_block(trans, root, mid_buf); + wait_on_buffer(mid_buf); + btrfs_block_release(root, mid_buf); mid_buf = NULL; mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); @@ -400,9 +404,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; } else { /* update the parent key to reflect our changes */ - 
memcpy(&parent->ptrs[pslot].key, &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - mark_buffer_dirty(parent_buf); + btrfs_memcpy(root, parent, + &parent->ptrs[pslot].key, &mid->ptrs[0].key, + sizeof(struct btrfs_disk_key)); + btrfs_mark_buffer_dirty(parent_buf); } /* update the path */ @@ -544,8 +549,8 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root if (!path->nodes[i]) break; t = btrfs_buffer_node(path->nodes[i]); - memcpy(&t->ptrs[tslot].key, key, sizeof(*key)); - mark_buffer_dirty(path->nodes[i]); + btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key)); + btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; } @@ -580,17 +585,17 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root if (src_nritems < push_items) push_items = src_nritems; - memcpy(dst->ptrs + dst_nritems, src->ptrs, - push_items * sizeof(struct btrfs_key_ptr)); + btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs, + push_items * sizeof(struct btrfs_key_ptr)); if (push_items < src_nritems) { - memmove(src->ptrs, src->ptrs + push_items, + btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items, (src_nritems - push_items) * sizeof(struct btrfs_key_ptr)); } btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - mark_buffer_dirty(src_buf); - mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src_buf); + btrfs_mark_buffer_dirty(dst_buf); return ret; } @@ -629,16 +634,18 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct if (max_push < push_items) push_items = max_push; - memmove(dst->ptrs + push_items, dst->ptrs, - dst_nritems * sizeof(struct btrfs_key_ptr)); - memcpy(dst->ptrs, src->ptrs + src_nritems - push_items, - push_items * sizeof(struct btrfs_key_ptr)); + btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs, + dst_nritems * sizeof(struct btrfs_key_ptr)); + + btrfs_memcpy(root, dst, dst->ptrs, + 
src->ptrs + src_nritems - push_items, + push_items * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(&src->header, src_nritems - push_items); btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - mark_buffer_dirty(src_buf); - mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src_buf); + btrfs_mark_buffer_dirty(dst_buf); return ret; } @@ -674,10 +681,11 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else lower_key = &lower->ptrs[0].key; - memcpy(&c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); + btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, + sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); - mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(t); /* the super has an extra ref to root->node */ btrfs_block_release(root, root->node); @@ -712,13 +720,15 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { - memmove(lower->ptrs + slot + 1, lower->ptrs + slot, - (nritems - slot) * sizeof(struct btrfs_key_ptr)); + btrfs_memmove(root, lower, lower->ptrs + slot + 1, + lower->ptrs + slot, + (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - memcpy(&lower->ptrs[slot].key, key, sizeof(struct btrfs_disk_key)); + btrfs_memcpy(root, lower, &lower->ptrs[slot].key, + key, sizeof(struct btrfs_disk_key)); btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); - mark_buffer_dirty(path->nodes[level]); + btrfs_mark_buffer_dirty(path->nodes[level]); return 0; } @@ -761,14 +771,14 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_parentid(&split->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); mid = (c_nritems + 1) / 2; - memcpy(split->ptrs, c->ptrs + mid, - (c_nritems - mid) * sizeof(struct 
btrfs_key_ptr)); + btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); btrfs_set_header_nritems(&split->header, c_nritems - mid); btrfs_set_header_nritems(&c->header, mid); ret = 0; - mark_buffer_dirty(t); - mark_buffer_dirty(split_buffer); + btrfs_mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(split_buffer); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, split_buffer->b_blocknr, path->slots[level + 1] + 1, level + 1); @@ -875,17 +885,22 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_end(left->items + left_nritems - push_items); push_space -= leaf_data_end(root, left); /* make room in the right data area */ - memmove(btrfs_leaf_data(right) + leaf_data_end(root, right) - - push_space, btrfs_leaf_data(right) + leaf_data_end(root, right), - BTRFS_LEAF_DATA_SIZE(root) - leaf_data_end(root, right)); + btrfs_memmove(root, right, btrfs_leaf_data(right) + + leaf_data_end(root, right) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) - + leaf_data_end(root, right)); /* copy from the left data area */ - memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space, - btrfs_leaf_data(left) + leaf_data_end(root, left), push_space); - memmove(right->items + push_items, right->items, + btrfs_memcpy(root, right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(left) + leaf_data_end(root, left), + push_space); + btrfs_memmove(root, right, right->items + push_items, right->items, right_nritems * sizeof(struct btrfs_item)); /* copy the items from left to right */ - memcpy(right->items, left->items + left_nritems - push_items, - push_items * sizeof(struct btrfs_item)); + btrfs_memcpy(root, right, right->items, left->items + + left_nritems - push_items, + push_items * sizeof(struct btrfs_item)); /* update the item pointers */ right_nritems += push_items; @@ -899,11 
+914,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root left_nritems -= push_items; btrfs_set_header_nritems(&left->header, left_nritems); - mark_buffer_dirty(left_buf); - mark_buffer_dirty(right_buf); - memcpy(&upper_node->ptrs[slot + 1].key, + btrfs_mark_buffer_dirty(left_buf); + btrfs_mark_buffer_dirty(right_buf); + btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); - mark_buffer_dirty(upper); + btrfs_mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { @@ -977,14 +992,16 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } /* push data from right to left */ - memcpy(left->items + btrfs_header_nritems(&left->header), - right->items, push_items * sizeof(struct btrfs_item)); + btrfs_memcpy(root, left, left->items + + btrfs_header_nritems(&left->header), + right->items, push_items * sizeof(struct btrfs_item)); push_space = BTRFS_LEAF_DATA_SIZE(root) - btrfs_item_offset(right->items + push_items -1); - memcpy(btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, - btrfs_leaf_data(right) + - btrfs_item_offset(right->items + push_items - 1), - push_space); + btrfs_memcpy(root, left, btrfs_leaf_data(left) + + leaf_data_end(root, left) - push_space, + btrfs_leaf_data(right) + + btrfs_item_offset(right->items + push_items - 1), + push_space); old_left_nritems = btrfs_header_nritems(&left->header); BUG_ON(old_left_nritems < 0); @@ -1000,10 +1017,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root /* fixup right node */ push_space = btrfs_item_offset(right->items + push_items - 1) - leaf_data_end(root, right); - memmove(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - - push_space, btrfs_leaf_data(right) + - leaf_data_end(root, right), push_space); - memmove(right->items, right->items + push_items, + btrfs_memmove(root, right, 
btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); + btrfs_memmove(root, right, right->items, right->items + push_items, (btrfs_header_nritems(&right->header) - push_items) * sizeof(struct btrfs_item)); btrfs_set_header_nritems(&right->header, @@ -1017,8 +1035,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root push_space = btrfs_item_offset(right->items + i); } - mark_buffer_dirty(t); - mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(right_buf); wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) @@ -1110,11 +1128,12 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_header_parentid(btrfs_buffer_header(root->node))); data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(root, l); - memcpy(right->items, l->items + mid, - (nritems - mid) * sizeof(struct btrfs_item)); - memcpy(btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - - data_copy_size, btrfs_leaf_data(l) + - leaf_data_end(root, l), data_copy_size); + btrfs_memcpy(root, right, right->items, l->items + mid, + (nritems - mid) * sizeof(struct btrfs_item)); + btrfs_memcpy(root, right, + btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - + data_copy_size, btrfs_leaf_data(l) + + leaf_data_end(root, l), data_copy_size); rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - btrfs_item_end(l->items + mid); @@ -1129,8 +1148,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root right_buffer->b_blocknr, path->slots[1] + 1, 1); if (wret) ret = wret; - mark_buffer_dirty(right_buffer); - mark_buffer_dirty(l_buf); + btrfs_mark_buffer_dirty(right_buffer); + btrfs_mark_buffer_dirty(l_buf); BUG_ON(path->slots[0] != slot); if (mid <= slot) { btrfs_block_release(root, path->nodes[0]); @@ -1200,22 +1219,23 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root } /* 
shift the items */ - memmove(leaf->items + slot + 1, leaf->items + slot, - (nritems - slot) * sizeof(struct btrfs_item)); + btrfs_memmove(root, leaf, leaf->items + slot + 1, + leaf->items + slot, + (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ - memmove(btrfs_leaf_data(leaf) + data_end - data_size, - btrfs_leaf_data(leaf) + - data_end, old_data - data_end); + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end - data_size, btrfs_leaf_data(leaf) + + data_end, old_data - data_end); data_end = old_data; } /* setup the item for the new data */ - memcpy(&leaf->items[slot].key, &disk_key, - sizeof(struct btrfs_disk_key)); + btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key, + sizeof(struct btrfs_disk_key)); btrfs_set_item_offset(leaf->items + slot, data_end - data_size); btrfs_set_item_size(leaf->items + slot, data_size); btrfs_set_header_nritems(&leaf->header, nritems + 1); - mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf_buf); ret = 0; if (slot == 0) @@ -1245,8 +1265,9 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root if (!ret) { ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], u8); - memcpy(ptr, data, data_size); - mark_buffer_dirty(path.nodes[0]); + btrfs_memcpy(root, path.nodes[0]->b_data, + ptr, data, data_size); + btrfs_mark_buffer_dirty(path.nodes[0]); } btrfs_release_path(root, &path); return ret; @@ -1271,8 +1292,10 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, node = btrfs_buffer_node(parent); nritems = btrfs_header_nritems(&node->header); if (slot != nritems -1) { - memmove(node->ptrs + slot, node->ptrs + slot + 1, - sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); + btrfs_memmove(root, node, node->ptrs + slot, + node->ptrs + slot + 1, + sizeof(struct btrfs_key_ptr) * + (nritems - slot - 1)); } nritems--; btrfs_set_header_nritems(&node->header, nritems); @@ -1287,7 +1310,7 @@ static int del_ptr(struct btrfs_trans_handle 
*trans, struct btrfs_root *root, if (wret) ret = wret; } - mark_buffer_dirty(parent); + btrfs_mark_buffer_dirty(parent); return ret; } @@ -1317,16 +1340,18 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (slot != nritems - 1) { int i; int data_end = leaf_data_end(root, leaf); - memmove(btrfs_leaf_data(leaf) + data_end + dsize, - btrfs_leaf_data(leaf) + data_end, - doff - data_end); + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end + dsize, + btrfs_leaf_data(leaf) + data_end, + doff - data_end); for (i = slot + 1; i < nritems; i++) { u32 ioff = btrfs_item_offset(leaf->items + i); btrfs_set_item_offset(leaf->items + i, ioff + dsize); } - memmove(leaf->items + slot, leaf->items + slot + 1, - sizeof(struct btrfs_item) * - (nritems - slot - 1)); + btrfs_memmove(root, leaf, leaf->items + slot, + leaf->items + slot + 1, + sizeof(struct btrfs_item) * + (nritems - slot - 1)); } btrfs_set_header_nritems(&leaf->header, nritems - 1); nritems--; @@ -1336,6 +1361,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, btrfs_set_header_level(&leaf->header, 0); } else { clean_tree_block(trans, root, leaf_buf); + wait_on_buffer(leaf_buf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; @@ -1373,6 +1399,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_header_nritems(&leaf->header) == 0) { u64 blocknr = leaf_buf->b_blocknr; clean_tree_block(trans, root, leaf_buf); + wait_on_buffer(leaf_buf); wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; @@ -1382,11 +1409,11 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; } else { - mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf_buf); btrfs_block_release(root, leaf_buf); } } else { - mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf_buf); } } return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 
df1a025a771c..9ec0d65ebe9b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2,6 +2,7 @@ #define __BTRFS__ #include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -834,6 +835,37 @@ static inline struct btrfs_root *btrfs_sb(struct super_block *sb) return sb->s_fs_info; } +static inline void btrfs_check_bounds(void *vptr, size_t len, + void *vcontainer, size_t container_len) +{ + char *ptr = vptr; + char *container = vcontainer; + WARN_ON(ptr < container); + WARN_ON(ptr + len > container + container_len); +} + +static inline void btrfs_memcpy(struct btrfs_root *root, + void *dst_block, + void *dst, const void *src, size_t nr) +{ + btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); + memcpy(dst, src, nr); +} + +static inline void btrfs_memmove(struct btrfs_root *root, + void *dst_block, + void *dst, void *src, size_t nr) +{ + btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); + memmove(dst, src, nr); +} + +static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) +{ + WARN_ON(!atomic_read(&bh->b_count)); + mark_buffer_dirty(bh); +} + /* helper function to cast into the data area of the leaf. 
*/ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 18d3e168ad0a..f55c89472ac0 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -34,10 +34,8 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); - memcpy(name_ptr, name, name_len); - if (name_ptr + name_len > path.nodes[0]->b_data + 4096) - WARN_ON(1); - mark_buffer_dirty(path.nodes[0]); + btrfs_memcpy(root, path.nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c010463b9db..bb133104e2e9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -8,6 +8,17 @@ #include "disk-io.h" #include "transaction.h" +#define PATTERN 0xDEADBEEFUL +static inline void check_pattern(struct buffer_head *buf) +{ + if (buf->b_private != (void *)PATTERN) + WARN_ON(1); +} + +static inline void set_pattern(struct buffer_head *buf) +{ + buf->b_private = (void *)PATTERN; +} static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { @@ -51,8 +62,10 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) } while (bh != head); out_unlock: unlock_page(page); - if (ret) + if (ret) { touch_buffer(ret); + check_pattern(ret); + } page_cache_release(page); return ret; } @@ -82,6 +95,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh->b_bdev = root->fs_info->sb->s_bdev; bh->b_blocknr = first_block; set_buffer_mapped(bh); + set_pattern(bh); } if (bh->b_blocknr == blocknr) { ret = bh; @@ -225,6 +239,7 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) if (!buffer_uptodate(bh)) goto fail; csum_tree_block(root, bh, 1); + set_pattern(bh); } else { unlock_buffer(bh); 
} @@ -240,6 +255,7 @@ fail: int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { + WARN_ON(atomic_read(&buf->b_count) == 0); mark_buffer_dirty(buf); return 0; } @@ -247,6 +263,7 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { + WARN_ON(atomic_read(&buf->b_count) == 0); clear_buffer_dirty(buf); return 0; } @@ -431,6 +448,7 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { + check_pattern(buf); brelse(buf); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b2ae8e768b6c..c4194dab7a33 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,7 +39,7 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); btrfs_release_path(root->fs_info->extent_root, &path); finish_current_insert(trans, root->fs_info->extent_root); @@ -177,10 +177,10 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) header = btrfs_buffer_header(bh); if (btrfs_header_generation(header) == root->fs_info->running_transaction->transid) { - brelse(bh); + btrfs_block_release(root, bh); return 0; } - brelse(bh); + btrfs_block_release(root, bh); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); } else { @@ -224,7 +224,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); if (refs == 0) { u64 super_blocks_used; diff --git a/fs/btrfs/file-item.c 
b/fs/btrfs/file-item.c index 2d2c23ca7cbf..c3992b7b0c60 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,7 +34,7 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, ins.offset); btrfs_set_file_extent_generation(item, trans->transid); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); *result = ins.objectid; btrfs_release_path(root, &path); return 0; @@ -81,7 +81,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_csum_item); ret = 0; ret = btrfs_csum_data(root, data, len, item->csum); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); fail: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 1b2c5e043508..fa4adb81b149 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -109,7 +109,7 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(inode_root, &path); return ret; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a821b5d1e233..f37dab87de16 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -45,6 +45,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_leaf *l; int ret; int slot; + struct btrfs_root_item *update_item; btrfs_init_path(&path); ret = btrfs_search_slot(trans, root, key, &path, 0, 1); @@ -53,9 +54,9 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret != 0); l = btrfs_buffer_leaf(path.nodes[0]); slot = path.slots[0]; - memcpy(btrfs_item_ptr(l, slot, struct btrfs_root_item), item, - sizeof(*item)); - mark_buffer_dirty(path.nodes[0]); + 
update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); + btrfs_memcpy(root, l, update_item, item, sizeof(*item)); + btrfs_mark_buffer_dirty(path.nodes[0]); out: btrfs_release_path(root, &path); return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ab5419eea93a..894a70bf26d2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -557,7 +557,7 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_inode_item); fill_inode_item(inode_item, inode); - mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path.nodes[0]); failed: btrfs_release_path(root, &path); return 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a146463c5049..84c4e278ce2e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -66,6 +66,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, cur_trans->num_writers--; put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); + memset(trans, 0, sizeof(*trans)); kfree(trans); return 0; } -- cgit v1.2.3 From 2c90e5d658424bc71b111eb5a972240d5d06fe86 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 10:50:19 -0400 Subject: Btrfs: still corruption hunting Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 7 +-- fs/btrfs/btrfs_inode.h | 14 +++++ fs/btrfs/ctree.c | 42 ++++++++++---- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 84 ++++++++++++++-------------- fs/btrfs/disk-io.h | 4 +- fs/btrfs/extent-tree.c | 18 ++++-- fs/btrfs/super.c | 146 ++++++++++++++++++++++++++++++++++++++++++++----- fs/btrfs/transaction.c | 39 +++++++++---- fs/btrfs/transaction.h | 3 + 10 files changed, 265 insertions(+), 95 deletions(-) create mode 100644 fs/btrfs/btrfs_inode.h (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 845422368f48..a50a1c44eabd 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -4,6 +4,7 @@ #define BIT_ARRAY_BYTES 256 #define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - 
sizeof(unsigned long)) * 8) +extern struct kmem_cache *btrfs_bit_radix_cachep; int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) { unsigned long *bits; @@ -16,7 +17,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) bits = radix_tree_lookup(radix, slot); if (!bits) { - bits = kmalloc(BIT_ARRAY_BYTES, GFP_NOFS); + bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS); if (!bits) return -ENOMEM; memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); @@ -68,12 +69,10 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) break; } } - if (empty) { bits = radix_tree_delete(radix, slot); - synchronize_rcu(); BUG_ON(!bits); - kfree(bits); + kmem_cache_free(btrfs_bit_radix_cachep, bits); } #endif return 0; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h new file mode 100644 index 000000000000..e159841650a4 --- /dev/null +++ b/fs/btrfs/btrfs_inode.h @@ -0,0 +1,14 @@ +#ifndef __BTRFS_I__ +#define __BTRFS_I__ + +struct btrfs_inode { + u32 magic; + struct inode vfs_inode; + u32 magic2; +}; +static inline struct btrfs_inode *BTRFS_I(struct inode *inode) +{ + return container_of(inode, struct btrfs_inode, vfs_inode); +} + +#endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index a0dfa2d6cb9b..453ce835209a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,6 +16,16 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); +struct btrfs_path *btrfs_alloc_path(void) +{ + return kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); +} + +void btrfs_free_path(struct btrfs_path *p) +{ + kmem_cache_free(btrfs_path_cachep, p); +} + inline void btrfs_init_path(struct btrfs_path *p) { memset(p, 0, sizeof(*p)); @@ -47,17 +57,18 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root } cow = btrfs_alloc_free_block(trans, root); cow_node = 
btrfs_buffer_node(cow); + if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) + WARN_ON(1); memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); btrfs_set_header_generation(&cow_node->header, trans->transid); - *cow_ret = cow; - btrfs_mark_buffer_dirty(cow); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; get_bh(cow); - if (buf != root->commit_root) + if (buf != root->commit_root) { btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); + } btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, @@ -66,6 +77,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); + *cow_ret = cow; return 0; } @@ -477,9 +489,12 @@ again: p->slots[level + 1], &cow_buf); b = cow_buf; + c = btrfs_buffer_node(b); } BUG_ON(!cow && ins_len); - c = btrfs_buffer_node(b); + if (level != btrfs_header_level(&c->header)) + WARN_ON(1); + level = btrfs_header_level(&c->header); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) @@ -1257,19 +1272,22 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root data_size) { int ret = 0; - struct btrfs_path path; + struct btrfs_path *path; u8 *ptr; - btrfs_init_path(&path); - ret = btrfs_insert_empty_item(trans, root, &path, cpu_key, data_size); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) { - ptr = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], u8); - btrfs_memcpy(root, path.nodes[0]->b_data, + ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], u8); + btrfs_memcpy(root, path->nodes[0]->b_data, ptr, data, data_size); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); } 
- btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9ec0d65ebe9b..d8e03bd797ff 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -7,6 +7,7 @@ struct btrfs_trans_handle; struct btrfs_transaction; +extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_BtRfS_M" @@ -888,6 +889,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); +struct btrfs_path *btrfs_alloc_path(void); +void btrfs_free_path(struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bb133104e2e9..2dbd55084a4e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -8,18 +8,6 @@ #include "disk-io.h" #include "transaction.h" -#define PATTERN 0xDEADBEEFUL -static inline void check_pattern(struct buffer_head *buf) -{ - if (buf->b_private != (void *)PATTERN) - WARN_ON(1); -} - -static inline void set_pattern(struct buffer_head *buf) -{ - buf->b_private = (void *)PATTERN; -} - static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); @@ -35,6 +23,8 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_find_get_block(root->fs_info->sb, blocknr); +#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -43,6 +33,7 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) struct 
buffer_head *head; struct buffer_head *ret = NULL; + page = find_lock_page(mapping, index); if (!page) return NULL; @@ -64,15 +55,17 @@ out_unlock: unlock_page(page); if (ret) { touch_buffer(ret); - check_pattern(ret); } page_cache_release(page); return ret; +#endif } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_getblk(root->fs_info->sb, blocknr); +#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -95,7 +88,6 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh->b_bdev = root->fs_info->sb->s_bdev; bh->b_blocknr = first_block; set_buffer_mapped(bh); - set_pattern(bh); } if (bh->b_blocknr == blocknr) { ret = bh; @@ -111,6 +103,7 @@ out_unlock: touch_buffer(ret); page_cache_release(page); return ret; +#endif } static sector_t max_block(struct block_device *bdev) @@ -225,6 +218,8 @@ static struct address_space_operations btree_aops = { struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_bread(root->fs_info->sb, blocknr); +#if 0 struct buffer_head *bh = NULL; bh = btrfs_find_create_tree_block(root, blocknr); @@ -239,7 +234,6 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) if (!buffer_uptodate(bh)) goto fail; csum_tree_block(root, bh, 1); - set_pattern(bh); } else { unlock_buffer(bh); } @@ -250,6 +244,7 @@ fail: brelse(bh); return NULL; +#endif } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -268,14 +263,14 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -static int __setup_root(struct btrfs_super_block *super, +static int __setup_root(int blocksize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { root->node = NULL; root->commit_root = NULL; - root->blocksize = 
btrfs_super_blocksize(super); + root->blocksize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; memset(&root->root_key, 0, sizeof(root->root_key)); @@ -283,7 +278,7 @@ static int __setup_root(struct btrfs_super_block *super, return 0; } -static int find_and_setup_root(struct btrfs_super_block *super, +static int find_and_setup_root(int blocksize, struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, @@ -291,7 +286,7 @@ static int find_and_setup_root(struct btrfs_super_block *super, { int ret; - __setup_root(super, root, fs_info, objectid); + __setup_root(blocksize, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -302,9 +297,7 @@ static int find_and_setup_root(struct btrfs_super_block *super, return 0; } -struct btrfs_root *open_ctree(struct super_block *sb, - struct buffer_head *sb_buffer, - struct btrfs_super_block *disk_super) +struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -317,13 +310,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; + struct btrfs_super_block *disk_super; - if (!btrfs_super_root(disk_super)) { - return NULL; - } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); - sb_set_blocksize(sb, sb_buffer->b_size); + sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -331,55 +322,59 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; - fs_info->disk_super = disk_super; fs_info->sb = sb; + fs_info->btree_inode = NULL; +#if 0 fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; + fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = 
sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; insert_inode_hash(fs_info->btree_inode); - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); +#endif fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); - if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { printk("failed to allocate sha256 hash\n"); return NULL; } - mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); - __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - - fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); + __setup_root(sb->s_blocksize, tree_root, + fs_info, BTRFS_ROOT_TREE_OBJECTID); + fs_info->sb_buffer = read_tree_block(tree_root, + BTRFS_SUPER_INFO_OFFSET / + sb->s_blocksize); if (!fs_info->sb_buffer) { printk("failed2\n"); return NULL; } - brelse(sb_buffer); - sb_buffer = NULL; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + if (!btrfs_super_root(disk_super)) { + return NULL; + } fs_info->disk_super = disk_super; - tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + mutex_lock(&fs_info->fs_mutex); + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); + mutex_unlock(&fs_info->fs_mutex); BUG_ON(ret); root->commit_root = root->node; get_bh(root->node); @@ -392,9 +387,11 @@ int 
write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct buffer_head *bh = root->fs_info->sb_buffer; + btrfs_set_super_root(root->fs_info->disk_super, root->fs_info->tree_root->node->b_blocknr); lock_buffer(bh); + WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); csum_tree_block(root, bh, 0); bh->b_end_io = end_buffer_write_sync; @@ -413,6 +410,7 @@ int close_ctree(struct btrfs_root *root) int ret; struct btrfs_trans_handle *trans; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -421,6 +419,7 @@ int close_ctree(struct btrfs_root *root) ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); write_ctree_super(NULL, root); + mutex_unlock(&root->fs_info->fs_mutex); if (root->node) btrfs_block_release(root, root->node); @@ -436,8 +435,8 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); crypto_free_hash(root->fs_info->hash_tfm); - truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - iput(root->fs_info->btree_inode); + // truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); + // iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); @@ -448,7 +447,6 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - check_pattern(buf); - brelse(buf); + // brelse(buf); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index f6998e2192ce..ac6764ba8aa6 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -31,9 +31,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct btrfs_root 
*open_ctree(struct super_block *sb, - struct buffer_head *sb_buffer, - struct btrfs_super_block *disk_super); +struct btrfs_root *open_ctree(struct super_block *sb); int close_ctree(struct btrfs_root *root); void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4194dab7a33..37b87e28a2f3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -173,12 +173,16 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) if (!pending) { bh = btrfs_find_tree_block(root, blocknr); - if (bh && buffer_uptodate(bh)) { - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - root->fs_info->running_transaction->transid) { - btrfs_block_release(root, bh); - return 0; + if (bh) { + if (buffer_uptodate(bh)) { + u64 transid = + root->fs_info->running_transaction->transid; + header = btrfs_buffer_header(bh); + if (btrfs_header_generation(header) == + transid) { + btrfs_block_release(root, bh); + return 0; + } } btrfs_block_release(root, bh); } @@ -539,6 +543,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root */ while(*level >= 0) { cur = path->nodes[*level]; + if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + WARN_ON(1); if (path->slots[*level] >= btrfs_header_nritems(btrfs_buffer_header(cur))) break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 894a70bf26d2..6969b672b570 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -14,6 +14,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "btrfs_inode.h" #define BTRFS_SUPER_MAGIC 0x9123682E @@ -24,6 +25,14 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct file_operations btrfs_file_operations; +static int check_inode(struct inode *inode) +{ + struct btrfs_inode *ei = 
BTRFS_I(inode); + WARN_ON(ei->magic != 0xDEADBEEF); + WARN_ON(ei->magic2 != 0xDEADBEAF); + return 0; +} + static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path path; @@ -34,6 +43,7 @@ static void btrfs_read_locked_inode(struct inode *inode) btrfs_init_path(&path); mutex_lock(&root->fs_info->fs_mutex); + check_inode(inode); ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); if (ret) { btrfs_release_path(root, &path); @@ -41,6 +51,7 @@ static void btrfs_read_locked_inode(struct inode *inode) make_bad_inode(inode); return; } + check_inode(inode); inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], struct btrfs_inode_item); @@ -60,6 +71,7 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_generation = btrfs_inode_generation(inode_item); btrfs_release_path(root, &path); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); switch (inode->i_mode & S_IFMT) { #if 0 default: @@ -80,6 +92,7 @@ static void btrfs_read_locked_inode(struct inode *inode) // inode->i_op = &page_symlink_inode_operations; break; } + check_inode(inode); return; } @@ -347,6 +360,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, namelen, 0); if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) { *ino = 0; + ret = 0; goto out; } di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], @@ -354,6 +368,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, *ino = btrfs_dir_objectid(di); out: btrfs_release_path(root, &path); + check_inode(dir); return ret; } @@ -367,7 +382,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &ino); mutex_unlock(&root->fs_info->fs_mutex); @@ -378,7 +392,9 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, inode 
= iget(dir->i_sb, ino); if (!inode) return ERR_PTR(-EACCES); + check_inode(inode); } + check_inode(dir); return d_splice_alias(inode, dentry); } @@ -471,23 +487,14 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) struct inode * inode; struct dentry * root_dentry; struct btrfs_super_block *disk_super; - struct buffer_head *bh; struct btrfs_root *root; sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; sb->s_time_gran = 1; - bh = sb_bread(sb, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); - if (!bh) { - printk("btrfs: unable to read on disk super\n"); - return -EIO; - } - disk_super = (struct btrfs_super_block *)bh->b_data; - root = open_ctree(sb, bh, disk_super); + root = open_ctree(sb); if (!root) { printk("btrfs: open_ctree failed\n"); @@ -533,6 +540,7 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); + check_inode(inode); } static int btrfs_update_inode(struct btrfs_trans_handle *trans, @@ -560,6 +568,7 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path.nodes[0]); failed: btrfs_release_path(root, &path); + check_inode(inode); return 0; } @@ -577,6 +586,7 @@ static int btrfs_write_inode(struct inode *inode, int wait) else btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); return ret; } @@ -594,6 +604,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + check_inode(inode); ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); BUG_ON(ret); @@ -616,6 +627,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BUG_ON(ret); insert_inode_hash(inode); + 
check_inode(inode); + check_inode(dir); return inode; } @@ -632,7 +645,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), dentry->d_parent->d_inode); } - + check_inode(inode); + check_inode(dentry->d_parent->d_inode); return ret; } @@ -644,6 +658,9 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, d_instantiate(dentry, inode); return 0; } + if (err > 0) + err = -EEXIST; + check_inode(inode); return err; } @@ -675,6 +692,9 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + check_inode(inode); + check_inode(dir); + if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -755,11 +775,11 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) sb->s_dirt = 0; if (!wait) { - filemap_flush(root->fs_info->btree_inode->i_mapping); + // filemap_flush(root->fs_info->btree_inode->i_mapping); + filemap_flush(root->fs_info->sb->s_bdev->bd_inode->i_mapping); return 0; } - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1242,6 +1262,95 @@ static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return retval; } +static struct kmem_cache *btrfs_inode_cachep; +struct kmem_cache *btrfs_trans_handle_cachep; +struct kmem_cache *btrfs_transaction_cachep; +struct kmem_cache *btrfs_bit_radix_cachep; +struct kmem_cache *btrfs_path_cachep; + +/* + * Called inside transaction, so use GFP_NOFS + */ +static struct inode *btrfs_alloc_inode(struct super_block *sb) +{ + struct btrfs_inode *ei; + + ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); + if (!ei) + return NULL; + ei->magic = 0xDEADBEEF; + ei->magic2 = 0xDEADBEAF; + return &ei->vfs_inode; +} + 
+static void btrfs_destroy_inode(struct inode *inode) +{ + struct btrfs_inode *ei = BTRFS_I(inode); + WARN_ON(ei->magic != 0xDEADBEEF); + WARN_ON(ei->magic2 != 0xDEADBEAF); + WARN_ON(!list_empty(&inode->i_dentry)); + WARN_ON(inode->i_ino == 1); + WARN_ON(inode->i_data.nrpages); + + ei->magic = 0; + ei->magic2 = 0; + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} + +static void init_once(void * foo, struct kmem_cache * cachep, + unsigned long flags) +{ + struct btrfs_inode *ei = (struct btrfs_inode *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + inode_init_once(&ei->vfs_inode); + } +} + +static int init_inodecache(void) +{ + btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", + sizeof(struct btrfs_inode), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once, NULL); + btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", + sizeof(struct btrfs_trans_handle), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", + 256, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD | + SLAB_DESTROY_BY_RCU), + NULL, NULL); + if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL || + btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + kmem_cache_destroy(btrfs_inode_cachep); + kmem_cache_destroy(btrfs_trans_handle_cachep); + kmem_cache_destroy(btrfs_transaction_cachep); + kmem_cache_destroy(btrfs_bit_radix_cachep); + kmem_cache_destroy(btrfs_path_cachep); +} + static int btrfs_get_sb(struct 
file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { @@ -1265,6 +1374,8 @@ static struct super_operations btrfs_super_ops = { .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, .write_inode = btrfs_write_inode, + .alloc_inode = btrfs_alloc_inode, + .destroy_inode = btrfs_destroy_inode, }; static struct inode_operations btrfs_dir_inode_operations = { @@ -1305,12 +1416,17 @@ static struct file_operations btrfs_file_operations = { static int __init init_btrfs_fs(void) { + int err; printk("btrfs loaded!\n"); + err = init_inodecache(); + if (err) + return err; return register_filesystem(&btrfs_fs_type); } static void __exit exit_btrfs_fs(void) { + destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); printk("btrfs unloaded\n"); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 84c4e278ce2e..72b52e1e0b1b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -5,13 +5,20 @@ #include "transaction.h" static int total_trans = 0; +extern struct kmem_cache *btrfs_trans_handle_cachep; +extern struct kmem_cache *btrfs_transaction_cachep; + +#define TRANS_MAGIC 0xE1E10E static void put_transaction(struct btrfs_transaction *transaction) { + WARN_ON(transaction->use_count == 0); transaction->use_count--; + WARN_ON(transaction->magic != TRANS_MAGIC); if (transaction->use_count == 0) { WARN_ON(total_trans == 0); total_trans--; - kfree(transaction); + memset(transaction, 0, sizeof(*transaction)); + kmem_cache_free(btrfs_transaction_cachep, transaction); } } @@ -20,7 +27,8 @@ static int join_transaction(struct btrfs_root *root) struct btrfs_transaction *cur_trans; cur_trans = root->fs_info->running_transaction; if (!cur_trans) { - cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); + cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, + GFP_NOFS); total_trans++; BUG_ON(!cur_trans); root->fs_info->running_transaction = cur_trans; @@ -28,6 +36,7 @@ static int join_transaction(struct btrfs_root 
*root) cur_trans->transid = root->root_key.offset + 1; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); + cur_trans->magic = TRANS_MAGIC; cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; @@ -39,7 +48,8 @@ static int join_transaction(struct btrfs_root *root) struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, int num_blocks) { - struct btrfs_trans_handle *h = kmalloc(sizeof(*h), GFP_NOFS); + struct btrfs_trans_handle *h = + kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; mutex_lock(&root->fs_info->trans_mutex); @@ -51,6 +61,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->blocks_used = 0; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); + h->magic = h->magic2 = TRANS_MAGIC; return h; } @@ -58,6 +69,8 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_transaction *cur_trans; + WARN_ON(trans->magic != TRANS_MAGIC); + WARN_ON(trans->magic2 != TRANS_MAGIC); mutex_lock(&root->fs_info->trans_mutex); cur_trans = root->fs_info->running_transaction; WARN_ON(cur_trans->num_writers < 1); @@ -67,7 +80,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); memset(trans, 0, sizeof(*trans)); - kfree(trans); + kmem_cache_free(btrfs_trans_handle_cachep, trans); return 0; } @@ -75,7 +88,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); return 0; } @@ -137,6 +150,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { 
+printk("already in commit!, waiting\n"); cur_trans = trans->transaction; trans->transaction->use_count++; btrfs_end_transaction(trans, root); @@ -146,7 +160,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); return 0; } + cur_trans = trans->transaction; + trans->transaction->in_commit = 1; while (trans->transaction->num_writers > 1) { + WARN_ON(cur_trans != trans->transaction); prepare_to_wait(&trans->transaction->writer_wait, &wait, TASK_UNINTERRUPTIBLE); if (trans->transaction->num_writers <= 1) @@ -154,15 +171,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); schedule(); mutex_lock(&root->fs_info->trans_mutex); + finish_wait(&trans->transaction->writer_wait, &wait); } finish_wait(&trans->transaction->writer_wait, &wait); - + WARN_ON(cur_trans != trans->transaction); if (root->node != root->commit_root) { memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; } - if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); ret = btrfs_insert_root(trans, root->fs_info->tree_root, @@ -172,22 +189,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); - cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; mutex_unlock(&root->fs_info->trans_mutex); - ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); btrfs_finish_extent_commit(trans, root); mutex_lock(&root->fs_info->trans_mutex); + cur_trans->commit_done = 1; + wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); - kfree(trans); - + kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->node != root->commit_root) { trans = btrfs_start_transaction(root, 1); snap 
= root->commit_root; @@ -203,7 +219,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_end_transaction(trans, root); BUG_ON(ret); } - return ret; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 7e6c08a0accd..4f1496ae6f24 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -7,15 +7,18 @@ struct btrfs_transaction { int in_commit; int use_count; int commit_done; + int magic; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; struct btrfs_trans_handle { + int magic; u64 transid; unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; + int magic2; }; -- cgit v1.2.3 From 5caf2a002901f0fde475371c4bf1c553b51884af Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 11:20:42 -0400 Subject: Btrfs: dynamic allocation of path struct Signed-off-by: Chris Mason --- fs/btrfs/dir-item.c | 18 +++-- fs/btrfs/extent-tree.c | 100 +++++++++++++++---------- fs/btrfs/file-item.c | 43 ++++++----- fs/btrfs/inode-item.c | 9 ++- fs/btrfs/root-tree.c | 47 +++++++----- fs/btrfs/super.c | 198 ++++++++++++++++++++++++++++--------------------- 6 files changed, 243 insertions(+), 172 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index f55c89472ac0..baceb1da609f 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -9,7 +9,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root objectid, u8 type) { int ret = 0; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_dir_item *dir_item; char *name_ptr; struct btrfs_key key; @@ -20,24 +20,26 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); - btrfs_init_path(&path); + path = btrfs_alloc_path(); + btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; - ret = 
btrfs_insert_empty_item(trans, root, &path, &key, data_size); + ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); if (ret) goto out; - dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_dir_item); btrfs_set_dir_objectid(dir_item, objectid); btrfs_set_dir_type(dir_item, type); btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path.nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 37b87e28a2f3..d785b721b461 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -15,7 +15,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks) { - struct btrfs_path path; + struct btrfs_path *path; int ret; struct btrfs_key key; struct btrfs_leaf *l; @@ -25,23 +25,26 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, &ins); - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret != 0) BUG(); BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path.nodes[0]); - item = btrfs_item_ptr(l, path.slots[0], 
struct btrfs_extent_item); + l = btrfs_buffer_leaf(path->nodes[0]); + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); refs = btrfs_extent_refs(item); btrfs_set_extent_refs(item, refs + 1); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_release_path(root->fs_info->extent_root, &path); + btrfs_release_path(root->fs_info->extent_root, path); + btrfs_free_path(path); finish_current_insert(trans, root->fs_info->extent_root); del_pending_extents(trans, root->fs_info->extent_root); return 0; @@ -50,24 +53,27 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, u32 *refs) { - struct btrfs_path path; + struct btrfs_path *path; int ret; struct btrfs_key key; struct btrfs_leaf *l; struct btrfs_extent_item *item; - btrfs_init_path(&path); + + path = btrfs_alloc_path(); + btrfs_init_path(path); key.objectid = blocknr; key.offset = num_blocks; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, &path, + ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret != 0) BUG(); - l = btrfs_buffer_leaf(path.nodes[0]); - item = btrfs_item_ptr(l, path.slots[0], struct btrfs_extent_item); + l = btrfs_buffer_leaf(path->nodes[0]); + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); - btrfs_release_path(root->fs_info->extent_root, &path); + btrfs_release_path(root->fs_info->extent_root, path); + btrfs_free_path(path); return 0; } @@ -200,7 +206,7 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; struct 
btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -215,20 +221,22 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root key.offset = num_blocks; find_free_extent(trans, root, 0, 0, (u64)-1, &ins); - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, extent_root, &key, &path, -1, 1); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { printk("failed to find %Lu\n", key.objectid); btrfs_print_tree(extent_root, extent_root->node); printk("failed to find %Lu\n", key.objectid); BUG(); } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_extent_item); BUG_ON(ei->refs == 0); refs = btrfs_extent_refs(ei) - 1; btrfs_set_extent_refs(ei, refs); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); if (refs == 0) { u64 super_blocks_used; @@ -240,13 +248,14 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); - ret = btrfs_del_item(trans, extent_root, &path); + ret = btrfs_del_item(trans, extent_root, path); if (extent_root->fs_info->last_insert.objectid > blocknr) extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); } - btrfs_release_path(extent_root, &path); + btrfs_release_path(extent_root, path); + btrfs_free_path(path); finish_current_insert(trans, extent_root); return ret; } @@ -319,7 +328,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; int ret; u64 hole_size = 0; @@ -339,24 +348,25 @@ static 
int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); + path = btrfs_alloc_path(); check_failed: - btrfs_init_path(&path); + btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; start_found = 0; - ret = btrfs_search_slot(trans, root, ins, &path, 0, 0); + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) goto error; - if (path.slots[0] > 0) - path.slots[0]--; + if (path->slots[0] > 0) + path->slots[0]--; while (1) { - l = btrfs_buffer_leaf(path.nodes[0]); - slot = path.slots[0]; + l = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - ret = btrfs_next_leaf(root, &path); + ret = btrfs_next_leaf(root, path); if (ret == 0) continue; if (ret < 0) @@ -387,14 +397,14 @@ check_failed: } start_found = 1; last_block = key.objectid + key.offset; - path.slots[0]++; + path->slots[0]++; } // FIXME -ENOSPC check_pending: /* we have to make sure we didn't find an extent that has already * been allocated by the map tree or the original allocation */ - btrfs_release_path(root, &path); + btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); for (test_block = ins->objectid; test_block < ins->objectid + total_needed; test_block++) { @@ -410,9 +420,11 @@ check_pending: root->fs_info->current_insert.flags = 0; root->fs_info->last_insert.objectid = ins->objectid; ins->offset = num_blocks; + btrfs_free_path(path); return 0; error: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -533,6 +545,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u32 refs; + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, 1, &refs); BUG_ON(ret); @@ -542,6 +556,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root * 
walk down to the last node level and free all the leaves */ while(*level >= 0) { + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); cur = path->nodes[*level]; if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) WARN_ON(1); @@ -564,6 +580,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root continue; } next = read_tree_block(root, blocknr); + WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); path->nodes[*level-1] = next; @@ -571,6 +588,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[*level] = 0; } out: + WARN_ON(*level < 0); + WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, path->nodes[*level]->b_blocknr, 1, 1); btrfs_block_release(root, path->nodes[*level]); @@ -622,33 +641,36 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root int ret = 0; int wret; int level; - struct btrfs_path path; + struct btrfs_path *path; int i; int orig_level; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); level = btrfs_header_level(btrfs_buffer_header(snap)); orig_level = level; - path.nodes[level] = snap; - path.slots[level] = 0; + path->nodes[level] = snap; + path->slots[level] = 0; while(1) { - wret = walk_down_tree(trans, root, &path, &level); + wret = walk_down_tree(trans, root, path, &level); if (wret > 0) break; if (wret < 0) ret = wret; - wret = walk_up_tree(trans, root, &path, &level); + wret = walk_up_tree(trans, root, path, &level); if (wret > 0) break; if (wret < 0) ret = wret; } for (i = 0; i <= orig_level; i++) { - if (path.nodes[i]) { - btrfs_block_release(root, path.nodes[i]); + if (path->nodes[i]) { + btrfs_block_release(root, path->nodes[i]); } } + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index c3992b7b0c60..e7510ac5559d 100644 --- a/fs/btrfs/file-item.c +++ 
b/fs/btrfs/file-item.c @@ -13,9 +13,11 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, int ret = 0; struct btrfs_file_extent_item *item; struct btrfs_key file_key; - struct btrfs_path path; + struct btrfs_path *path; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, (u64)-1, objectid, &ins); BUG_ON(ret); @@ -24,19 +26,20 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); - ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); BUG_ON(ret); - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_disk_blocknr(item, ins.objectid); btrfs_set_file_extent_disk_num_blocks(item, ins.offset); btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, ins.offset); btrfs_set_file_extent_generation(item, trans->transid); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); *result = ins.objectid; - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return 0; } @@ -65,25 +68,28 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, { int ret; struct btrfs_key file_key; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_csum_item *item; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - ret = btrfs_insert_empty_item(trans, root, &path, &file_key, + ret = btrfs_insert_empty_item(trans, root, path, &file_key, BTRFS_CSUM_SIZE); if (ret != 0 && ret != -EEXIST) 
goto fail; - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; ret = btrfs_csum_data(root, data, len, item->csum); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); fail: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -93,19 +99,21 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, { int ret; struct btrfs_key file_key; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_csum_item *item; char result[BTRFS_CSUM_SIZE]; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - ret = btrfs_search_slot(NULL, root, &file_key, &path, 0, 0); + ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); if (ret) goto fail; - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; ret = btrfs_csum_data(root, data, len, result); @@ -113,7 +121,8 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, if (memcmp(result, item->csum, BTRFS_CSUM_SIZE)) ret = 1; fail: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 8d8c26a6c1a0..6bfa980790c2 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -7,7 +7,7 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, struct btrfs_inode_item *inode_item) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; int ret; key.objectid = objectid; @@ -15,10 +15,13 @@ int btrfs_insert_inode(struct 
btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); ret = btrfs_insert_item(trans, root, &key, inode_item, sizeof(*inode_item)); - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index f37dab87de16..ddc1c13a5352 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -6,7 +6,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key search_key; struct btrfs_leaf *l; int ret; @@ -16,14 +16,16 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, search_key.flags = (u32)-1; search_key.offset = (u32)-1; - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &search_key, &path, 0, 0); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; BUG_ON(ret == 0); - l = btrfs_buffer_leaf(path.nodes[0]); - BUG_ON(path.slots[0] == 0); - slot = path.slots[0] - 1; + l = btrfs_buffer_leaf(path->nodes[0]); + BUG_ON(path->slots[0] == 0); + slot = path->slots[0] - 1; if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { ret = 1; goto out; @@ -31,9 +33,10 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), sizeof(*item)); btrfs_disk_key_to_cpu(key, &l->items[slot].key); - btrfs_release_path(root, &path); ret = 0; out: + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -41,24 +44,27 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item) { - struct btrfs_path path; + 
struct btrfs_path *path; struct btrfs_leaf *l; int ret; int slot; struct btrfs_root_item *update_item; - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, key, &path, 0, 1); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, root, key, path, 0, 1); if (ret < 0) goto out; BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path.nodes[0]); - slot = path.slots[0]; + l = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); btrfs_memcpy(root, l, update_item, item, sizeof(*item)); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -75,16 +81,19 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key) { - struct btrfs_path path; + struct btrfs_path *path; int ret; - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, key, &path, -1, 1); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; BUG_ON(ret != 0); - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6969b672b570..c260fcad17b3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -35,25 +35,27 @@ static int check_inode(struct inode *inode) static void btrfs_read_locked_inode(struct inode *inode) { - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_inode_item *inode_item; struct btrfs_root *root = btrfs_sb(inode->i_sb); int ret; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + 
btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); check_inode(inode); - ret = btrfs_lookup_inode(NULL, root, &path, inode->i_ino, 0); + ret = btrfs_lookup_inode(NULL, root, path, inode->i_ino, 0); if (ret) { - btrfs_release_path(root, &path); + btrfs_release_path(root, path); mutex_unlock(&root->fs_info->fs_mutex); make_bad_inode(inode); return; } check_inode(inode); - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_inode_item); inode->i_mode = btrfs_inode_mode(inode_item); @@ -69,7 +71,11 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); - btrfs_release_path(root, &path); + + btrfs_release_path(root, path); + btrfs_free_path(path); + inode_item = NULL; + mutex_unlock(&root->fs_info->fs_mutex); check_inode(inode); switch (inode->i_mode & S_IFMT) { @@ -101,15 +107,17 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct inode *dir, struct dentry *dentry) { - struct btrfs_path path; + struct btrfs_path *path; const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; int ret; u64 objectid; struct btrfs_dir_item *di; - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(trans, root, &path, dir->i_ino, + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); if (ret < 0) goto err; @@ -117,15 +125,16 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = -ENOENT; goto err; } - di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); objectid = btrfs_dir_objectid(di); - ret = btrfs_del_item(trans, 
root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; err: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); if (ret == 0) inode_dec_link_count(dentry->d_inode); return ret; @@ -152,30 +161,32 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) int err; int ret; struct btrfs_root *root = btrfs_sb(dir->i_sb); - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; struct btrfs_trans_handle *trans; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { err = ret; goto out; } BUG_ON(ret == 0); - BUG_ON(path.slots[0] == 0); - path.slots[0]--; - leaf = btrfs_buffer_leaf(path.nodes[0]); - found_key = &leaf->items[path.slots[0]].key; + BUG_ON(path->slots[0] == 0); + path->slots[0]--; + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) { err = -ENOENT; goto out; @@ -185,11 +196,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) err = -ENOTEMPTY; goto out; } - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - btrfs_release_path(root, &path); + btrfs_release_path(root, path); key.offset = 1; - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { err = ret; goto out; @@ -198,12 +209,13 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) err = -ENOTEMPTY; goto out; } - 
ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); if (ret) { err = ret; goto out; } - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); /* now the directory is empty */ err = btrfs_unlink_trans(trans, root, dir, dentry); @@ -223,33 +235,36 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, struct inode *inode) { u64 objectid = inode->i_ino; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_inode_map_item *map; struct btrfs_key stat_data_key; int ret; + clear_inode(inode); - btrfs_init_path(&path); - ret = btrfs_lookup_inode_map(trans, root, &path, objectid, -1); + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_inode_map(trans, root, path, objectid, -1); if (ret) { if (ret > 0) ret = -ENOENT; - btrfs_release_path(root, &path); goto error; } - map = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + map = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_inode_map_item); btrfs_disk_key_to_cpu(&stat_data_key, &map->key); - ret = btrfs_del_item(trans, root->fs_info->inode_root, &path); + ret = btrfs_del_item(trans, root->fs_info->inode_root, path); BUG_ON(ret); - btrfs_release_path(root, &path); - btrfs_init_path(&path); + btrfs_release_path(root, path); - ret = btrfs_lookup_inode(trans, root, &path, objectid, -1); + ret = btrfs_lookup_inode(trans, root, path, objectid, -1); BUG_ON(ret); - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - btrfs_release_path(root, &path); error: + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -258,7 +273,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct inode *inode) { int ret; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_key key; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; @@ -267,24 +282,25 @@ static 
int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, u64 extent_num_blocks = 0; int found_extent; + path = btrfs_alloc_path(); + BUG_ON(!path); /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY); while(1) { - btrfs_init_path(&path); - ret = btrfs_search_slot(trans, root, &key, &path, -1, 1); + btrfs_init_path(path); + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { - btrfs_release_path(root, &path); goto error; } if (ret > 0) { - BUG_ON(path.slots[0] == 0); - path.slots[0]--; + BUG_ON(path->slots[0] == 0); + path->slots[0]--; } - leaf = btrfs_buffer_leaf(path.nodes[0]); - found_key = &leaf->items[path.slots[0]].key; + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && @@ -293,8 +309,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_disk_key_offset(found_key) < inode->i_size) break; if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { - fi = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = @@ -305,18 +321,19 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } else { found_extent = 0; } - ret = btrfs_del_item(trans, root, &path); + ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - btrfs_release_path(root, &path); + btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, extent_num_blocks, 0); BUG_ON(ret); } } - btrfs_release_path(root, &path); ret = 0; error: + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -351,23 
+368,26 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; struct btrfs_dir_item *di; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_root *root = btrfs_sb(dir->i_sb); int ret; - btrfs_init_path(&path); - ret = btrfs_lookup_dir_item(NULL, root, &path, dir->i_ino, name, + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); - if (ret || !btrfs_match_dir_item_name(root, &path, name, namelen)) { + if (ret || !btrfs_match_dir_item_name(root, path, name, namelen)) { *ino = 0; ret = 0; goto out; } - di = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); *ino = btrfs_dir_objectid(di); out: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); check_inode(dir); return ret; } @@ -405,7 +425,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_item *item; struct btrfs_dir_item *di; struct btrfs_key key; - struct btrfs_path path; + struct btrfs_path *path; int ret; u32 nritems; struct btrfs_leaf *leaf; @@ -419,27 +439,28 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); key.offset = filp->f_pos; - btrfs_init_path(&path); - ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); + path = btrfs_alloc_path(); + btrfs_init_path(path); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { goto err; } advance = 0; while(1) { - leaf = btrfs_buffer_leaf(path.nodes[0]); + leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); - slot = path.slots[0]; + slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { - ret = 
btrfs_next_leaf(root, &path); + ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path.nodes[0]); + leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); - slot = path.slots[0]; + slot = path->slots[0]; } else { slot++; - path.slots[0]++; + path->slots[0]++; } } advance = 1; @@ -465,7 +486,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } ret = 0; err: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -548,26 +570,29 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, struct inode *inode) { struct btrfs_inode_item *inode_item; - struct btrfs_path path; + struct btrfs_path *path; int ret; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, &path, inode->i_ino, 1); + ret = btrfs_lookup_inode(trans, root, path, inode->i_ino, 1); if (ret) { if (ret > 0) ret = -ENOENT; goto failed; } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_inode_item); fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); failed: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); check_inode(inode); return 0; } @@ -799,38 +824,39 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, u64 extent_start = 0; u64 extent_end = 0; u64 objectid = inode->i_ino; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_root *root = btrfs_sb(inode->i_sb); struct btrfs_trans_handle *trans = NULL; struct btrfs_file_extent_item *item; struct btrfs_leaf *leaf; struct btrfs_disk_key *found_key; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + 
BUG_ON(!path); + btrfs_init_path(path); if (create) trans = btrfs_start_transaction(root, 1); - ret = btrfs_lookup_file_extent(trans, root, &path, + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, iblock << inode->i_blkbits, 0); if (ret < 0) { - btrfs_release_path(root, &path); err = ret; goto out; } if (ret != 0) { - if (path.slots[0] == 0) { - btrfs_release_path(root, &path); + if (path->slots[0] == 0) { + btrfs_release_path(root, path); goto allocate; } - path.slots[0]--; + path->slots[0]--; } - item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), path.slots[0], + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path.nodes[0]); + leaf = btrfs_buffer_leaf(path->nodes[0]); blocknr = btrfs_file_extent_disk_blocknr(item); blocknr += btrfs_file_extent_offset(item); @@ -838,25 +864,23 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (ret == 0) { err = 0; map_bh(result, inode->i_sb, blocknr); - btrfs_release_path(root, &path); goto out; } /* are we inside the extent that was found? 
*/ - found_key = &leaf->items[path.slots[0]].key; + found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != objectid || btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; - btrfs_release_path(root, &path); + btrfs_release_path(root, path); goto allocate; } - extent_start = btrfs_disk_key_offset(&leaf->items[path.slots[0]].key); + extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); extent_start = extent_start >> inode->i_blkbits; extent_start += btrfs_file_extent_offset(item); extent_end = extent_start + btrfs_file_extent_num_blocks(item); - btrfs_release_path(root, &path); if (iblock >= extent_start && iblock < extent_end) { err = 0; map_bh(result, inode->i_sb, blocknr + iblock - extent_start); @@ -880,6 +904,8 @@ allocate: map_bh(result, inode->i_sb, blocknr); out: + btrfs_release_path(root, path); + btrfs_free_path(path); if (trans) btrfs_end_transaction(trans, root); return err; -- cgit v1.2.3 From 7cfcc17e7f2334a0503331264c1340de4e564c85 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 14:53:59 -0400 Subject: Btrfs: corruptions fixed Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 2 -- fs/btrfs/disk-io.c | 21 +++------------------ fs/btrfs/inode-map.c | 43 +++++++++++++++++++++++++------------------ fs/btrfs/super.c | 9 +++++---- fs/btrfs/transaction.c | 2 +- 5 files changed, 34 insertions(+), 43 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index a50a1c44eabd..783f54c2a16c 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -62,7 +62,6 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (!bits) return 0; clear_bit(bit_slot, bits + 1); -#if 0 for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { if (bits[i]) { empty = 0; @@ -74,7 +73,6 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) BUG_ON(!bits); 
kmem_cache_free(btrfs_bit_radix_cachep, bits); } -#endif return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2dbd55084a4e..a227e94b9e88 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,8 +23,6 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { - return sb_find_get_block(root->fs_info->sb, blocknr); -#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -58,14 +56,11 @@ out_unlock: } page_cache_release(page); return ret; -#endif } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { - return sb_getblk(root->fs_info->sb, blocknr); -#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -103,7 +98,6 @@ out_unlock: touch_buffer(ret); page_cache_release(page); return ret; -#endif } static sector_t max_block(struct block_device *bdev) @@ -186,7 +180,6 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, static int btree_writepage(struct page *page, struct writeback_control *wbc) { -#if 0 struct buffer_head *bh; struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); struct buffer_head *head; @@ -201,7 +194,6 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) csum_tree_block(root, bh, 0); bh = bh->b_this_page; } while (bh != head); -#endif return block_write_full_page(page, btree_get_block, wbc); } @@ -218,8 +210,6 @@ static struct address_space_operations btree_aops = { struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { - return sb_bread(root->fs_info->sb, blocknr); -#if 0 struct buffer_head *bh = NULL; bh = 
btrfs_find_create_tree_block(root, blocknr); @@ -243,8 +233,6 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) fail: brelse(bh); return NULL; - -#endif } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -323,8 +311,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; fs_info->sb = sb; - fs_info->btree_inode = NULL; -#if 0 fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; @@ -332,7 +318,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_mapping->a_ops = &btree_aops; insert_inode_hash(fs_info->btree_inode); mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); -#endif fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { @@ -435,8 +420,8 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); crypto_free_hash(root->fs_info->hash_tfm); - // truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - // iput(root->fs_info->btree_inode); + truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); + iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); @@ -447,6 +432,6 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - // brelse(buf); + brelse(buf); } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index fa4adb81b149..b3de823eb628 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -10,7 +10,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, u64 dirid, u64 *objectid) { - struct btrfs_path path; + struct btrfs_path *path; 
struct btrfs_key key; int ret; u64 hole_size = 0; @@ -31,20 +31,22 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); search_key.offset = 0; - btrfs_init_path(&path); + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); start_found = 0; - ret = btrfs_search_slot(trans, root, &search_key, &path, 0, 0); + ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); if (ret < 0) goto error; - if (path.slots[0] > 0) - path.slots[0]--; + if (path->slots[0] > 0) + path->slots[0]--; while (1) { - l = btrfs_buffer_leaf(path.nodes[0]); - slot = path.slots[0]; + l = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - ret = btrfs_next_leaf(root, &path); + ret = btrfs_next_leaf(root, path); if (ret == 0) continue; if (ret < 0) @@ -72,17 +74,19 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, } start_found = 1; last_ino = key.objectid + 1; - path.slots[0]++; + path->slots[0]++; } // FIXME -ENOSPC found: root->fs_info->last_inode_alloc = *objectid; root->fs_info->last_inode_alloc_dirid = dirid; - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); BUG_ON(*objectid < search_start); return 0; error: - btrfs_release_path(root, &path); + btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -91,7 +95,7 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, u64 objectid, struct btrfs_key *location) { int ret = 0; - struct btrfs_path path; + struct btrfs_path *path; struct btrfs_inode_map_item *inode_item; struct btrfs_key key; struct btrfs_root *inode_root = fs_root->fs_info->inode_root; @@ -100,18 +104,21 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_MAP_ITEM_KEY); key.offset = 0; - btrfs_init_path(&path); - ret = btrfs_insert_empty_item(trans, inode_root, &path, &key, + path = 
btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_insert_empty_item(trans, inode_root, path, &key, sizeof(struct btrfs_inode_map_item)); if (ret) goto out; - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path.nodes[0]), - path.slots[0], struct btrfs_inode_map_item); + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); - btrfs_mark_buffer_dirty(path.nodes[0]); + btrfs_mark_buffer_dirty(path->nodes[0]); out: - btrfs_release_path(inode_root, &path); + btrfs_release_path(inode_root, path); + btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c260fcad17b3..9fd87f5edbc9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -49,6 +49,7 @@ static void btrfs_read_locked_inode(struct inode *inode) ret = btrfs_lookup_inode(NULL, root, path, inode->i_ino, 0); if (ret) { btrfs_release_path(root, path); + btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); make_bad_inode(inode); return; @@ -215,7 +216,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out; } btrfs_release_path(root, path); - btrfs_free_path(path); /* now the directory is empty */ err = btrfs_unlink_trans(trans, root, dir, dentry); @@ -223,6 +223,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) inode->i_size = 0; } out: + btrfs_release_path(root, path); + btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_end_transaction(trans, root); if (ret && !err) @@ -800,11 +802,10 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) sb->s_dirt = 0; if (!wait) { - // filemap_flush(root->fs_info->btree_inode->i_mapping); - filemap_flush(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + 
filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 72b52e1e0b1b..2b0f98c92123 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -88,7 +88,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); return 0; } -- cgit v1.2.3 From 2da566edd8ce32ae2952b863ee634bcc2e7d63c1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 15:43:21 -0400 Subject: Btrfs: csum_verify_file_block locking fix Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - fs/btrfs/extent-tree.c | 2 -- fs/btrfs/file-item.c | 2 ++ fs/btrfs/super.c | 1 - 4 files changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a227e94b9e88..39b117a57a54 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -160,7 +160,6 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int ret; struct btrfs_node *node; - return 0; ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, bh->b_size - BTRFS_CSUM_SIZE, result); if (ret) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d785b721b461..cd40a08dd8a6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -249,8 +249,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); - if (extent_root->fs_info->last_insert.objectid > blocknr) - extent_root->fs_info->last_insert.objectid = blocknr; if (ret) BUG(); } diff 
--git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index e7510ac5559d..2c98b620c594 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -110,6 +110,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); if (ret) goto fail; @@ -123,6 +124,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, fail: btrfs_release_path(root, path); btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9fd87f5edbc9..fbc26eafc58c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1316,7 +1316,6 @@ static void btrfs_destroy_inode(struct inode *inode) WARN_ON(ei->magic != 0xDEADBEEF); WARN_ON(ei->magic2 != 0xDEADBEAF); WARN_ON(!list_empty(&inode->i_dentry)); - WARN_ON(inode->i_ino == 1); WARN_ON(inode->i_data.nrpages); ei->magic = 0; -- cgit v1.2.3 From df24a2b9c9bcef3348e4b1a8f206cd484a248d36 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 09:36:31 -0400 Subject: Btrfs: early inline file data code Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 18 +++-- fs/btrfs/super.c | 240 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 248 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 453ce835209a..9ef65e2ecf51 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,19 +16,24 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); -struct btrfs_path *btrfs_alloc_path(void) +inline void btrfs_init_path(struct btrfs_path *p) { - return kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); + memset(p, 0, sizeof(*p)); } -void 
btrfs_free_path(struct btrfs_path *p) +struct btrfs_path *btrfs_alloc_path(void) { - kmem_cache_free(btrfs_path_cachep, p); + struct btrfs_path *path; + path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); + if (path) + btrfs_init_path(path); + return path; } -inline void btrfs_init_path(struct btrfs_path *p) +void btrfs_free_path(struct btrfs_path *p) { - memset(p, 0, sizeof(*p)); + btrfs_release_path(NULL, p); + kmem_cache_free(btrfs_path_cachep, p); } void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) @@ -77,6 +82,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); } btrfs_block_release(root, buf); + mark_buffer_dirty(cow); *cow_ret = cow; return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index fbc26eafc58c..68127b8462ab 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -816,6 +816,73 @@ printk("btrfs sync_fs\n"); return 0; } +static int btrfs_get_block_inline(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_leaf *leaf; + int num_bytes = result->b_size; + int item_size; + int ret; + u64 pos; + char *ptr; + int copy_size; + int err = 0; + char *safe_ptr; + char *data_ptr; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + WARN_ON(create); + if (create) { + return 0; + } + pos = iblock << inode->i_blkbits; + key.objectid = inode->i_ino; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INLINE_DATA_KEY); + ptr = kmap(result->b_page); + safe_ptr = ptr; + ptr += (pos & (PAGE_CACHE_SIZE -1)); +again: + key.offset = pos; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret) { + if (ret < 0) + err = ret; + else + err = 0; + goto out; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + item_size = btrfs_item_size(leaf->items + path->slots[0]); + copy_size = min(num_bytes, item_size); + 
data_ptr = btrfs_item_ptr(leaf, path->slots[0], char); + WARN_ON(safe_ptr + PAGE_CACHE_SIZE < ptr + copy_size); + memcpy(ptr, data_ptr, copy_size); + pos += copy_size; + num_bytes -= copy_size; + WARN_ON(num_bytes < 0); + ptr += copy_size; + btrfs_release_path(root, path); + if (num_bytes != 0) { + if (pos >= i_size_read(inode)) + memset(ptr, 0, num_bytes); + else + goto again; + } + set_buffer_uptodate(result); + map_bh(result, inode->i_sb, 0); + err = 0; +out: + btrfs_free_path(path); + kunmap(result->b_page); + return err; +} + static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -918,7 +985,8 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, int err; struct btrfs_root *root = btrfs_sb(inode->i_sb); mutex_lock(&root->fs_info->fs_mutex); - err = btrfs_get_block_lock(inode, iblock, result, create); + // err = btrfs_get_block_lock(inode, iblock, result, create); + err = btrfs_get_block_inline(inode, iblock, result, create); mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -1177,6 +1245,170 @@ out: return num_written ? 
num_written : err; } +static ssize_t inline_one_page(struct btrfs_root *root, struct inode *inode, + struct page *page, loff_t pos, + size_t offset, size_t write_bytes) +{ + struct btrfs_path *path; + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_leaf *leaf; + struct btrfs_key found_key; + int ret; + size_t copy_size = 0; + char *dst = NULL; + int err = 0; + size_t num_written = 0; + + path = btrfs_alloc_path(); + BUG_ON(!path); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + key.objectid = inode->i_ino; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INLINE_DATA_KEY); + +again: + key.offset = pos; + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { + err = ret; + goto out; + } + if (ret == 0) { + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + copy_size = btrfs_item_size(leaf->items + path->slots[0]); + dst = btrfs_item_ptr(leaf, path->slots[0], char); + copy_size = min(write_bytes, copy_size); + goto copyit; + } else { + int slot = path->slots[0]; + if (slot > 0) { + slot--; + } + // FIXME find max key + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[slot].key); + if (found_key.objectid != inode->i_ino) + goto insert; + if (btrfs_key_type(&found_key) != BTRFS_INLINE_DATA_KEY) + goto insert; + copy_size = btrfs_item_size(leaf->items + slot); + if (found_key.offset + copy_size <= pos) + goto insert; + dst = btrfs_item_ptr(leaf, path->slots[0], char); + dst += pos - found_key.offset; + copy_size = copy_size - (pos - found_key.offset); + BUG_ON(copy_size < 0); + copy_size = min(write_bytes, copy_size); + WARN_ON(copy_size == 0); + goto copyit; + } +insert: + btrfs_release_path(root, path); + copy_size = min(write_bytes, (size_t)512); + ret = btrfs_insert_empty_item(trans, root, path, &key, copy_size); + BUG_ON(ret); + dst = 
btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], char); +copyit: + WARN_ON(copy_size == 0); + WARN_ON(dst + copy_size > + btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], char) + + btrfs_item_size(btrfs_buffer_leaf(path->nodes[0])->items + + path->slots[0])); + btrfs_memcpy(root, path->nodes[0]->b_data, dst, + page_address(page) + offset, copy_size); + mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(root, path); + pos += copy_size; + offset += copy_size; + num_written += copy_size; + write_bytes -= copy_size; + if (write_bytes) + goto again; +out: + btrfs_free_path(path); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + return num_written ? num_written : err; +} + +static ssize_t btrfs_file_inline_write(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + loff_t pos; + size_t num_written = 0; + int err = 0; + int ret = 0; + struct inode *inode = file->f_path.dentry->d_inode; + struct btrfs_root *root = btrfs_sb(inode->i_sb); + unsigned long page_index; + + if (file->f_flags & O_DIRECT) + return -EINVAL; + pos = *ppos; + + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + current->backing_dev_info = inode->i_mapping->backing_dev_info; + err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + if (err) + goto out; + if (count == 0) + goto out; + err = remove_suid(file->f_path.dentry); + if (err) + goto out; + file_update_time(file); + mutex_lock(&inode->i_mutex); + while(count > 0) { + size_t offset = pos & (PAGE_CACHE_SIZE - 1); + size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); + struct page *page; + + page_index = pos >> PAGE_CACHE_SHIFT; + page = grab_cache_page(inode->i_mapping, page_index); + if (!PageUptodate(page)) { + ret = mpage_readpage(page, btrfs_get_block); + BUG_ON(ret); + lock_page(page); + } + ret = btrfs_copy_from_user(pos, 1, + write_bytes, &page, buf); + BUG_ON(ret); + write_bytes = 
inline_one_page(root, inode, page, pos, + offset, write_bytes); + SetPageUptodate(page); + if (write_bytes > 0 && pos + write_bytes > inode->i_size) { + i_size_write(inode, pos + write_bytes); + mark_inode_dirty(inode); + } + page_cache_release(page); + unlock_page(page); + if (write_bytes < 0) + goto out_unlock; + buf += write_bytes; + count -= write_bytes; + pos += write_bytes; + num_written += write_bytes; + + balance_dirty_pages_ratelimited(inode->i_mapping); + cond_resched(); + } +out_unlock: + mutex_unlock(&inode->i_mutex); +out: + *ppos = pos; + current->backing_dev_info = NULL; + return num_written ? num_written : err; +} + static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size) { @@ -1420,7 +1652,7 @@ static struct file_operations btrfs_dir_file_operations = { static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, - .readpages = btrfs_readpages, + // .readpages = btrfs_readpages, .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, @@ -1434,8 +1666,8 @@ static struct inode_operations btrfs_file_inode_operations = { static struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .aio_read = btrfs_file_aio_read, - .write = btrfs_file_write, + .aio_read = generic_file_aio_read, + .write = btrfs_file_inline_write, .mmap = generic_file_mmap, .open = generic_file_open, }; -- cgit v1.2.3 From d4dbff953e1f6f4079126c0404cc24f2ef14e925 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 14:08:15 -0400 Subject: Btrfs: support for items bigger than 1/2 the blocksize Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 132 ++++++++++++++++++++++++++++++++++++++++--------------- fs/btrfs/super.c | 16 +++++-- 2 files changed, 110 insertions(+), 38 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9ef65e2ecf51..864ee423b300 100644 --- 
a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -6,7 +6,8 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size); + *root, struct btrfs_key *ins_key, + struct btrfs_path *path, int data_size); static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *dst, struct buffer_head *src); @@ -101,19 +102,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, return btrfs_item_offset(leaf->items + nr - 1); } -/* - * The space between the end of the leaf items and - * the start of the leaf data. IOW, how much room - * the leaf has left for both items and data - */ -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) -{ - int data_end = leaf_data_end(root, leaf); - int nritems = btrfs_header_nritems(&leaf->header); - char *items_end = (char *)(leaf->items + nritems + 1); - return (char *)(btrfs_leaf_data(leaf) + data_end) - (char *)items_end; -} - /* * compare two keys in a memcmp fashion */ @@ -510,8 +498,8 @@ again: if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(&c->header) == - BTRFS_NODEPTRS_PER_BLOCK(root)) { + if (ins_len > 0 && btrfs_header_nritems(&c->header) >= + BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { int sret = split_node(trans, root, p, level); BUG_ON(sret > 0); if (sret) @@ -537,7 +525,8 @@ again: p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, l) < sizeof(struct btrfs_item) + ins_len) { - int sret = split_leaf(trans, root, p, ins_len); + int sret = split_leaf(trans, root, key, + p, ins_len); BUG_ON(sret > 0); if (sret) return sret; @@ -825,16 +814,29 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) { int data_len; - int end = 
start + nr - 1; + int nritems = btrfs_header_nritems(&l->header); + int end = min(nritems, start + nr) - 1; if (!nr) return 0; data_len = btrfs_item_end(l->items + start); data_len = data_len - btrfs_item_offset(l->items + end); data_len += sizeof(struct btrfs_item) * nr; + WARN_ON(data_len < 0); return data_len; } +/* + * The space between the end of the leaf items and + * the start of the leaf data. IOW, how much room + * the leaf has left for both items and data + */ +int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) +{ + int nritems = btrfs_header_nritems(&leaf->header); + return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); +} + /* * push some data in the path leaf to the right, trying to free up at * least data_size bytes. returns zero if the push worked, nonzero otherwise @@ -1084,7 +1086,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root * returns 0 if all went well and < 0 on failure. */ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int data_size) + *root, struct btrfs_key *ins_key, + struct btrfs_path *path, int data_size) { struct buffer_head *l_buf; struct btrfs_leaf *l; @@ -1097,8 +1100,10 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root int data_copy_size; int rt_data_off; int i; - int ret; + int ret = 0; int wret; + int double_split = 0; + struct btrfs_disk_key disk_key; /* first try to make some room by pushing left and right */ wret = push_leaf_left(trans, root, path, data_size); @@ -1127,26 +1132,58 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root mid = (nritems + 1)/ 2; right_buffer = btrfs_alloc_free_block(trans, root); BUG_ON(!right_buffer); - BUG_ON(mid == nritems); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); - if (mid <= slot) { - /* FIXME, just alloc a new leaf here */ - if (leaf_space_used(l, mid, 
nritems - mid) + space_needed > - BTRFS_LEAF_DATA_SIZE(root)) - BUG(); - } else { - /* FIXME, just alloc a new leaf here */ - if (leaf_space_used(l, 0, mid + 1) + space_needed > - BTRFS_LEAF_DATA_SIZE(root)) - BUG(); - } - btrfs_set_header_nritems(&right->header, nritems - mid); btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); + if (mid <= slot) { + if (nritems == 1 || + leaf_space_used(l, mid, nritems - mid) + space_needed > + BTRFS_LEAF_DATA_SIZE(root)) { + if (slot >= nritems) { + btrfs_cpu_key_to_disk(&disk_key, ins_key); + btrfs_set_header_nritems(&right->header, 0); + wret = insert_ptr(trans, root, path, + &disk_key, + right_buffer->b_blocknr, + path->slots[1] + 1, 1); + if (wret) + ret = wret; + btrfs_block_release(root, path->nodes[0]); + path->nodes[0] = right_buffer; + path->slots[0] = 0; + path->slots[1] += 1; + return ret; + } + mid = slot; + double_split = 1; + } + } else { + if (leaf_space_used(l, 0, mid + 1) + space_needed > + BTRFS_LEAF_DATA_SIZE(root)) { + if (slot == 0) { + btrfs_cpu_key_to_disk(&disk_key, ins_key); + btrfs_set_header_nritems(&right->header, 0); + wret = insert_ptr(trans, root, path, + &disk_key, + right_buffer->b_blocknr, + path->slots[1] - 1, 1); + if (wret) + ret = wret; + btrfs_block_release(root, path->nodes[0]); + path->nodes[0] = right_buffer; + path->slots[0] = 0; + path->slots[1] -= 1; + return ret; + } + mid = slot; + double_split = 1; + } + } + btrfs_set_header_nritems(&right->header, nritems - mid); data_copy_size = btrfs_item_end(l->items + mid) - leaf_data_end(root, l); btrfs_memcpy(root, right, right->items, l->items + mid, @@ -1180,6 +1217,31 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root } else btrfs_block_release(root, right_buffer); BUG_ON(path->slots[0] < 0); + + 
if (!double_split) + return ret; + right_buffer = btrfs_alloc_free_block(trans, root); + BUG_ON(!right_buffer); + right = btrfs_buffer_leaf(right_buffer); + memset(&right->header, 0, sizeof(right->header)); + btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); + btrfs_set_header_generation(&right->header, trans->transid); + btrfs_set_header_level(&right->header, 0); + btrfs_set_header_parentid(&right->header, + btrfs_header_parentid(btrfs_buffer_header(root->node))); + btrfs_cpu_key_to_disk(&disk_key, ins_key); + btrfs_set_header_nritems(&right->header, 0); + wret = insert_ptr(trans, root, path, + &disk_key, + right_buffer->b_blocknr, + path->slots[1], 1); + if (wret) + ret = wret; + btrfs_block_release(root, path->nodes[0]); + path->nodes[0] = right_buffer; + path->slots[0] = 0; + check_node(root, path, 1); + check_leaf(root, path, 0); return ret; } @@ -1220,9 +1282,9 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root data_end = leaf_data_end(root, leaf); if (btrfs_leaf_free_space(root, leaf) < - sizeof(struct btrfs_item) + data_size) + sizeof(struct btrfs_item) + data_size) { BUG(); - + } slot = path->slots[0]; BUG_ON(slot < 0); if (slot != nritems) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 68127b8462ab..0afb6cece82c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -136,8 +136,11 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, err: btrfs_release_path(root, path); btrfs_free_path(path); - if (ret == 0) + if (ret == 0) { inode_dec_link_count(dentry->d_inode); + dir->i_size -= name_len; + mark_inode_dirty(dir); + } return ret; } @@ -290,6 +293,10 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = 0; + /* + * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys + * or extent data + */ btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY); while(1) { btrfs_init_path(path); @@ -306,6 +313,7 @@ 
static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && + btrfs_disk_key_type(found_key) != BTRFS_INLINE_DATA_KEY && btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) break; if (btrfs_disk_key_offset(found_key) < inode->i_size) @@ -1036,7 +1044,7 @@ static void btrfs_truncate(struct inode *inode) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; - nobh_truncate_page(inode->i_mapping, inode->i_size); + // nobh_truncate_page(inode->i_mapping, inode->i_size); /* FIXME, add redo link to tree so we don't leak on crash */ mutex_lock(&root->fs_info->fs_mutex); @@ -1309,7 +1317,9 @@ again: } insert: btrfs_release_path(root, path); - copy_size = min(write_bytes, (size_t)512); + copy_size = min(write_bytes, + (size_t)BTRFS_LEAF_DATA_SIZE(root) - + sizeof(struct btrfs_item) * 4); ret = btrfs_insert_empty_item(trans, root, path, &key, copy_size); BUG_ON(ret); dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), -- cgit v1.2.3 From e8f05c4508aed906146919f2efa89242a7f90a8b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 14:30:09 -0400 Subject: Btrfs: disable inline data code for now Signed-off-by: Chris Mason --- fs/btrfs/super.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0afb6cece82c..822243650ae9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -824,6 +824,7 @@ printk("btrfs sync_fs\n"); return 0; } +#if 0 static int btrfs_get_block_inline(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -890,6 +891,7 @@ out: kunmap(result->b_page); return err; } +#endif static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) @@ -993,8 +995,8 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, int err; 
struct btrfs_root *root = btrfs_sb(inode->i_sb); mutex_lock(&root->fs_info->fs_mutex); - // err = btrfs_get_block_lock(inode, iblock, result, create); - err = btrfs_get_block_inline(inode, iblock, result, create); + err = btrfs_get_block_lock(inode, iblock, result, create); + // err = btrfs_get_block_inline(inode, iblock, result, create); mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -1002,13 +1004,11 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - WARN_ON(1); return nobh_prepare_write(page, from, to, btrfs_get_block); } static int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - WARN_ON(1); return nobh_commit_write(file, page, from, to); } @@ -1044,7 +1044,7 @@ static void btrfs_truncate(struct inode *inode) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; - // nobh_truncate_page(inode->i_mapping, inode->i_size); + nobh_truncate_page(inode->i_mapping, inode->i_size); /* FIXME, add redo link to tree so we don't leak on crash */ mutex_lock(&root->fs_info->fs_mutex); @@ -1253,6 +1253,7 @@ out: return num_written ? num_written : err; } +#if 0 static ssize_t inline_one_page(struct btrfs_root *root, struct inode *inode, struct page *page, loff_t pos, size_t offset, size_t write_bytes) @@ -1418,6 +1419,7 @@ out: current->backing_dev_info = NULL; return num_written ? 
num_written : err; } +#endif static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size) @@ -1662,7 +1664,7 @@ static struct file_operations btrfs_dir_file_operations = { static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, - // .readpages = btrfs_readpages, + .readpages = btrfs_readpages, .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, @@ -1676,8 +1678,8 @@ static struct inode_operations btrfs_file_inode_operations = { static struct file_operations btrfs_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = btrfs_file_inline_write, + .aio_read = btrfs_file_aio_read, + .write = btrfs_file_write, .mmap = generic_file_mmap, .open = generic_file_open, }; -- cgit v1.2.3 From bae45de03c4d54a9893dedf8a015beb2608b896a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 21:22:22 -0400 Subject: Btrfs: add dir inode index Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 15 ++++++++------- fs/btrfs/dir-item.c | 19 +++++++++++++++++++ fs/btrfs/super.c | 4 ++-- fs/btrfs/sysfs.c | 4 ++++ 4 files changed, 33 insertions(+), 9 deletions(-) create mode 100644 fs/btrfs/sysfs.c (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 41cc013ef08d..87c56222a620 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -282,41 +282,42 @@ struct btrfs_root { * for every name in a directory. */ #define BTRFS_DIR_ITEM_KEY 2 +#define BTRFS_DIR_INDEX_KEY 3 /* * inline data is file data that fits in the btree. */ -#define BTRFS_INLINE_DATA_KEY 3 +#define BTRFS_INLINE_DATA_KEY 4 /* * extent data is for data that can't fit in the btree. 
It points to * a (hopefully) huge chunk of disk */ -#define BTRFS_EXTENT_DATA_KEY 4 +#define BTRFS_EXTENT_DATA_KEY 5 /* * csum items have the checksums for data in the extents */ -#define BTRFS_CSUM_ITEM_KEY 5 +#define BTRFS_CSUM_ITEM_KEY 6 /* * root items point to tree roots. There are typically in the root * tree used by the super block to find all the other trees */ -#define BTRFS_ROOT_ITEM_KEY 6 +#define BTRFS_ROOT_ITEM_KEY 7 /* * extent items are in the extent map tree. These record which blocks * are used, and how many references there are to each block */ -#define BTRFS_EXTENT_ITEM_KEY 7 +#define BTRFS_EXTENT_ITEM_KEY 8 /* * the inode map records which inode numbers are in use and where * they actually live on disk */ -#define BTRFS_INODE_MAP_ITEM_KEY 8 +#define BTRFS_INODE_MAP_ITEM_KEY 9 /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 9 +#define BTRFS_STRING_ITEM_KEY 10 static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index baceb1da609f..2a87e23ac9c9 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -37,6 +37,25 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root name_ptr = (char *)(dir_item + 1); btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(root, path); + + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = objectid; + ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); + // FIXME clear the dirindex bit + if (ret) + goto out; + + dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_dir_item); + btrfs_set_dir_objectid(dir_item, objectid); + btrfs_set_dir_type(dir_item, type); + btrfs_set_dir_flags(dir_item, 0); + btrfs_set_dir_name_len(dir_item, name_len); + name_ptr = (char *)(dir_item + 1); + btrfs_memcpy(root, 
path->nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); btrfs_free_path(path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 822243650ae9..4b042460e875 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -447,7 +447,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = filp->f_pos; path = btrfs_alloc_path(); btrfs_init_path(path); @@ -477,7 +477,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) item = leaf->items + slot; if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; - if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_ITEM_KEY) + if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_INDEX_KEY) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c new file mode 100644 index 000000000000..ed8b7e20cfbc --- /dev/null +++ b/fs/btrfs/sysfs.c @@ -0,0 +1,4 @@ +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" -- cgit v1.2.3 From 5f26f772e5c4e833ffcb0599f54deda466d2a3e5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 5 Apr 2007 10:38:44 -0400 Subject: Btrfs: more inode indexed directory work Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 +++ fs/btrfs/dir-item.c | 18 ++++++++++ fs/btrfs/extent-tree.c | 3 +- fs/btrfs/super.c | 90 ++++++++++++++++++++++++++------------------------ 4 files changed, 70 insertions(+), 45 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 87c56222a620..61d7b4738af6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -921,6 +921,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_lookup_dir_item(struct 
btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, const char *name, int name_len, int mod); +int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + u64 objectid, int mod); int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len); /* inode-map.c */ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 2a87e23ac9c9..62d0c0916a73 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -80,6 +80,24 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + u64 objectid, int mod) +{ + int ret; + struct btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + + key.objectid = dir; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = objectid; + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + return ret; +} + int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 688aa861a928..7c21f63f1b93 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -224,6 +224,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); + ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); if (ret) { printk("failed to find %Lu\n", key.objectid); @@ -363,8 +364,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (root->fs_info->last_insert.objectid > search_start) search_start = root->fs_info->last_insert.objectid; - path = btrfs_alloc_path(); - check_failed: btrfs_init_path(path); ins->objectid = search_start; diff --git 
a/fs/btrfs/super.c b/fs/btrfs/super.c index 4b042460e875..d776b29a1676 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -130,6 +130,13 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_dir_item); objectid = btrfs_dir_objectid(di); + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + btrfs_release_path(root, path); + ret = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + objectid, -1); + BUG_ON(ret); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; @@ -138,7 +145,7 @@ err: btrfs_free_path(path); if (ret == 0) { inode_dec_link_count(dentry->d_inode); - dir->i_size -= name_len; + dir->i_size -= name_len * 2; mark_inode_dirty(dir); } return ret; @@ -168,8 +175,10 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_path *path; struct btrfs_key key; struct btrfs_trans_handle *trans; - struct btrfs_disk_key *found_key; + struct btrfs_key found_key; + int found_type; struct btrfs_leaf *leaf; + char *goodnames = ".."; path = btrfs_alloc_path(); BUG_ON(!path); @@ -178,46 +187,42 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) trans = btrfs_start_transaction(root, 1); key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - err = ret; - goto out; - } + key.flags = (u32)-1; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + err = ret; + goto out; + } + BUG_ON(ret == 0); + if (path->slots[0] == 0) { + err = -ENOENT; + goto out; + } + path->slots[0]--; + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != inode->i_ino) { + err = -ENOENT; + goto out; + } + if ((found_type != BTRFS_DIR_ITEM_KEY && + found_type != 
BTRFS_DIR_INDEX_KEY) || + (!btrfs_match_dir_item_name(root, path, goodnames, 2) && + !btrfs_match_dir_item_name(root, path, goodnames, 1))) { + err = -ENOTEMPTY; + goto out; + } + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); - BUG_ON(ret == 0); - BUG_ON(path->slots[0] == 0); - path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - if (btrfs_disk_key_objectid(found_key) != inode->i_ino) { - err = -ENOENT; - goto out; - } - if (btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || - btrfs_disk_key_offset(found_key) != 2) { - err = -ENOTEMPTY; - goto out; - } - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - btrfs_release_path(root, path); - key.offset = 1; - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - err = ret; - goto out; - } - if (ret > 0) { - err = -ENOTEMPTY; - goto out; - } - ret = btrfs_del_item(trans, root, path); - if (ret) { - err = ret; - goto out; + if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1) + break; + btrfs_release_path(root, path); } + ret = 0; btrfs_release_path(root, path); /* now the directory is empty */ @@ -676,7 +681,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, dentry->d_parent->d_inode->i_ino, inode->i_ino, 0); if (ret == 0) { - dentry->d_parent->d_inode->i_size += dentry->d_name.len; + dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), dentry->d_parent->d_inode); } @@ -754,7 +759,7 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, dir->i_ino, 1); if (ret) goto error; - inode->i_size = 3; + inode->i_size = 6; ret = btrfs_update_inode(trans, root, inode); error: return ret; @@ -915,7 +920,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (create) trans = btrfs_start_transaction(root, 1); - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, iblock << inode->i_blkbits, 0); -- 
cgit v1.2.3 From 7fcde0e3298c00ee18e2ae7f01c67f99133ef7be Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 5 Apr 2007 12:13:21 -0400 Subject: Btrfs: finish off inode indexing in dirs, add overflows Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 33 ++++++++++++++++++++++++ fs/btrfs/dir-item.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/super.c | 11 ++++---- 3 files changed, 107 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 61d7b4738af6..794f7e4f1c07 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -270,6 +270,10 @@ struct btrfs_root { #define BTRFS_KEY_TYPE_MAX 256 #define BTRFS_KEY_TYPE_MASK (BTRFS_KEY_TYPE_MAX - 1) +#define BTRFS_KEY_OVERFLOW_MAX 128 +#define BTRFS_KEY_OVERFLOW_SHIFT 8 +#define BTRFS_KEY_OVERFLOW_MASK (0x7FULL << BTRFS_KEY_OVERFLOW_SHIFT) + /* * inode items have the data typically returned from stat and store other * info about object characteristics. There is one for every file and dir in @@ -588,6 +592,19 @@ static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, disk->flags = cpu_to_le32(val); } +static inline u32 btrfs_key_overflow(struct btrfs_key *key) +{ + u32 over = key->flags & BTRFS_KEY_OVERFLOW_MASK; + return over >> BTRFS_KEY_OVERFLOW_SHIFT; +} + +static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) +{ + BUG_ON(over > BTRFS_KEY_OVERFLOW_MAX); + over = over << BTRFS_KEY_OVERFLOW_SHIFT; + key->flags = (key->flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; +} + static inline u32 btrfs_key_type(struct btrfs_key *key) { return key->flags & BTRFS_KEY_TYPE_MASK; @@ -612,6 +629,22 @@ static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, u32 type) btrfs_set_disk_key_flags(key, flags); } +static inline u32 btrfs_disk_key_overflow(struct btrfs_disk_key *key) +{ + u32 over = le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK; + return over >> BTRFS_KEY_OVERFLOW_SHIFT; +} + +static 
inline void btrfs_set_disK_key_overflow(struct btrfs_disk_key *key, + u32 over) +{ + u32 flags = btrfs_disk_key_flags(key); + BUG_ON(over > BTRFS_KEY_OVERFLOW_MAX); + over = over << BTRFS_KEY_OVERFLOW_SHIFT; + flags = (flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; + btrfs_set_disk_key_flags(key, flags); +} + static inline u64 btrfs_header_blocknr(struct btrfs_header *h) { return le64_to_cpu(h->blocknr); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 62d0c0916a73..b1629a5d73c8 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -4,6 +4,26 @@ #include "hash.h" #include "transaction.h" +int insert_with_overflow(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, struct btrfs_key + *cpu_key, u32 data_size) +{ + int overflow; + int ret; + + ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); + overflow = btrfs_key_overflow(cpu_key); + + while(ret == -EEXIST && overflow < BTRFS_KEY_OVERFLOW_MAX) { + overflow++; + btrfs_set_key_overflow(cpu_key, overflow); + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, root, path, cpu_key, + data_size); + } + return ret; +} + int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, u64 objectid, u8 type) @@ -23,7 +43,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; - ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); + ret = insert_with_overflow(trans, root, path, &key, data_size); if (ret) goto out; @@ -41,7 +61,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = objectid; - ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); + ret = insert_with_overflow(trans, root, path, &key, data_size); // FIXME clear the dirindex bit 
if (ret) goto out; @@ -70,14 +90,40 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; + struct btrfs_disk_key *found_key; + struct btrfs_leaf *leaf; key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); - ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - return ret; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ret; + if (ret > 0) { + if (path->slots[0] == 0) + return 1; + path->slots[0]--; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; + + if (btrfs_disk_key_objectid(found_key) != dir || + btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || + btrfs_disk_key_offset(found_key) != key.offset) + return 1; + + if (btrfs_match_dir_item_name(root, path, name, name_len)) + return 0; + + if (btrfs_disk_key_overflow(found_key) == 0) + return 1; + btrfs_release_path(root, path); + } + return 1; } int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, @@ -89,13 +135,31 @@ int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? 
-1 : 0; int cow = mod != 0; + struct btrfs_disk_key *found_key; + struct btrfs_leaf *leaf; key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); key.offset = objectid; ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - return ret; + if (ret < 0) + return ret; + if (ret > 0) { + if (path->slots[0] == 0) + return 1; + path->slots[0]--; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; + + if (btrfs_disk_key_objectid(found_key) != dir || + btrfs_disk_key_type(found_key) != BTRFS_DIR_INDEX_KEY) + return 1; + if (btrfs_disk_key_offset(found_key) == objectid) + return 0; + return 1; } int btrfs_match_dir_item_name(struct btrfs_root *root, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d776b29a1676..4fd2b168b2c4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -486,19 +486,18 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; - + filp->f_pos = btrfs_disk_key_offset(&item->key); advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), btrfs_dir_objectid(di), d_type); - if (over) { - filp->f_pos = btrfs_disk_key_offset(&item->key); - break; - } - filp->f_pos = btrfs_disk_key_offset(&item->key) + 1; + if (over) + goto nopos; } + filp->f_pos++; +nopos: ret = 0; err: btrfs_release_path(root, path); -- cgit v1.2.3 From 5be6f7f174146d91039a27ebb2f1b4ac599172b3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 5 Apr 2007 13:35:25 -0400 Subject: Btrfs: dirindex optimizations Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/dir-item.c | 5 ++++- fs/btrfs/disk-io.c | 6 +++++- fs/btrfs/inode-map.c | 48 +++++++++++++++++++++++++++++++++--------------- fs/btrfs/super.c | 5 +++++ 5 files 
changed, 50 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 794f7e4f1c07..6d40758b893f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -239,6 +239,7 @@ struct btrfs_fs_info { struct radix_tree_root pinned_radix; u64 last_inode_alloc; u64 generation; + u64 highest_inode; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; struct buffer_head *sb_buffer; @@ -970,6 +971,8 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, int mod); +int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); + /* inode-item.c */ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, struct btrfs_inode_item diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b1629a5d73c8..0ee9945fb1b0 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -92,6 +92,7 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int cow = mod != 0; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; + u32 overflow; key.objectid = dir; key.flags = 0; @@ -119,8 +120,10 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root if (btrfs_match_dir_item_name(root, path, name, name_len)) return 0; - if (btrfs_disk_key_overflow(found_key) == 0) + overflow = btrfs_disk_key_overflow(found_key); + if (overflow == 0) return 1; + btrfs_set_key_overflow(&key, overflow - 1); btrfs_release_path(root, path); } return 1; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de9ee3aa0aad..5230554380d1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -311,6 +311,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; + fs_info->highest_inode = 
0; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -360,12 +361,15 @@ printk("failed2\n"); ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); - mutex_unlock(&fs_info->fs_mutex); BUG_ON(ret); root->commit_root = root->node; get_bh(root->node); root->ref_cows = 1; root->fs_info->generation = root->root_key.offset + 1; + ret = btrfs_find_highest_inode(root, &root->fs_info->last_inode_alloc); + if (ret == 0) + fs_info->highest_inode = fs_info->last_inode_alloc; + mutex_unlock(&fs_info->fs_mutex); return root; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 329edb42897e..f665221409ac 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -3,6 +3,37 @@ #include "disk-io.h" #include "transaction.h" +int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid) +{ + struct btrfs_path *path; + int ret; + struct btrfs_leaf *l; + struct btrfs_root *root = fs_root->fs_info->inode_root; + struct btrfs_key search_key; + int slot; + + path = btrfs_alloc_path(); + BUG_ON(!path); + + search_key.objectid = (u64)-1; + search_key.offset = (u64)-1; + ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); + if (ret < 0) + goto error; + BUG_ON(ret == 0); + if (path->slots[0] > 0) { + slot = path->slots[0] - 1; + l = btrfs_buffer_leaf(path->nodes[0]); + *objectid = btrfs_disk_key_objectid(&l->items[slot].key); + } else { + *objectid = BTRFS_FIRST_FREE_OBJECTID; + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + /* * walks the btree of allocated inodes and find a hole. 
*/ @@ -28,21 +59,6 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); search_start = fs_root->fs_info->last_inode_alloc; - if (search_start == 0) { - struct btrfs_disk_key *last_key; - btrfs_init_path(path); - search_key.objectid = (u64)-1; - search_key.offset = (u64)-1; - ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0); - if (ret < 0) - goto error; - BUG_ON(ret == 0); - if (path->slots[0] > 0) - path->slots[0]--; - l = btrfs_buffer_leaf(path->nodes[0]); - last_key = &l->items[path->slots[0]].key; - search_start = btrfs_disk_key_objectid(last_key); - } search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; search_key.offset = 0; @@ -129,6 +145,8 @@ int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, path->slots[0], struct btrfs_inode_map_item); btrfs_cpu_key_to_disk(&inode_item->key, location); btrfs_mark_buffer_dirty(path->nodes[0]); + if (objectid > fs_root->fs_info->highest_inode) + fs_root->fs_info->highest_inode = objectid; out: btrfs_release_path(inode_root, path); btrfs_free_path(path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4fd2b168b2c4..d4ee78046b86 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -482,6 +482,11 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) item = leaf->items + slot; if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; + if (btrfs_disk_key_offset(&item->key) > + root->fs_info->highest_inode) { +printk("stopping at highest inode %Lu\n", root->fs_info->highest_inode); + break; + } if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_INDEX_KEY) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) -- cgit v1.2.3 From 3eb0314dc1053b1ae617dcc8d6d93f776c5baa31 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 5 Apr 2007 14:28:50 -0400 Subject: Btrfs: uuids Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 - fs/btrfs/ctree.c | 12 
++++++++++++ fs/btrfs/super.c | 4 +--- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index db986c46a554..ea4c38c790b4 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -3,7 +3,6 @@ * Add block mapping tree (simple dm layer) * Make allocator smarter * add a block group to struct inode -* Make directory hashing work on 32 bit * Make sure nobh stuff is working properly for cows * Do actual block accounting * Check compat and incompat flags on the inode diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 864ee423b300..39b551564fcf 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -200,6 +200,10 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { + struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); + if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid, + sizeof(node->header.fsid))) + BUG(); if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); @@ -687,6 +691,8 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_parentid(&c->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); lower = btrfs_buffer_node(path->nodes[level-1]); + memcpy(c->header.fsid, root->fs_info->disk_super->fsid, + sizeof(c->header.fsid)); if (btrfs_is_leaf(lower)) lower_key = &((struct btrfs_leaf *)lower)->items[0].key; else @@ -780,6 +786,8 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_generation(&split->header, trans->transid); btrfs_set_header_parentid(&split->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); + memcpy(split->header.fsid, root->fs_info->disk_super->fsid, + sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, (c_nritems - mid) * sizeof(struct 
btrfs_key_ptr)); @@ -1139,6 +1147,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); + memcpy(right->header.fsid, root->fs_info->disk_super->fsid, + sizeof(right->header.fsid)); if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + space_needed > @@ -1229,6 +1239,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&right->header, 0); btrfs_set_header_parentid(&right->header, btrfs_header_parentid(btrfs_buffer_header(root->node))); + memcpy(right->header.fsid, root->fs_info->disk_super->fsid, + sizeof(right->header.fsid)); btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d4ee78046b86..32224ffdcecd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -483,10 +483,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; if (btrfs_disk_key_offset(&item->key) > - root->fs_info->highest_inode) { -printk("stopping at highest inode %Lu\n", root->fs_info->highest_inode); + root->fs_info->highest_inode) break; - } if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_INDEX_KEY) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) -- cgit v1.2.3 From d6e4a428eb8f92bbb3537ccabadfb1195efb432b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 6 Apr 2007 15:37:36 -0400 Subject: Btrfs: start of support for many FS volumes Signed-off-by: Chris Mason --- fs/btrfs/TODO | 7 ++ fs/btrfs/btrfs_inode.h | 2 + fs/btrfs/ctree.h | 36 ++++---- fs/btrfs/dir-item.c | 14 ++-- fs/btrfs/disk-io.c | 7 +- fs/btrfs/inode-item.c | 26 ++++-- fs/btrfs/print-tree.c | 2 +- fs/btrfs/super.c | 218 
+++++++++++++++++++++++++++++++++++++------------ fs/btrfs/transaction.c | 9 ++ 9 files changed, 242 insertions(+), 79 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index ea4c38c790b4..73d4b842a316 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -19,3 +19,10 @@ * fsck * Scrub & defrag +--- +metata FS ideas: +mount metadata FS on /sys/fs/btrfs/dev/ +subvolumes in /sys/fs/btrfs/dev/fs/ +snapshots in /sys/fs/btrfs/dev/fs/snapshots +mount -o bind +ioctl to create a snapshot diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index e159841650a4..3439ffb467b5 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -3,6 +3,8 @@ struct btrfs_inode { u32 magic; + struct btrfs_root *root; + struct btrfs_key location; struct inode vfs_inode; u32 magic2; }; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6d40758b893f..1ff5b99af682 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3,6 +3,7 @@ #include #include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -183,13 +184,15 @@ struct btrfs_inline_data_item { } __attribute__ ((__packed__)); struct btrfs_dir_item { - __le64 objectid; + struct btrfs_disk_key location; __le16 flags; __le16 name_len; u8 type; } __attribute__ ((__packed__)); struct btrfs_root_item { + struct btrfs_inode_item inode; + __le64 root_dirid; __le64 blocknr; __le32 flags; __le64 block_limit; @@ -249,6 +252,7 @@ struct btrfs_fs_info { struct mutex fs_mutex; struct crypto_hash *hash_tfm; spinlock_t hash_lock; + struct kobject kobj; }; /* @@ -504,16 +508,6 @@ static inline void btrfs_set_item_size(struct btrfs_item *item, u16 val) item->size = cpu_to_le16(val); } -static inline u64 btrfs_dir_objectid(struct btrfs_dir_item *d) -{ - return le64_to_cpu(d->objectid); -} - -static inline void btrfs_set_dir_objectid(struct btrfs_dir_item *d, u64 val) -{ - d->objectid = cpu_to_le64(val); -} - static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) { return 
le16_to_cpu(d->flags); @@ -724,6 +718,16 @@ static inline void btrfs_set_root_blocknr(struct btrfs_root_item *item, u64 val) item->blocknr = cpu_to_le64(val); } +static inline u64 btrfs_root_dirid(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->root_dirid); +} + +static inline void btrfs_set_root_dirid(struct btrfs_root_item *item, u64 val) +{ + item->root_dirid = cpu_to_le64(val); +} + static inline u32 btrfs_root_refs(struct btrfs_root_item *item) { return le32_to_cpu(item->refs); @@ -950,8 +954,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key); /* dir-item.c */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, const char *name, int name_len, u64 dir, u64 - objectid, u8 type); + *root, const char *name, int name_len, u64 dir, + struct btrfs_key *location, u8 type); int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, const char *name, int name_len, int mod); @@ -978,7 +982,8 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, struct btrfs_inode_item *inode_item); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 objectid, int mod); + *root, struct btrfs_path *path, + struct btrfs_key *location, int mod); /* file-item.c */ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, @@ -997,4 +1002,7 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, char *data, size_t len); +/* super.c */ +extern struct subsystem btrfs_subsys; + #endif diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 0ee9945fb1b0..7aed9f015b55 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -25,8 +25,8 @@ int insert_with_overflow(struct btrfs_trans_handle *trans, struct btrfs_root } 
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, const char *name, int name_len, u64 dir, u64 - objectid, u8 type) + *root, const char *name, int name_len, u64 dir, + struct btrfs_key *location, u8 type) { int ret = 0; struct btrfs_path *path; @@ -50,17 +50,21 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); - btrfs_set_dir_objectid(dir_item, objectid); + btrfs_cpu_key_to_disk(&dir_item->location, location); btrfs_set_dir_type(dir_item, type); btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); + /* FIXME, use some real flag for selecting the extra index */ + if (root == root->fs_info->tree_root) + goto out; + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - key.offset = objectid; + key.offset = location->objectid; ret = insert_with_overflow(trans, root, path, &key, data_size); // FIXME clear the dirindex bit if (ret) @@ -69,7 +73,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); - btrfs_set_dir_objectid(dir_item, objectid); + btrfs_cpu_key_to_disk(&dir_item->location, location); btrfs_set_dir_type(dir_item, type); btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5230554380d1..b9301a5e4608 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -338,7 +338,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) sb->s_blocksize); if (!fs_info->sb_buffer) { -printk("failed2\n"); return NULL; } disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; @@ -369,6 
+368,10 @@ printk("failed2\n"); ret = btrfs_find_highest_inode(root, &root->fs_info->last_inode_alloc); if (ret == 0) fs_info->highest_inode = fs_info->last_inode_alloc; + memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); + kobj_set_kset_s(fs_info, btrfs_subsys); + kobject_set_name(&fs_info->kobj, "%s", sb->s_id); + kobject_register(&fs_info->kobj); mutex_unlock(&fs_info->fs_mutex); return root; } @@ -430,7 +433,7 @@ int close_ctree(struct btrfs_root *root) kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); - kfree(root->fs_info); + kobject_unregister(&root->fs_info->kobj); kfree(root); return 0; } diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 6bfa980790c2..b276a3b40a66 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -26,15 +26,27 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root } int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 objectid, int mod) + *root, struct btrfs_path *path, + struct btrfs_key *location, int mod) { - struct btrfs_key key; int ins_len = mod < 0 ? 
-1 : 0; int cow = mod != 0; + int ret; + int slot; + struct btrfs_leaf *leaf; + struct btrfs_key found_key; - key.objectid = objectid; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - key.offset = 0; - return btrfs_search_slot(trans, root, &key, path, ins_len, cow); + ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); + if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && + location->offset == (u64)-1 && path->slots[0] != 0) { + slot = path->slots[0] - 1; + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, &leaf->items[slot].key); + if (found_key.objectid == location->objectid && + btrfs_key_type(&found_key) == btrfs_key_type(location)) { + path->slots[0]--; + return 0; + } + } + return ret; } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index c8ee938c1251..f0da65c4f96b 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -38,7 +38,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); printk("\t\tdir oid %Lu flags %u type %u\n", - btrfs_dir_objectid(di), + btrfs_disk_key_objectid(&di->location), btrfs_dir_flags(di), btrfs_dir_type(di)); printk("\t\tname %.*s\n", diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 32224ffdcecd..66d9fb2288c3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -16,9 +16,23 @@ #include "transaction.h" #include "btrfs_inode.h" +void btrfs_fsinfo_release(struct kobject *obj) +{ + struct btrfs_fs_info *fsinfo = container_of(obj, + struct btrfs_fs_info, kobj); + kfree(fsinfo); +} + +struct kobj_type btrfs_fsinfo_ktype = { + .release = btrfs_fsinfo_release, +}; + +decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL); + #define BTRFS_SUPER_MAGIC 0x9123682E static struct inode_operations btrfs_dir_inode_operations; +static struct inode_operations btrfs_dir_ro_inode_operations; static struct super_operations btrfs_super_ops; static struct 
file_operations btrfs_dir_file_operations; static struct inode_operations btrfs_file_inode_operations; @@ -37,7 +51,8 @@ static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; struct btrfs_inode_item *inode_item; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_key location; int ret; path = btrfs_alloc_path(); @@ -46,13 +61,12 @@ static void btrfs_read_locked_inode(struct inode *inode) mutex_lock(&root->fs_info->fs_mutex); check_inode(inode); - ret = btrfs_lookup_inode(NULL, root, path, inode->i_ino, 0); + + memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); + ret = btrfs_lookup_inode(NULL, root, path, &location, 0); if (ret) { - btrfs_release_path(root, path); btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - make_bad_inode(inode); - return; + goto make_bad; } check_inode(inode); inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), @@ -73,7 +87,6 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); - btrfs_release_path(root, path); btrfs_free_path(path); inode_item = NULL; @@ -92,8 +105,11 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_op = &btrfs_file_inode_operations; break; case S_IFDIR: - inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; + if (root == root->fs_info->tree_root) + inode->i_op = &btrfs_dir_ro_inode_operations; + else + inode->i_op = &btrfs_dir_inode_operations; break; case S_IFLNK: // inode->i_op = &page_symlink_inode_operations; @@ -101,6 +117,12 @@ static void btrfs_read_locked_inode(struct inode *inode) } check_inode(inode); return; + +make_bad: + btrfs_release_path(root, path); + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + make_bad_inode(inode); } static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, @@ 
-128,7 +150,7 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, } di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); - objectid = btrfs_dir_objectid(di); + objectid = btrfs_disk_key_objectid(&di->location); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -157,7 +179,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; int ret; - root = btrfs_sb(dir->i_sb); + root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_unlink_trans(trans, root, dir, dentry); @@ -171,7 +193,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int err; int ret; - struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_path *path; struct btrfs_key key; struct btrfs_trans_handle *trans; @@ -268,7 +290,8 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_release_path(root, path); - ret = btrfs_lookup_inode(trans, root, path, objectid, -1); + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, -1); BUG_ON(ret); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -355,7 +378,7 @@ error: static void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; int ret; truncate_inode_pages(&inode->i_data, 0); @@ -378,13 +401,13 @@ no_delete: } static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, - ino_t *ino) + struct btrfs_key *location) { const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; struct btrfs_dir_item *di; struct btrfs_path *path; - struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_root *root = BTRFS_I(dir)->root; int ret; path = btrfs_alloc_path(); @@ -393,13 +416,13 @@ 
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ret = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); if (ret || !btrfs_match_dir_item_name(root, path, name, namelen)) { - *ino = 0; + location->objectid = 0; ret = 0; goto out; } di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_dir_item); - *ino = btrfs_dir_objectid(di); + btrfs_disk_key_to_cpu(location, &di->location); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -407,26 +430,76 @@ out: return ret; } +int fixup_tree_root_location(struct btrfs_root *root, + struct btrfs_key *location, + struct btrfs_root **sub_root) +{ + struct btrfs_path *path; + struct btrfs_root_item *ri; + int ret; + + if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) + return 0; + if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) + return 0; + + path = btrfs_alloc_path(); + BUG_ON(!path); + mutex_lock(&root->fs_info->fs_mutex); + + ret = btrfs_lookup_inode(NULL, root, path, location, 0); + if (ret) + goto out; + ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_root_item); + location->objectid = btrfs_root_dirid(ri); + location->flags = 0; + btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); + location->offset = 0; + /* FIXME properly select the root */ + *sub_root = root->fs_info->fs_root; +out: + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode; - struct btrfs_root *root = btrfs_sb(dir->i_sb); - ino_t ino; + struct btrfs_inode *bi = BTRFS_I(dir); + struct btrfs_root *root = bi->root; + struct btrfs_root *sub_root = root; + struct btrfs_key location; int ret; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_inode_by_name(dir, dentry, &ino); + ret = 
btrfs_inode_by_name(dir, dentry, &location); mutex_unlock(&root->fs_info->fs_mutex); if (ret < 0) return ERR_PTR(ret); inode = NULL; - if (ino) { - inode = iget(dir->i_sb, ino); + if (location.objectid) { + ret = fixup_tree_root_location(root, &location, &sub_root); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) + return ERR_PTR(-ENOENT); + inode = iget_locked(dir->i_sb, location.objectid); if (!inode) return ERR_PTR(-EACCES); + if (inode->i_state & I_NEW) { + BTRFS_I(inode)->root = sub_root; + memcpy(&BTRFS_I(inode)->location, &location, + sizeof(location)); + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } check_inode(inode); } check_inode(dir); @@ -436,7 +509,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_item *item; struct btrfs_dir_item *di; struct btrfs_key key; @@ -448,11 +521,16 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int advance; unsigned char d_type = DT_UNKNOWN; int over = 0; + int key_type = BTRFS_DIR_INDEX_KEY; + + /* FIXME, use a real flag for deciding about the key type */ + if (root->fs_info->tree_root == root) + key_type = BTRFS_DIR_ITEM_KEY; mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; path = btrfs_alloc_path(); btrfs_init_path(path); @@ -482,10 +560,11 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) item = leaf->items + slot; if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; - if (btrfs_disk_key_offset(&item->key) > + if (key_type == BTRFS_DIR_INDEX_KEY && + btrfs_disk_key_offset(&item->key) > 
root->fs_info->highest_inode) break; - if (btrfs_disk_key_type(&item->key) != BTRFS_DIR_INDEX_KEY) + if (btrfs_disk_key_type(&item->key) != key_type) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; @@ -495,7 +574,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) over = filldir(dirent, (const char *)(di + 1), btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), - btrfs_dir_objectid(di), d_type); + btrfs_disk_key_objectid(&di->location), d_type); if (over) goto nopos; } @@ -527,6 +606,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) struct dentry * root_dentry; struct btrfs_super_block *disk_super; struct btrfs_root *root; + struct btrfs_inode *bi; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = BTRFS_SUPER_MAGIC; @@ -546,6 +626,13 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) btrfs_super_root_dir(disk_super)); inode = iget_locked(sb, btrfs_super_root_dir(disk_super)); + bi = BTRFS_I(inode); + bi->location.objectid = inode->i_ino; + bi->location.offset = 0; + bi->location.flags = 0; + bi->root = root->fs_info->tree_root; + btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); + if (!inode) return -ENOMEM; if (inode->i_state & I_NEW) { @@ -594,7 +681,8 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, BUG_ON(!path); btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, inode->i_ino, 1); + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, 1); if (ret) { if (ret > 0) ret = -ENOENT; @@ -616,7 +704,7 @@ failed: static int btrfs_write_inode(struct inode *inode, int wait) { - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; int ret; @@ -637,8 +725,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, { struct inode *inode; struct btrfs_inode_item inode_item; - struct btrfs_root 
*root = btrfs_sb(dir->i_sb); - struct btrfs_key key; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_key *key; int ret; u64 objectid; @@ -646,6 +734,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + BTRFS_I(inode)->root = BTRFS_I(dir)->root; + key = &BTRFS_I(inode)->location; check_inode(inode); ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); BUG_ON(ret); @@ -658,11 +748,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; fill_inode_item(&inode_item, inode); - key.objectid = objectid; - key.flags = 0; - key.offset = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode_map(trans, root, objectid, &key); + key->objectid = objectid; + key->flags = 0; + key->offset = 0; + btrfs_set_key_type(key, BTRFS_INODE_ITEM_KEY); + ret = btrfs_insert_inode_map(trans, root, objectid, key); BUG_ON(ret); ret = btrfs_insert_inode(trans, root, objectid, &inode_item); @@ -678,13 +768,20 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, struct dentry *dentry, struct inode *inode) { int ret; - ret = btrfs_insert_dir_item(trans, btrfs_sb(inode->i_sb), + struct btrfs_key key; + struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; + key.objectid = inode->i_ino; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + + ret = btrfs_insert_dir_item(trans, root, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, - inode->i_ino, 0); + &key, 0); if (ret == 0) { dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; - ret = btrfs_update_inode(trans, btrfs_sb(inode->i_sb), + ret = btrfs_update_inode(trans, root, dentry->d_parent->d_inode); } check_inode(inode); @@ -710,7 +807,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { struct 
btrfs_trans_handle *trans; - struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode; int err; int drop_inode = 0; @@ -747,18 +844,26 @@ out_unlock: static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir) { - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(dir)->root; int ret; char buf[2]; + struct btrfs_key key; + buf[0] = '.'; buf[1] = '.'; + key.objectid = inode->i_ino; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + ret = btrfs_insert_dir_item(trans, root, buf, 1, inode->i_ino, - inode->i_ino, 1); + &key, 1); if (ret) goto error; + key.objectid = dir->i_ino; ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino, - dir->i_ino, 1); + &key, 1); if (ret) goto error; inode->i_size = 6; @@ -771,7 +876,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct inode *inode; struct btrfs_trans_handle *trans; - struct btrfs_root *root = btrfs_sb(dir->i_sb); + struct btrfs_root *root = BTRFS_I(dir)->root; int err = 0; int drop_on_err = 0; @@ -910,7 +1015,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, u64 extent_end = 0; u64 objectid = inode->i_ino; struct btrfs_path *path; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans = NULL; struct btrfs_file_extent_item *item; struct btrfs_leaf *leaf; @@ -999,7 +1104,7 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { int err; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; mutex_lock(&root->fs_info->fs_mutex); err = btrfs_get_block_lock(inode, iblock, result, create); // err = btrfs_get_block_inline(inode, iblock, result, create); @@ -1041,7 +1146,7 @@ static int btrfs_writepage(struct page 
*page, struct writeback_control *wbc) static void btrfs_truncate(struct inode *inode) { - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; int ret; struct btrfs_trans_handle *trans; @@ -1208,7 +1313,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, int err = 0; int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; struct page *pages[1]; if (file->f_flags & O_DIRECT) @@ -1365,7 +1470,7 @@ static ssize_t btrfs_file_inline_write(struct file *file, int err = 0; int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; - struct btrfs_root *root = btrfs_sb(inode->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; unsigned long page_index; if (file->f_flags & O_DIRECT) @@ -1432,13 +1537,14 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, { char *kaddr; unsigned long left, count = desc->count; + struct inode *inode = page->mapping->host; if (size > count) size = count; if (!PageChecked(page)) { /* FIXME, do it per block */ - struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); + struct btrfs_root *root = BTRFS_I(inode)->root; int ret = btrfs_csum_verify_file_block(root, page->mapping->host->i_ino, page->index << PAGE_CACHE_SHIFT, @@ -1662,6 +1768,10 @@ static struct inode_operations btrfs_dir_inode_operations = { .rmdir = btrfs_rmdir, }; +static struct inode_operations btrfs_dir_ro_inode_operations = { + .lookup = btrfs_lookup, +}; + static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, @@ -1697,13 +1807,21 @@ static int __init init_btrfs_fs(void) err = init_inodecache(); if (err) return err; + kset_set_kset_s(&btrfs_subsys, fs_subsys); + err = subsystem_register(&btrfs_subsys); + if (err) + goto out; return register_filesystem(&btrfs_fs_type); +out: + destroy_inodecache(); 
+ return err; } static void __exit exit_btrfs_fs(void) { destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); + subsystem_unregister(&btrfs_subsys); printk("btrfs unloaded\n"); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2b0f98c92123..20d84bd03223 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -52,6 +52,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; + /* FIXME, use the right root */ + root = root->fs_info->fs_root; mutex_lock(&root->fs_info->trans_mutex); ret = join_transaction(root); BUG_ON(ret); @@ -69,6 +71,10 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_transaction *cur_trans; + + /* FIXME, use the right root */ + root = root->fs_info->fs_root; + WARN_ON(trans->magic != TRANS_MAGIC); WARN_ON(trans->magic2 != TRANS_MAGIC); mutex_lock(&root->fs_info->trans_mutex); @@ -148,6 +154,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; DEFINE_WAIT(wait); + /* FIXME, use the right root */ + root = root->fs_info->fs_root; + mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { printk("already in commit!, waiting\n"); -- cgit v1.2.3 From 0f7d52f4431c530b4f39c524448c688bb7754de5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 9 Apr 2007 10:42:37 -0400 Subject: Btrfs: groundwork for subvolume and snapshot roots Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 2 +- fs/btrfs/ctree.h | 20 +++++- fs/btrfs/disk-io.c | 178 +++++++++++++++++++++++++++++++++++++------------ fs/btrfs/disk-io.h | 2 + fs/btrfs/super.c | 39 ++++++----- fs/btrfs/transaction.c | 140 ++++++++++++++++++++++++++++---------- 6 files changed, 282 insertions(+), 99 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 783f54c2a16c..9fc42e99c7df 100644 --- 
a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -86,7 +86,7 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, int i; int total_found = 0; - ret = radix_tree_gang_lookup(radix, (void *)&gang, 0, ARRAY_SIZE(gang)); + ret = radix_tree_gang_lookup(radix, (void **)gang, 0, ARRAY_SIZE(gang)); for (i = 0; i < ret && nr > 0; i++) { found = 0; bits = gang[i]; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1ff5b99af682..5460030c9e6a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -232,12 +232,12 @@ struct btrfs_inode_map_item { struct crypto_hash; struct btrfs_fs_info { - struct btrfs_root *fs_root; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *inode_root; struct btrfs_key current_insert; struct btrfs_key last_insert; + struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; u64 last_inode_alloc; @@ -266,6 +266,9 @@ struct btrfs_root { struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; + struct inode *inode; + u64 objectid; + u64 last_trans; u32 blocksize; int ref_cows; u32 type; @@ -595,7 +598,7 @@ static inline u32 btrfs_key_overflow(struct btrfs_key *key) static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) { - BUG_ON(over > BTRFS_KEY_OVERFLOW_MAX); + BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); over = over << BTRFS_KEY_OVERFLOW_SHIFT; key->flags = (key->flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; } @@ -634,7 +637,7 @@ static inline void btrfs_set_disK_key_overflow(struct btrfs_disk_key *key, u32 over) { u32 flags = btrfs_disk_key_flags(key); - BUG_ON(over > BTRFS_KEY_OVERFLOW_MAX); + BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); over = over << BTRFS_KEY_OVERFLOW_SHIFT; flags = (flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; btrfs_set_disk_key_flags(key, flags); @@ -748,6 +751,17 @@ static inline void btrfs_set_super_blocknr(struct btrfs_super_block *s, u64 val) 
s->blocknr = cpu_to_le64(val); } +static inline u64 btrfs_super_generation(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->generation); +} + +static inline void btrfs_set_super_generation(struct btrfs_super_block *s, + u64 val) +{ + s->generation = cpu_to_le64(val); +} + static inline u64 btrfs_super_root(struct btrfs_super_block *s) { return le64_to_cpu(s->root); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b9301a5e4608..b557bdd1e26a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4,9 +4,11 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "btrfs_inode.h" static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { @@ -180,7 +182,7 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct buffer_head *bh; - struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; struct buffer_head *head; if (!page_has_buffers(page)) { create_empty_buffers(page, root->fs_info->sb->s_blocksize, @@ -259,10 +261,13 @@ static int __setup_root(int blocksize, u64 objectid) { root->node = NULL; + root->inode = NULL; root->commit_root = NULL; root->blocksize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; + root->objectid = objectid; + root->last_trans = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); return 0; @@ -287,10 +292,78 @@ static int find_and_setup_root(int blocksize, return 0; } +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + struct btrfs_root *root; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_path *path; + struct btrfs_leaf *l; + int ret = 0; + +printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, 
location->flags); + root = kmalloc(sizeof(*root), GFP_NOFS); + if (!root) { +printk("failed1\n"); + return ERR_PTR(-ENOMEM); + } + if (location->offset == (u64)-1) { + ret = find_and_setup_root(fs_info->sb->s_blocksize, + fs_info->tree_root, fs_info, + location->objectid, root); + if (ret) { +printk("failed2\n"); + kfree(root); + return ERR_PTR(ret); + } + goto insert; + } + + __setup_root(fs_info->sb->s_blocksize, root, fs_info, + location->objectid); + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); + if (ret != 0) { +printk("internal search_slot gives us %d\n", ret); + if (ret > 0) + ret = -ENOENT; + goto out; + } + l = btrfs_buffer_leaf(path->nodes[0]); + memcpy(&root->root_item, + btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item), + sizeof(root->root_item)); + memcpy(&root->root_key, location, sizeof(*location)); + ret = 0; +out: + btrfs_release_path(root, path); + btrfs_free_path(path); + if (ret) { + kfree(root); + return ERR_PTR(ret); + } + root->node = read_tree_block(root, + btrfs_root_blocknr(&root->root_item)); + BUG_ON(!root->node); +insert: +printk("inserting %p\n", root); + root->ref_cows = 1; + ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root, + root); + if (ret) { +printk("radix_tree_insert gives us %d\n", ret); + brelse(root->node); + kfree(root); + return ERR_PTR(ret); + } +printk("all worked\n"); + return root; +} + struct btrfs_root *open_ctree(struct super_block *sb) { - struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), - GFP_NOFS); struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -304,9 +377,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); sb_set_blocksize(sb, 4096); fs_info->running_transaction 
= NULL; - fs_info->fs_root = root; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->inode_root = inode_root; @@ -318,6 +391,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + BTRFS_I(fs_info->btree_inode)->root = tree_root; + memset(&BTRFS_I(fs_info->btree_inode)->location, 0, + sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); @@ -337,13 +413,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); - if (!fs_info->sb_buffer) { + if (!fs_info->sb_buffer) return NULL; - } disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; - if (!btrfs_super_root(disk_super)) { + if (!btrfs_super_root(disk_super)) return NULL; - } + fs_info->disk_super = disk_super; tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); @@ -358,14 +433,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, - BTRFS_FS_TREE_OBJECTID, root); - BUG_ON(ret); - root->commit_root = root->node; - get_bh(root->node); - root->ref_cows = 1; - root->fs_info->generation = root->root_key.offset + 1; - ret = btrfs_find_highest_inode(root, &root->fs_info->last_inode_alloc); + fs_info->generation = btrfs_super_generation(disk_super) + 1; + ret = btrfs_find_highest_inode(tree_root, &fs_info->last_inode_alloc); if (ret == 0) fs_info->highest_inode = fs_info->last_inode_alloc; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); @@ -373,7 +442,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) kobject_set_name(&fs_info->kobj, "%s", sb->s_id); kobject_register(&fs_info->kobj); 
mutex_unlock(&fs_info->fs_mutex); - return root; + return tree_root; } int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root @@ -398,12 +467,42 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +int del_fs_roots(struct btrfs_fs_info *fs_info) +{ + int ret; + struct btrfs_root *gang[8]; + int i; + + while(1) { + ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, 0, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_delete(&fs_info->fs_roots_radix, + (unsigned long)gang[i]); + if (gang[i]->inode) + iput(gang[i]->inode); + else + printk("no inode for root %p\n", gang[i]); + if (gang[i]->node) + brelse(gang[i]->node); + if (gang[i]->commit_root) + brelse(gang[i]->commit_root); + kfree(gang[i]); + } + } + return 0; +} + int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; + struct btrfs_fs_info *fs_info = root->fs_info; - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -412,29 +511,26 @@ int close_ctree(struct btrfs_root *root) ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); write_ctree_super(NULL, root); - mutex_unlock(&root->fs_info->fs_mutex); - - if (root->node) - btrfs_block_release(root, root->node); - if (root->fs_info->extent_root->node) - btrfs_block_release(root->fs_info->extent_root, - root->fs_info->extent_root->node); - if (root->fs_info->inode_root->node) - btrfs_block_release(root->fs_info->inode_root, - root->fs_info->inode_root->node); - if (root->fs_info->tree_root->node) - btrfs_block_release(root->fs_info->tree_root, - root->fs_info->tree_root->node); - btrfs_block_release(root, root->commit_root); - btrfs_block_release(root, root->fs_info->sb_buffer); - crypto_free_hash(root->fs_info->hash_tfm); - 
truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - iput(root->fs_info->btree_inode); - kfree(root->fs_info->extent_root); - kfree(root->fs_info->inode_root); - kfree(root->fs_info->tree_root); - kobject_unregister(&root->fs_info->kobj); - kfree(root); + mutex_unlock(&fs_info->fs_mutex); + + if (fs_info->extent_root->node) + btrfs_block_release(fs_info->extent_root, + fs_info->extent_root->node); + if (fs_info->inode_root->node) + btrfs_block_release(fs_info->inode_root, + fs_info->inode_root->node); + if (fs_info->tree_root->node) + btrfs_block_release(fs_info->tree_root, + fs_info->tree_root->node); + btrfs_block_release(root, fs_info->sb_buffer); + crypto_free_hash(fs_info->hash_tfm); + truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); + del_fs_roots(fs_info); + kfree(fs_info->extent_root); + kfree(fs_info->inode_root); + kfree(fs_info->tree_root); + kobject_unregister(&fs_info->kobj); return 0; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index ac6764ba8aa6..0ef6e6f714af 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,4 +39,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result); +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 66d9fb2288c3..3c9236ca889c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -436,7 +436,6 @@ int fixup_tree_root_location(struct btrfs_root *root, { struct btrfs_path *path; struct btrfs_root_item *ri; - int ret; if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) return 0; @@ -447,22 +446,19 @@ int fixup_tree_root_location(struct btrfs_root *root, BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_lookup_inode(NULL, root, path, location, 0); - if (ret) - 
goto out; - ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_root_item); + *sub_root = btrfs_read_fs_root(root->fs_info, location); + if (IS_ERR(*sub_root)) + return PTR_ERR(*sub_root); + + ri = &(*sub_root)->root_item; location->objectid = btrfs_root_dirid(ri); location->flags = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); location->offset = 0; - /* FIXME properly select the root */ - *sub_root = root->fs_info->fs_root; -out: + btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); - return ret; + return 0; } @@ -494,6 +490,15 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (!inode) return ERR_PTR(-EACCES); if (inode->i_state & I_NEW) { + if (sub_root != root) { + ret = radix_tree_insert( + &root->fs_info->fs_roots_radix, + (unsigned long)sub_root, + sub_root); +printk("adding new root for inode %lu\n", inode->i_ino); + igrab(inode); + sub_root->inode = inode; + } BTRFS_I(inode)->root = sub_root; memcpy(&BTRFS_I(inode)->location, &location, sizeof(location)); @@ -605,7 +610,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) struct inode * inode; struct dentry * root_dentry; struct btrfs_super_block *disk_super; - struct btrfs_root *root; + struct btrfs_root *tree_root; struct btrfs_inode *bi; sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -613,14 +618,14 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_op = &btrfs_super_ops; sb->s_time_gran = 1; - root = open_ctree(sb); + tree_root = open_ctree(sb); - if (!root) { + if (!tree_root) { printk("btrfs: open_ctree failed\n"); return -EIO; } - sb->s_fs_info = root; - disk_super = root->fs_info->disk_super; + sb->s_fs_info = tree_root; + disk_super = tree_root->fs_info->disk_super; printk("read in super total blocks %Lu root %Lu\n", btrfs_super_total_blocks(disk_super), btrfs_super_root_dir(disk_super)); @@ -630,7 +635,7 @@ static int btrfs_fill_super(struct super_block 
* sb, void * data, int silent) bi->location.objectid = inode->i_ino; bi->location.offset = 0; bi->location.flags = 0; - bi->root = root->fs_info->tree_root; + bi->root = tree_root; btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); if (!inode) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 20d84bd03223..83a0194ab163 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -8,6 +8,8 @@ static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; +#define BTRFS_ROOT_TRANS_TAG 0 + #define TRANS_MAGIC 0xE1E10E static void put_transaction(struct btrfs_transaction *transaction) { @@ -31,9 +33,10 @@ static int join_transaction(struct btrfs_root *root) GFP_NOFS); total_trans++; BUG_ON(!cur_trans); + root->fs_info->generation++; root->fs_info->running_transaction = cur_trans; cur_trans->num_writers = 0; - cur_trans->transid = root->root_key.offset + 1; + cur_trans->transid = root->fs_info->generation; init_waitqueue_head(&cur_trans->writer_wait); init_waitqueue_head(&cur_trans->commit_wait); cur_trans->magic = TRANS_MAGIC; @@ -51,13 +54,22 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, struct btrfs_trans_handle *h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); int ret; + u64 running_trans_id; - /* FIXME, use the right root */ - root = root->fs_info->fs_root; mutex_lock(&root->fs_info->trans_mutex); ret = join_transaction(root); BUG_ON(ret); - h->transid = root->fs_info->running_transaction->transid; + running_trans_id = root->fs_info->running_transaction->transid; + + if (root != root->fs_info->tree_root && root->last_trans < + running_trans_id) { + radix_tree_tag_set(&root->fs_info->fs_roots_radix, + (unsigned long)root, BTRFS_ROOT_TRANS_TAG); + root->commit_root = root->node; + get_bh(root->node); + } + root->last_trans = running_trans_id; + h->transid = running_trans_id; h->transaction = root->fs_info->running_transaction; 
h->blocks_reserved = num_blocks; h->blocks_used = 0; @@ -72,9 +84,6 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, { struct btrfs_transaction *cur_trans; - /* FIXME, use the right root */ - root = root->fs_info->fs_root; - WARN_ON(trans->magic != TRANS_MAGIC); WARN_ON(trans->magic2 != TRANS_MAGIC); mutex_lock(&root->fs_info->trans_mutex); @@ -145,17 +154,96 @@ static int wait_for_commit(struct btrfs_root *root, return 0; } +struct dirty_root { + struct list_head list; + struct btrfs_key snap_key; + struct buffer_head *commit_root; + struct btrfs_root *root; +}; + +int add_dirty_roots(struct btrfs_trans_handle *trans, + struct radix_tree_root *radix, struct list_head *list) +{ + struct dirty_root *dirty; + struct btrfs_root *gang[8]; + struct btrfs_root *root; + int i; + int ret; + int err; +printk("add dirty\n"); + while(1) { + ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, + ARRAY_SIZE(gang), + BTRFS_ROOT_TRANS_TAG); + if (ret == 0) + break; + for (i = 0; i < ret; i++) { + root = gang[i]; + radix_tree_tag_clear(radix, (unsigned long)root, + BTRFS_ROOT_TRANS_TAG); + if (root->commit_root == root->node) { + WARN_ON(root->node->b_blocknr != + btrfs_root_blocknr(&root->root_item)); + brelse(root->commit_root); + root->commit_root = NULL; + continue; + } + dirty = kmalloc(sizeof(*dirty), GFP_NOFS); + BUG_ON(!dirty); + memcpy(&dirty->snap_key, &root->root_key, + sizeof(root->root_key)); + dirty->commit_root = root->commit_root; + root->commit_root = NULL; + dirty->root = root; +printk("adding dirty root %Lu gen %Lu blocknr %Lu\n", root->root_key.objectid, root->root_key.offset, dirty->commit_root->b_blocknr); + root->root_key.offset = root->fs_info->generation; + btrfs_set_root_blocknr(&root->root_item, + root->node->b_blocknr); + err = btrfs_insert_root(trans, root->fs_info->tree_root, + &root->root_key, + &root->root_item); + BUG_ON(err); + list_add(&dirty->list, list); + } + } +printk("add dirty done\n"); + return 0; +} + +int 
drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) +{ + struct dirty_root *dirty; + struct btrfs_trans_handle *trans; + int ret; + + while(!list_empty(list)) { + dirty = list_entry(list->next, struct dirty_root, list); + list_del_init(&dirty->list); + trans = btrfs_start_transaction(tree_root, 1); +printk("drop snapshot root %p, commit_root blocknr %Lu generation %Lu\n", dirty->root, dirty->commit_root->b_blocknr, dirty->snap_key.offset); + ret = btrfs_drop_snapshot(trans, dirty->root, + dirty->commit_root); + BUG_ON(ret); + +printk("del root objectid %Lu, offset %Lu\n", dirty->snap_key.objectid, dirty->snap_key.offset); + ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); + BUG_ON(ret); + ret = btrfs_end_transaction(trans, tree_root); + BUG_ON(ret); + kfree(dirty); + } + return 0; +} + int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret = 0; - struct buffer_head *snap; - struct btrfs_key snap_key; struct btrfs_transaction *cur_trans; + struct list_head dirty_fs_roots; DEFINE_WAIT(wait); - /* FIXME, use the right root */ - root = root->fs_info->fs_root; + INIT_LIST_HEAD(&dirty_fs_roots); mutex_lock(&root->fs_info->trans_mutex); if (trans->transaction->in_commit) { @@ -184,22 +272,13 @@ printk("already in commit!, waiting\n"); } finish_wait(&trans->transaction->writer_wait, &wait); WARN_ON(cur_trans != trans->transaction); - if (root->node != root->commit_root) { - memcpy(&snap_key, &root->root_key, sizeof(snap_key)); - root->root_key.offset++; - } - - if (btrfs_root_blocknr(&root->root_item) != root->node->b_blocknr) { - btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, - &root->root_key, &root->root_item); - BUG_ON(ret); - } - + add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); cur_trans = root->fs_info->running_transaction; 
root->fs_info->running_transaction = NULL; + btrfs_set_super_generation(root->fs_info->disk_super, + root->fs_info->generation + 1); mutex_unlock(&root->fs_info->trans_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); @@ -213,21 +292,8 @@ printk("already in commit!, waiting\n"); put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); kmem_cache_free(btrfs_trans_handle_cachep, trans); - if (root->node != root->commit_root) { - trans = btrfs_start_transaction(root, 1); - snap = root->commit_root; - root->commit_root = root->node; - get_bh(root->node); - ret = btrfs_drop_snapshot(trans, root, snap); - BUG_ON(ret); - ret = btrfs_del_root(trans, root->fs_info->tree_root, - &snap_key); - BUG_ON(ret); - root->fs_info->generation = root->root_key.offset + 1; - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - } + drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); return ret; } -- cgit v1.2.3 From c5739bba5260a59cebd20a51a55080592c8d3b07 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 09:27:04 -0400 Subject: Btrfs: snapshot progress Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 15 +++++- fs/btrfs/dir-item.c | 6 ++- fs/btrfs/disk-io.c | 4 -- fs/btrfs/extent-tree.c | 6 +++ fs/btrfs/ioctl.h | 13 +++++ fs/btrfs/root-tree.c | 16 +++++- fs/btrfs/super.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++-- 7 files changed, 183 insertions(+), 12 deletions(-) create mode 100644 fs/btrfs/ioctl.h (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5460030c9e6a..2cbcaaeac9e5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -227,7 +227,7 @@ struct btrfs_csum_item { } __attribute__ ((__packed__)); struct btrfs_inode_map_item { - struct btrfs_disk_key key; + u32 refs; } __attribute__ ((__packed__)); struct crypto_hash; @@ -883,6 +883,17 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } +static inline u32 
btrfs_inode_map_refs(struct btrfs_inode_map_item *m) +{ + return le32_to_cpu(m->refs); +} + +static inline void btrfs_set_inode_map_refs(struct btrfs_inode_map_item *m, + u32 val) +{ + m->refs = cpu_to_le32(val); +} + static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; @@ -925,6 +936,8 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) btrfs_item_offset((leaf)->items + (slot)))) /* extent-item.c */ +int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 7aed9f015b55..0ba46bc0da9a 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -55,12 +55,14 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_dir_flags(dir_item, 0); btrfs_set_dir_name_len(dir_item, name_len); name_ptr = (char *)(dir_item + 1); + + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); + /* FIXME, use some real flag for selecting the extra index */ if (root == root->fs_info->tree_root) goto out; - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b557bdd1e26a..6b097ede80b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,10 +16,6 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { BUG(); } - if (root->node && btrfs_header_parentid(&node->header) != - btrfs_header_parentid(btrfs_buffer_header(root->node))) { - BUG(); - } return 0; } diff --git a/fs/btrfs/extent-tree.c 
b/fs/btrfs/extent-tree.c index 7c21f63f1b93..efc604eea0b0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -77,6 +77,12 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + return inc_block_ref(trans, root, root->node->b_blocknr, 1); +} + int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h new file mode 100644 index 000000000000..201fb3270723 --- /dev/null +++ b/fs/btrfs/ioctl.h @@ -0,0 +1,13 @@ +#ifndef __IOCTL_ +#define __IOCTL_ +#include + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_VOL_NAME_MAX 255 +struct btrfs_ioctl_vol_args { + char name[BTRFS_VOL_NAME_MAX + 1]; +}; + +#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ + struct btrfs_ioctl_vol_args) +#endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index ddc1c13a5352..72be9836932f 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -83,6 +83,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, { struct btrfs_path *path; int ret; + u32 refs; + struct btrfs_root_item *ri; path = btrfs_alloc_path(); BUG_ON(!path); @@ -91,7 +93,19 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (ret < 0) goto out; BUG_ON(ret != 0); - ret = btrfs_del_item(trans, root, path); + ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_root_item); + + refs = btrfs_root_refs(ri); + BUG_ON(refs == 0); + if (refs == 1) { + ret = btrfs_del_item(trans, root, path); +printk("deleting root %Lu %Lu %u\n", key->objectid, key->offset, key->flags); + } else { + btrfs_set_root_refs(ri, refs - 1); +printk("ref now %u root %Lu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); + mark_buffer_dirty(path->nodes[0]); + } out: btrfs_release_path(root, path); 
btrfs_free_path(path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3c9236ca889c..bbe5cabcb42e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -15,6 +15,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "ioctl.h" void btrfs_fsinfo_release(struct kobject *obj) { @@ -27,6 +28,11 @@ struct kobj_type btrfs_fsinfo_ktype = { .release = btrfs_fsinfo_release, }; +struct btrfs_iget_args { + u64 ino; + struct btrfs_root *root; +}; + decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL); #define BTRFS_SUPER_MAGIC 0x9123682E @@ -461,6 +467,34 @@ int fixup_tree_root_location(struct btrfs_root *root, return 0; } +int btrfs_init_locked_inode(struct inode *inode, void *p) +{ + struct btrfs_iget_args *args = p; + inode->i_ino = args->ino; + BTRFS_I(inode)->root = args->root; + return 0; +} + +int btrfs_find_actor(struct inode *inode, void *opaque) +{ + struct btrfs_iget_args *args = opaque; + return (args->ino == inode->i_ino && + args->root == BTRFS_I(inode)->root); +} + +struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, + struct btrfs_root *root) +{ + struct inode *inode; + struct btrfs_iget_args args; + args.ino = objectid; + args.root = root; + + inode = iget5_locked(s, objectid, btrfs_find_actor, + btrfs_init_locked_inode, + (void *)&args); + return inode; +} static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) @@ -486,7 +520,8 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); if (ret > 0) return ERR_PTR(-ENOENT); - inode = iget_locked(dir->i_sb, location.objectid); + inode = btrfs_iget_locked(dir->i_sb, location.objectid, + sub_root); if (!inode) return ERR_PTR(-EACCES); if (inode->i_state & I_NEW) { @@ -495,7 +530,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, &root->fs_info->fs_roots_radix, (unsigned long)sub_root, sub_root); -printk("adding new root for inode %lu\n", 
inode->i_ino); +printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root); igrab(inode); sub_root->inode = inode; } @@ -630,7 +665,8 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) btrfs_super_total_blocks(disk_super), btrfs_super_root_dir(disk_super)); - inode = iget_locked(sb, btrfs_super_root_dir(disk_super)); + inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), + tree_root); bi = BTRFS_I(inode); bi->location.objectid = inode->i_ino; bi->location.offset = 0; @@ -750,7 +786,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_mode = mode; inode->i_ino = objectid; inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; fill_inode_item(&inode_item, inode); key->objectid = objectid; @@ -1650,6 +1686,95 @@ static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return retval; } +static int create_snapshot(struct btrfs_root *root, char *name, int namelen) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_root_item new_root_item; + int ret; + u64 objectid; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + ret = btrfs_update_inode(trans, root, root->inode); + BUG_ON(ret); + + ret = btrfs_find_free_objectid(trans, root, 0, &objectid); + BUG_ON(ret); + + memset(&new_root_item, 0, sizeof(new_root_item)); + memcpy(&new_root_item, &root->root_item, + sizeof(new_root_item)); + + key.objectid = objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + ret = btrfs_insert_inode_map(trans, root, objectid, &key); + BUG_ON(ret); + + key.objectid = objectid; + key.offset = 1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + btrfs_set_root_blocknr(&new_root_item, root->node->b_blocknr); + + 
ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + &new_root_item); + BUG_ON(ret); + +printk("adding snapshot name %.*s root %Lu %Lu %u\n", namelen, name, key.objectid, key.offset, key.flags); + + /* + * insert the directory item + */ + key.offset = (u64)-1; + ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + name, namelen, + root->fs_info->sb->s_root->d_inode->i_ino, + &key, 0); + + BUG_ON(ret); + + ret = btrfs_inc_root_ref(trans, root); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + +static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int + cmd, unsigned long arg) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_ioctl_vol_args vol_args; + int ret; + int namelen; + + if (!root->ref_cows) + return -EINVAL; + switch (cmd) { + case BTRFS_IOC_SNAP_CREATE: + if (copy_from_user(&vol_args, + (struct btrfs_ioctl_vol_args __user *)arg, + sizeof(vol_args))) + return -EFAULT; + namelen = strlen(vol_args.name); + if (namelen > BTRFS_VOL_NAME_MAX) + return -EINVAL; + ret = create_snapshot(root, vol_args.name, namelen); + WARN_ON(ret); + break; + default: + return -ENOTTY; + } + return 0; +} + static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; struct kmem_cache *btrfs_transaction_cachep; @@ -1781,6 +1906,7 @@ static struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = btrfs_readdir, + .ioctl = btrfs_ioctl, }; static struct address_space_operations btrfs_aops = { @@ -1803,6 +1929,7 @@ static struct file_operations btrfs_file_operations = { .write = btrfs_file_write, .mmap = generic_file_mmap, .open = generic_file_open, + .ioctl = btrfs_ioctl, }; static int __init init_btrfs_fs(void) -- cgit v1.2.3 From 1b05da2ee6217e7d55460d04335813fec25be4ca Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 12:13:09 
-0400 Subject: Btrfs: drop the inode map tree Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 2 -- fs/btrfs/ctree.h | 39 +++------------------- fs/btrfs/disk-io.c | 25 +++++--------- fs/btrfs/inode-item.c | 2 ++ fs/btrfs/inode-map.c | 65 +++--------------------------------- fs/btrfs/print-tree.c | 8 ----- fs/btrfs/super.c | 89 ++++++++------------------------------------------ fs/btrfs/transaction.c | 7 ---- 8 files changed, 33 insertions(+), 204 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 3439ffb467b5..43a4f1ddb375 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -2,11 +2,9 @@ #define __BTRFS_I__ struct btrfs_inode { - u32 magic; struct btrfs_root *root; struct btrfs_key location; struct inode vfs_inode; - u32 magic2; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2cbcaaeac9e5..5103709bb2b9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -14,9 +14,8 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL -#define BTRFS_INODE_MAP_OBJECTID 3ULL -#define BTRFS_FS_TREE_OBJECTID 4ULL -#define BTRFS_FIRST_FREE_OBJECTID 5ULL +#define BTRFS_FS_TREE_OBJECTID 3ULL +#define BTRFS_FIRST_FREE_OBJECTID 4ULL /* * we can actually store much bigger names, but lets not confuse the rest @@ -62,7 +61,6 @@ struct btrfs_header { __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; __le64 parentid; /* objectid of the tree root */ - __le32 ham; __le16 nritems; __le16 flags; u8 level; @@ -226,23 +224,16 @@ struct btrfs_csum_item { u8 csum[BTRFS_CSUM_SIZE]; } __attribute__ ((__packed__)); -struct btrfs_inode_map_item { - u32 refs; -} __attribute__ ((__packed__)); - struct crypto_hash; struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; - struct btrfs_root *inode_root; struct 
btrfs_key current_insert; struct btrfs_key last_insert; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; - u64 last_inode_alloc; u64 generation; - u64 highest_inode; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; struct buffer_head *sb_buffer; @@ -272,6 +263,8 @@ struct btrfs_root { u32 blocksize; int ref_cows; u32 type; + u64 highest_inode; + u64 last_inode_alloc; }; /* the lower bits in the key flags defines the item type */ @@ -320,16 +313,11 @@ struct btrfs_root { */ #define BTRFS_EXTENT_ITEM_KEY 8 -/* - * the inode map records which inode numbers are in use and where - * they actually live on disk - */ -#define BTRFS_INODE_MAP_ITEM_KEY 9 /* * string items are for debugging. They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 10 +#define BTRFS_STRING_ITEM_KEY 9 static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -883,17 +871,6 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } -static inline u32 btrfs_inode_map_refs(struct btrfs_inode_map_item *m) -{ - return le32_to_cpu(m->refs); -} - -static inline void btrfs_set_inode_map_refs(struct btrfs_inode_map_item *m, - u32 val) -{ - m->refs = cpu_to_le32(val); -} - static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; @@ -996,12 +973,6 @@ int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, u64 dirid, u64 *objectid); -int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, struct btrfs_key *location); -int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct btrfs_path *path, - u64 objectid, int mod); int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); /* 
inode-item.c */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6b097ede80b1..760fdc9a7664 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -264,6 +264,8 @@ static int __setup_root(int blocksize, root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; + root->highest_inode = 0; + root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); return 0; @@ -295,6 +297,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_path *path; struct btrfs_leaf *l; + u64 highest_inode; int ret = 0; printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); @@ -354,6 +357,12 @@ printk("radix_tree_insert gives us %d\n", ret); kfree(root); return ERR_PTR(ret); } + ret = btrfs_find_highest_inode(root, &highest_inode); + if (ret == 0) { + root->highest_inode = highest_inode; + root->last_inode_alloc = highest_inode; +printk("highest inode is %Lu\n", highest_inode); + } printk("all worked\n"); return root; } @@ -364,8 +373,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root), - GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; @@ -378,9 +385,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; - fs_info->inode_root = inode_root; - fs_info->last_inode_alloc = 0; - fs_info->highest_inode = 0; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -425,14 +429,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = 
find_and_setup_root(sb->s_blocksize, tree_root, fs_info, - BTRFS_INODE_MAP_OBJECTID, inode_root); - BUG_ON(ret); - fs_info->generation = btrfs_super_generation(disk_super) + 1; - ret = btrfs_find_highest_inode(tree_root, &fs_info->last_inode_alloc); - if (ret == 0) - fs_info->highest_inode = fs_info->last_inode_alloc; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); kobj_set_kset_s(fs_info, btrfs_subsys); kobject_set_name(&fs_info->kobj, "%s", sb->s_id); @@ -512,9 +509,6 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) btrfs_block_release(fs_info->extent_root, fs_info->extent_root->node); - if (fs_info->inode_root->node) - btrfs_block_release(fs_info->inode_root, - fs_info->inode_root->node); if (fs_info->tree_root->node) btrfs_block_release(fs_info->tree_root, fs_info->tree_root->node); @@ -524,7 +518,6 @@ int close_ctree(struct btrfs_root *root) iput(fs_info->btree_inode); del_fs_roots(fs_info); kfree(fs_info->extent_root); - kfree(fs_info->inode_root); kfree(fs_info->tree_root); kobject_unregister(&fs_info->kobj); return 0; diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index b276a3b40a66..c3b990b661c2 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -22,6 +22,8 @@ int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root sizeof(*inode_item)); btrfs_release_path(root, path); btrfs_free_path(path); + if (ret == 0 && objectid > root->highest_inode) + root->highest_inode = objectid; return ret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f665221409ac..318e27a6378b 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -3,12 +3,11 @@ #include "disk-io.h" #include "transaction.h" -int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid) +int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) { struct btrfs_path *path; int ret; struct btrfs_leaf *l; - struct btrfs_root *root = fs_root->fs_info->inode_root; struct btrfs_key search_key; 
int slot; @@ -38,7 +37,7 @@ error: * walks the btree of allocated inodes and find a hole. */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, - struct btrfs_root *fs_root, + struct btrfs_root *root, u64 dirid, u64 *objectid) { struct btrfs_path *path; @@ -49,16 +48,13 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, u64 last_ino = 0; int start_found; struct btrfs_leaf *l; - struct btrfs_root *root = fs_root->fs_info->inode_root; struct btrfs_key search_key; u64 search_start = dirid; path = btrfs_alloc_path(); BUG_ON(!path); search_key.flags = 0; - btrfs_set_key_type(&search_key, BTRFS_INODE_MAP_ITEM_KEY); - - search_start = fs_root->fs_info->last_inode_alloc; + search_start = root->last_inode_alloc; search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; search_key.offset = 0; @@ -108,7 +104,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, } // FIXME -ENOSPC found: - root->fs_info->last_inode_alloc = *objectid; + root->last_inode_alloc = *objectid; btrfs_release_path(root, path); btrfs_free_path(path); BUG_ON(*objectid < search_start); @@ -118,56 +114,3 @@ error: btrfs_free_path(path); return ret; } - -int btrfs_insert_inode_map(struct btrfs_trans_handle *trans, - struct btrfs_root *fs_root, - u64 objectid, struct btrfs_key *location) -{ - int ret = 0; - struct btrfs_path *path; - struct btrfs_inode_map_item *inode_item; - struct btrfs_key key; - struct btrfs_root *inode_root = fs_root->fs_info->inode_root; - - key.objectid = objectid; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INODE_MAP_ITEM_KEY); - key.offset = 0; - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - ret = btrfs_insert_empty_item(trans, inode_root, path, &key, - sizeof(struct btrfs_inode_map_item)); - if (ret) - goto out; - - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_inode_map_item); - btrfs_cpu_key_to_disk(&inode_item->key, 
location); - btrfs_mark_buffer_dirty(path->nodes[0]); - if (objectid > fs_root->fs_info->highest_inode) - fs_root->fs_info->highest_inode = objectid; -out: - btrfs_release_path(inode_root, path); - btrfs_free_path(path); - return ret; -} - -int btrfs_lookup_inode_map(struct btrfs_trans_handle *trans, - struct btrfs_root *fs_root, struct btrfs_path *path, - u64 objectid, int mod) -{ - int ret; - struct btrfs_key key; - int ins_len = mod < 0 ? -1 : 0; - int cow = mod != 0; - struct btrfs_root *inode_root = fs_root->fs_info->inode_root; - - key.objectid = objectid; - key.flags = 0; - key.offset = 0; - btrfs_set_key_type(&key, BTRFS_INODE_MAP_ITEM_KEY); - ret = btrfs_search_slot(trans, inode_root, &key, path, ins_len, cow); - return ret; -} - diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index f0da65c4f96b..854d47d9bdc9 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -10,7 +10,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_extent_item *ei; struct btrfs_root_item *ri; struct btrfs_dir_item *di; - struct btrfs_inode_map_item *mi; struct btrfs_inode_item *ii; u32 type; @@ -54,13 +53,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) printk("\t\textent data refs %u owner %Lu\n", btrfs_extent_refs(ei), btrfs_extent_owner(ei)); break; - case BTRFS_INODE_MAP_ITEM_KEY: - mi = btrfs_item_ptr(l, i, struct btrfs_inode_map_item); - printk("\t\tinode map key %Lu %u %Lu\n", - btrfs_disk_key_objectid(&mi->key), - btrfs_disk_key_flags(&mi->key), - btrfs_disk_key_offset(&mi->key)); - break; case BTRFS_STRING_ITEM_KEY: printk("\t\titem data %.*s\n", btrfs_item_size(item), btrfs_leaf_data(l) + btrfs_item_offset(item)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index bbe5cabcb42e..3e8bfb0e5d7e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -45,14 +45,6 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct 
file_operations btrfs_file_operations; -static int check_inode(struct inode *inode) -{ - struct btrfs_inode *ei = BTRFS_I(inode); - WARN_ON(ei->magic != 0xDEADBEEF); - WARN_ON(ei->magic2 != 0xDEADBEAF); - return 0; -} - static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -66,15 +58,12 @@ static void btrfs_read_locked_inode(struct inode *inode) btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); - check_inode(inode); - memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); if (ret) { btrfs_free_path(path); goto make_bad; } - check_inode(inode); inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_inode_item); @@ -97,7 +86,7 @@ static void btrfs_read_locked_inode(struct inode *inode) inode_item = NULL; mutex_unlock(&root->fs_info->fs_mutex); - check_inode(inode); + switch (inode->i_mode & S_IFMT) { #if 0 default: @@ -121,7 +110,6 @@ static void btrfs_read_locked_inode(struct inode *inode) // inode->i_op = &page_symlink_inode_operations; break; } - check_inode(inode); return; make_bad: @@ -272,10 +260,7 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { - u64 objectid = inode->i_ino; struct btrfs_path *path; - struct btrfs_inode_map_item *map; - struct btrfs_key stat_data_key; int ret; clear_inode(inode); @@ -283,26 +268,11 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - ret = btrfs_lookup_inode_map(trans, root, path, objectid, -1); - if (ret) { - if (ret > 0) - ret = -ENOENT; - goto error; - } - map = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_inode_map_item); - btrfs_disk_key_to_cpu(&stat_data_key, &map->key); - ret = btrfs_del_item(trans, root->fs_info->inode_root, path); - BUG_ON(ret); - btrfs_release_path(root, path); - ret = 
btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, -1); BUG_ON(ret); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); -error: - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } @@ -432,7 +402,6 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, out: btrfs_release_path(root, path); btrfs_free_path(path); - check_inode(dir); return ret; } @@ -540,9 +509,7 @@ printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_r btrfs_read_locked_inode(inode); unlock_new_inode(inode); } - check_inode(inode); } - check_inode(dir); return d_splice_alias(inode, dentry); } @@ -566,7 +533,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) key_type = BTRFS_DIR_ITEM_KEY; - mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; key.flags = 0; @@ -575,9 +541,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) path = btrfs_alloc_path(); btrfs_init_path(path); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { + if (ret < 0) goto err; - } advance = 0; while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); @@ -601,8 +566,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; if (key_type == BTRFS_DIR_INDEX_KEY && - btrfs_disk_key_offset(&item->key) > - root->fs_info->highest_inode) + btrfs_disk_key_offset(&item->key) > root->highest_inode) break; if (btrfs_disk_key_type(&item->key) != key_type) continue; @@ -707,7 +671,6 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); - check_inode(inode); } static int btrfs_update_inode(struct btrfs_trans_handle *trans, 
@@ -721,7 +684,6 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, 1); if (ret) { @@ -736,11 +698,11 @@ static int btrfs_update_inode(struct btrfs_trans_handle *trans, fill_inode_item(inode_item, inode); btrfs_mark_buffer_dirty(path->nodes[0]); + ret = 0; failed: btrfs_release_path(root, path); btrfs_free_path(path); - check_inode(inode); - return 0; + return ret; } static int btrfs_write_inode(struct inode *inode, int wait) @@ -757,7 +719,6 @@ static int btrfs_write_inode(struct inode *inode, int wait) else btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - check_inode(inode); return ret; } @@ -767,7 +728,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct inode *inode; struct btrfs_inode_item inode_item; struct btrfs_root *root = BTRFS_I(dir)->root; - struct btrfs_key *key; + struct btrfs_key *location; int ret; u64 objectid; @@ -776,8 +737,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = BTRFS_I(dir)->root; - key = &BTRFS_I(inode)->location; - check_inode(inode); ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); BUG_ON(ret); @@ -788,20 +747,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; fill_inode_item(&inode_item, inode); - - key->objectid = objectid; - key->flags = 0; - key->offset = 0; - btrfs_set_key_type(key, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode_map(trans, root, objectid, key); - BUG_ON(ret); + location = &BTRFS_I(inode)->location; + location->objectid = objectid; + location->flags = 0; + location->offset = 0; + btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_inode(trans, root, objectid, &inode_item); 
BUG_ON(ret); insert_inode_hash(inode); - check_inode(inode); - check_inode(dir); return inode; } @@ -825,8 +780,6 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, root, dentry->d_parent->d_inode); } - check_inode(inode); - check_inode(dentry->d_parent->d_inode); return ret; } @@ -840,7 +793,6 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, } if (err > 0) err = -EEXIST; - check_inode(inode); return err; } @@ -872,8 +824,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - check_inode(inode); - check_inode(dir); if (drop_inode) { inode_dec_link_count(inode); @@ -1701,20 +1651,14 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_update_inode(trans, root, root->inode); BUG_ON(ret); - ret = btrfs_find_free_objectid(trans, root, 0, &objectid); + ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, + 0, &objectid); BUG_ON(ret); memset(&new_root_item, 0, sizeof(new_root_item)); memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); - key.objectid = objectid; - key.flags = 0; - key.offset = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode_map(trans, root, objectid, &key); - BUG_ON(ret); - key.objectid = objectid; key.offset = 1; key.flags = 0; @@ -1791,21 +1735,14 @@ static struct inode *btrfs_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; - ei->magic = 0xDEADBEEF; - ei->magic2 = 0xDEADBEAF; return &ei->vfs_inode; } static void btrfs_destroy_inode(struct inode *inode) { - struct btrfs_inode *ei = BTRFS_I(inode); - WARN_ON(ei->magic != 0xDEADBEEF); - WARN_ON(ei->magic2 != 0xDEADBEAF); WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); - ei->magic = 0; - ei->magic2 = 0; kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } diff 
--git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 83a0194ab163..54978d1bd958 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -115,14 +115,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *inode_root = fs_info->inode_root; - btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->b_blocknr); - ret = btrfs_update_root(trans, tree_root, - &inode_root->root_key, - &inode_root->root_item); - BUG_ON(ret); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == extent_root->node->b_blocknr) -- cgit v1.2.3 From 48ddc6f44b5a167c00573b6012727d674e9c667a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 13:20:21 -0400 Subject: Btrfs: cow file extents before writing Signed-off-by: Chris Mason --- fs/btrfs/super.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3e8bfb0e5d7e..91aa1b4c50af 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1020,7 +1020,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, - iblock << inode->i_blkbits, 0); + iblock << inode->i_blkbits, 1); if (ret < 0) { err = ret; goto out; @@ -1040,9 +1040,25 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, blocknr = btrfs_file_extent_disk_blocknr(item); blocknr += btrfs_file_extent_offset(item); - /* exact match found, use it */ + /* exact match found, use it, FIXME, deal with extents + * other than the page size + */ if (ret == 0) { err = 0; + BUG_ON(btrfs_file_extent_disk_num_blocks(item) != 1); + if (btrfs_file_extent_generation(item) != trans->transid) { + struct btrfs_key ins; + ret = 
btrfs_alloc_extent(trans, root, 1, + blocknr, (u64)-1, &ins); + BUG_ON(ret); + btrfs_set_file_extent_disk_blocknr(item, ins.objectid); + mark_buffer_dirty(path->nodes[0]); + ret = btrfs_free_extent(trans, root, + blocknr, 1, 0); + BUG_ON(ret); + blocknr = ins.objectid; + + } map_bh(result, inode->i_sb, blocknr); goto out; } -- cgit v1.2.3 From 79b2cb1fa6430501907cb1384abbddf074833e45 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 13:51:41 -0400 Subject: Btrfs: only cow in get_block when create==1 Signed-off-by: Chris Mason --- fs/btrfs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 91aa1b4c50af..a3183972f310 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1020,7 +1020,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, - iblock << inode->i_blkbits, 1); + iblock << inode->i_blkbits, create); if (ret < 0) { err = ret; goto out; @@ -1046,7 +1046,8 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (ret == 0) { err = 0; BUG_ON(btrfs_file_extent_disk_num_blocks(item) != 1); - if (btrfs_file_extent_generation(item) != trans->transid) { + if (create && + btrfs_file_extent_generation(item) != trans->transid) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, 1, blocknr, (u64)-1, &ins); -- cgit v1.2.3 From 2932f3ec94ff0212313b8187064f93bcb76ce411 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 14:22:02 -0400 Subject: Btrfs: when forced to cow for file_write, get the page uptodate first Signed-off-by: Chris Mason --- fs/btrfs/super.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a3183972f310..b93d790e7484 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1268,6 +1268,8 @@ static int 
prepare_pages(struct btrfs_trans_handle *trans, struct page **pages, size_t num_pages, loff_t pos, + unsigned long first_index, + unsigned long last_index, size_t write_bytes) { int i; @@ -1289,6 +1291,13 @@ static int prepare_pages(struct btrfs_trans_handle *trans, } offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + if (!PageUptodate(pages[i]) && + (pages[i]->index == first_index || + pages[i]->index == last_index) && pos < isize) { + ret = mpage_readpage(pages[i], btrfs_get_block); + BUG_ON(ret); + lock_page(pages[i]); + } ret = nobh_prepare_write(pages[i], offset, offset + this_write, btrfs_get_block); @@ -1323,6 +1332,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page *pages[1]; + unsigned long first_index; + unsigned long last_index; if (file->f_flags & O_DIRECT) return -EINVAL; @@ -1340,13 +1351,15 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out; file_update_time(file); mutex_lock(&inode->i_mutex); + first_index = pos >> PAGE_CACHE_SHIFT; + last_index = (pos + count) >> PAGE_CACHE_SHIFT; while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; ret = prepare_pages(NULL, root, file, pages, num_pages, - pos, write_bytes); + pos, first_index, last_index, write_bytes); BUG_ON(ret); ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); -- cgit v1.2.3 From 2619ba1f0ff9540a9d84683310a1e350b5efde3d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 16:58:11 -0400 Subject: Btrfs: subvolumes Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/disk-io.c | 38 +++++++---- fs/btrfs/super.c | 178 +++++++++++++++++++++++++++++++++++++++++-------- fs/btrfs/transaction.c | 8 ++- 
4 files changed, 182 insertions(+), 43 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 73d4b842a316..4cd11a1b4b32 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,7 @@ * Check compat and incompat flags on the inode * Add virtual filesystems, mountable snapshots * Get rid of struct ctree_path, limiting tree levels held at one time +* EEXIST for dirs instead of hash overflow * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 760fdc9a7664..ff69162829b3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -301,6 +301,12 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, int ret = 0; printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)location->objectid); + if (root) { +printk("found %p in cache\n", root); + return root; + } root = kmalloc(sizeof(*root), GFP_NOFS); if (!root) { printk("failed1\n"); @@ -349,7 +355,8 @@ out: insert: printk("inserting %p\n", root); root->ref_cows = 1; - ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root, + ret = radix_tree_insert(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, root); if (ret) { printk("radix_tree_insert gives us %d\n", ret); @@ -460,6 +467,20 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) +{ + radix_tree_delete(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid); + if (root->inode) + iput(root->inode); + if (root->node) + brelse(root->node); + if (root->commit_root) + brelse(root->commit_root); + kfree(root); + return 0; +} + int del_fs_roots(struct btrfs_fs_info *fs_info) { int ret; @@ -472,19 +493,8 @@ int del_fs_roots(struct btrfs_fs_info *fs_info) 
ARRAY_SIZE(gang)); if (!ret) break; - for (i = 0; i < ret; i++) { - radix_tree_delete(&fs_info->fs_roots_radix, - (unsigned long)gang[i]); - if (gang[i]->inode) - iput(gang[i]->inode); - else - printk("no inode for root %p\n", gang[i]); - if (gang[i]->node) - brelse(gang[i]->node); - if (gang[i]->commit_root) - brelse(gang[i]->commit_root); - kfree(gang[i]); - } + for (i = 0; i < ret; i++) + free_fs_root(fs_info, gang[i]); } return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b93d790e7484..84abdde4e301 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -495,10 +495,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-EACCES); if (inode->i_state & I_NEW) { if (sub_root != root) { - ret = radix_tree_insert( - &root->fs_info->fs_roots_radix, - (unsigned long)sub_root, - sub_root); printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root); igrab(inode); sub_root->inode = inode; @@ -723,22 +719,19 @@ static int btrfs_write_inode(struct inode *inode, int wait) } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, - struct inode *dir, int mode) + struct btrfs_root *root, + u64 objectid, int mode) { struct inode *inode; struct btrfs_inode_item inode_item; - struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_key *location; int ret; - u64 objectid; - inode = new_inode(dir->i_sb); + inode = new_inode(root->fs_info->sb); if (!inode) return ERR_PTR(-ENOMEM); - BTRFS_I(inode)->root = BTRFS_I(dir)->root; - ret = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - BUG_ON(ret); + BTRFS_I(inode)->root = root; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; @@ -804,10 +797,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; int drop_inode = 0; + u64 objectid; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - inode = btrfs_new_inode(trans, 
dir, mode); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; @@ -833,9 +834,9 @@ out_unlock: } static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, - struct inode *inode, struct inode *dir) + struct btrfs_root *root, + u64 objectid, u64 dirid) { - struct btrfs_root *root = BTRFS_I(dir)->root; int ret; char buf[2]; struct btrfs_key key; @@ -843,22 +844,20 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, buf[0] = '.'; buf[1] = '.'; - key.objectid = inode->i_ino; + key.objectid = objectid; key.offset = 0; key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_dir_item(trans, root, buf, 1, inode->i_ino, + ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, &key, 1); if (ret) goto error; - key.objectid = dir->i_ino; - ret = btrfs_insert_dir_item(trans, root, buf, 2, inode->i_ino, + key.objectid = dirid; + ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, &key, 1); if (ret) goto error; - inode->i_size = 6; - ret = btrfs_update_inode(trans, root, inode); error: return ret; } @@ -870,6 +869,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct btrfs_root *root = BTRFS_I(dir)->root; int err = 0; int drop_on_err = 0; + u64 objectid; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -877,7 +877,14 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = PTR_ERR(trans); goto out_unlock; } - inode = btrfs_new_inode(trans, dir, S_IFDIR | mode); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, S_IFDIR | mode); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -886,7 +893,12 @@ static int 
btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; - err = btrfs_make_empty_dir(trans, inode, dir); + err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); + if (err) + goto out_fail; + + inode->i_size = 6; + err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail; err = btrfs_add_link(trans, dentry, inode); @@ -1666,6 +1678,102 @@ static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return retval; } +static int create_subvol(struct btrfs_root *root, char *name, int namelen) +{ + struct btrfs_trans_handle *trans; + struct btrfs_key key; + struct btrfs_root_item root_item; + struct btrfs_inode_item *inode_item; + struct buffer_head *subvol; + struct btrfs_leaf *leaf; + struct btrfs_root *new_root; + struct inode *inode; + int ret; + u64 objectid; + u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + BUG_ON(!trans); + + subvol = btrfs_alloc_free_block(trans, root); + leaf = btrfs_buffer_leaf(subvol); + btrfs_set_header_nritems(&leaf->header, 0); + btrfs_set_header_level(&leaf->header, 0); + btrfs_set_header_blocknr(&leaf->header, subvol->b_blocknr); + btrfs_set_header_generation(&leaf->header, trans->transid); + memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, + sizeof(leaf->header.fsid)); + + inode_item = &root_item.inode; + memset(inode_item, 0, sizeof(*inode_item)); + btrfs_set_inode_generation(inode_item, 1); + btrfs_set_inode_size(inode_item, 3); + btrfs_set_inode_nlink(inode_item, 1); + btrfs_set_inode_nblocks(inode_item, 1); + btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); + + btrfs_set_root_blocknr(&root_item, subvol->b_blocknr); + btrfs_set_root_refs(&root_item, 1); + + mark_buffer_dirty(subvol); + brelse(subvol); + subvol = NULL; + + ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, + 0, &objectid); + 
BUG_ON(ret); + + btrfs_set_root_dirid(&root_item, new_dirid); + + key.objectid = objectid; + key.offset = 1; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + &root_item); + BUG_ON(ret); + + /* + * insert the directory item + */ + key.offset = (u64)-1; + ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, + name, namelen, + root->fs_info->sb->s_root->d_inode->i_ino, + &key, 0); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + + new_root = btrfs_read_fs_root(root->fs_info, &key); + BUG_ON(!new_root); + + trans = btrfs_start_transaction(new_root, 1); + BUG_ON(!trans); + + inode = btrfs_new_inode(trans, new_root, new_dirid, S_IFDIR | 0700); + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + + ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); + BUG_ON(ret); + + inode->i_nlink = 1; + inode->i_size = 6; + ret = btrfs_update_inode(trans, new_root, inode); + BUG_ON(ret); + + ret = btrfs_commit_transaction(trans, new_root); + BUG_ON(ret); + + iput(inode); + + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + static int create_snapshot(struct btrfs_root *root, char *name, int namelen) { struct btrfs_trans_handle *trans; @@ -1674,6 +1782,9 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) int ret; u64 objectid; + if (!root->ref_cows) + return -EINVAL; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); @@ -1685,7 +1796,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) 0, &objectid); BUG_ON(ret); - memset(&new_root_item, 0, sizeof(new_root_item)); memcpy(&new_root_item, &root->root_item, sizeof(new_root_item)); @@ -1728,9 +1838,9 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int struct btrfs_ioctl_vol_args vol_args; int ret; int namelen; + struct btrfs_path 
*path; + u64 root_dirid; - if (!root->ref_cows) - return -EINVAL; switch (cmd) { case BTRFS_IOC_SNAP_CREATE: if (copy_from_user(&vol_args, @@ -1740,7 +1850,23 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int namelen = strlen(vol_args.name); if (namelen > BTRFS_VOL_NAME_MAX) return -EINVAL; - ret = create_snapshot(root, vol_args.name, namelen); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + root_dirid = btrfs_root_dirid( + &root->fs_info->tree_root->root_item); + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, + path, root_dirid, + vol_args.name, namelen, 0); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret == 0) + return -EEXIST; + + if (root == root->fs_info->tree_root) + ret = create_subvol(root, vol_args.name, namelen); + else + ret = create_snapshot(root, vol_args.name, namelen); WARN_ON(ret); break; default: diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f64c1729b0e1..e15a072407bf 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -64,7 +64,8 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, if (root != root->fs_info->tree_root && root->last_trans < running_trans_id) { radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root, BTRFS_ROOT_TRANS_TAG); + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); root->commit_root = root->node; get_bh(root->node); } @@ -171,8 +172,9 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, break; for (i = 0; i < ret; i++) { root = gang[i]; - radix_tree_tag_clear(radix, (unsigned long)root, - BTRFS_ROOT_TRANS_TAG); + radix_tree_tag_clear(radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { WARN_ON(root->node->b_blocknr != btrfs_root_blocknr(&root->root_item)); -- cgit v1.2.3 From 2d13d8d065cc7bbee6b1ad6ee3fe965d66badfe6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 
10 Apr 2007 20:07:20 -0400 Subject: Btrfs: detect duplicate subvol names Signed-off-by: Chris Mason --- fs/btrfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 84abdde4e301..2972aaa9eb7e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1853,13 +1853,13 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int path = btrfs_alloc_path(); if (!path) return -ENOMEM; - root_dirid = btrfs_root_dirid( - &root->fs_info->tree_root->root_item); + root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, root_dirid, vol_args.name, namelen, 0); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_free_path(path); if (ret == 0) return -EEXIST; -- cgit v1.2.3 From 7eccb903a817e890c947ba4bc90c6a9af9b4219a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Apr 2007 15:53:25 -0400 Subject: Btrfs: create a logical->phsyical block number mapping scheme Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 38 +++++++------- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 140 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent-tree.c | 8 +-- fs/btrfs/super.c | 6 +-- fs/btrfs/transaction.c | 8 +-- 7 files changed, 134 insertions(+), 68 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f31ac843bf2e..e2fc6f0d0243 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -66,21 +66,21 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); - btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); + btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, 
trans->transid); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; get_bh(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); + btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); } else { btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, - cow->b_blocknr); + bh_blocknr(cow)); btrfs_mark_buffer_dirty(parent); - btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); + btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); } btrfs_block_release(root, buf); mark_buffer_dirty(cow); @@ -312,7 +312,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root */ if (!parent_buf) { struct buffer_head *child; - u64 blocknr = mid_buf->b_blocknr; + u64 blocknr = bh_blocknr(mid_buf); if (btrfs_header_nritems(&mid->header) != 1) return 0; @@ -361,7 +361,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = right_buf->b_blocknr; + u64 blocknr = bh_blocknr(right_buf); clean_tree_block(trans, root, right_buf); wait_on_buffer(right_buf); btrfs_block_release(root, right_buf); @@ -400,7 +400,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(&mid->header) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = mid_buf->b_blocknr; + u64 blocknr = bh_blocknr(mid_buf); clean_tree_block(trans, root, mid_buf); wait_on_buffer(mid_buf); btrfs_block_release(root, mid_buf); @@ -686,7 +686,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, t->b_blocknr); + btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); btrfs_set_header_generation(&c->header, trans->transid); lower = 
btrfs_buffer_node(path->nodes[level-1]); memcpy(c->header.fsid, root->fs_info->disk_super->fsid, @@ -697,7 +697,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root lower_key = &lower->ptrs[0].key; btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); + btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1])); btrfs_mark_buffer_dirty(t); @@ -780,7 +780,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); - btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); + btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); btrfs_set_header_generation(&split->header, trans->transid); memcpy(split->header.fsid, root->fs_info->disk_super->fsid, sizeof(split->header.fsid)); @@ -794,7 +794,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(t); btrfs_mark_buffer_dirty(split_buffer); wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - split_buffer->b_blocknr, path->slots[level + 1] + 1, + bh_blocknr(split_buffer), path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; @@ -1138,7 +1138,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); + btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, @@ -1152,7 +1152,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, 
struct btrfs_root btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, &disk_key, - right_buffer->b_blocknr, + bh_blocknr(right_buffer), path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1173,7 +1173,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, &disk_key, - right_buffer->b_blocknr, + bh_blocknr(right_buffer), path->slots[1] - 1, 1); if (wret) ret = wret; @@ -1207,7 +1207,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&l->header, mid); ret = 0; wret = insert_ptr(trans, root, path, &right->items[0].key, - right_buffer->b_blocknr, path->slots[1] + 1, 1); + bh_blocknr(right_buffer), path->slots[1] + 1, 1); if (wret) ret = wret; btrfs_mark_buffer_dirty(right_buffer); @@ -1228,7 +1228,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); + btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, @@ -1237,7 +1237,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(&right->header, 0); wret = insert_ptr(trans, root, path, &disk_key, - right_buffer->b_blocknr, + bh_blocknr(right_buffer), path->slots[1], 1); if (wret) ret = wret; @@ -1456,7 +1456,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - leaf_buf->b_blocknr, 1, 1); + bh_blocknr(leaf_buf), 1, 1); if (wret) ret = wret; } @@ -1487,7 +1487,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct 
btrfs_root *root, ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = leaf_buf->b_blocknr; + u64 blocknr = bh_blocknr(leaf_buf); clean_tree_block(trans, root, leaf_buf); wait_on_buffer(leaf_buf); wret = del_ptr(trans, root, path, 1, slot); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 01310de2bf4a..454eb88611bb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -242,6 +242,7 @@ struct btrfs_fs_info { struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; + struct radix_tree_root dev_radix; u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e09233262af6..c872a7e67abd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -10,10 +10,30 @@ #include "transaction.h" #include "btrfs_inode.h" +struct dev_lookup { + u64 block_start; + u64 num_blocks; + struct block_device *bdev; +}; + +u64 bh_blocknr(struct buffer_head *bh) +{ + int blkbits = bh->b_page->mapping->host->i_blkbits; + u64 blocknr = bh->b_page->index << (PAGE_CACHE_SHIFT - blkbits); + unsigned long offset; + + if (PageHighMem(bh->b_page)) + offset = (unsigned long)bh->b_data; + else + offset = bh->b_data - (char *)page_address(bh->b_page); + blocknr += offset >> (PAGE_CACHE_SHIFT - blkbits); + return blocknr; +} + static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); - if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { + if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { BUG(); } return 0; @@ -40,7 +60,7 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) head = page_buffers(page); bh = head; do { - if (buffer_mapped(bh) && bh->b_blocknr == blocknr) { + if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) { ret = bh; get_bh(bh); goto out_unlock; @@ -56,6 +76,33 @@ 
out_unlock: return ret; } +static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, + u64 logical) +{ + struct dev_lookup *lookup[2]; + char b[BDEVNAME_SIZE]; + + int ret; + + root = root->fs_info->dev_root; + ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, + (void **)lookup, + (unsigned long)logical, + ARRAY_SIZE(lookup)); + if (ret == 0 || lookup[0]->block_start > logical || + lookup[0]->block_start + lookup[0]->num_blocks <= logical) { + ret = -ENOENT; + goto out; + } + bh->b_bdev = lookup[0]->bdev; + bh->b_blocknr = logical - lookup[0]->block_start; +printk("logical mapping %Lu to %lu bdev %s\n", logical, bh->b_blocknr, bdevname(bh->b_bdev, b)); + set_buffer_mapped(bh); + ret = 0; +out: + return ret; +} + struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { @@ -66,6 +113,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, struct buffer_head *bh; struct buffer_head *head; struct buffer_head *ret = NULL; + int err; u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); page = grab_cache_page(mapping, index); @@ -78,11 +126,10 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh = head; do { if (!buffer_mapped(bh)) { - bh->b_bdev = root->fs_info->sb->s_bdev; - bh->b_blocknr = first_block; - set_buffer_mapped(bh); + err = map_bh_to_logical(root, bh, first_block); + BUG_ON(err); } - if (bh->b_blocknr == blocknr) { + if (bh_blocknr(bh) == blocknr) { ret = bh; get_bh(bh); goto out_unlock; @@ -98,38 +145,13 @@ out_unlock: return ret; } -static sector_t max_block(struct block_device *bdev) -{ - sector_t retval = ~((sector_t)0); - loff_t sz = i_size_read(bdev->bd_inode); - - if (sz) { - unsigned int size = block_size(bdev); - unsigned int sizebits = blksize_bits(size); - retval = (sz >> sizebits); - } - return retval; -} - static int btree_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - if (iblock >= 
max_block(inode->i_sb->s_bdev)) { - if (create) - return -EIO; - - /* - * for reads, we're just trying to fill a partial page. - * return a hole, they will have to call get_block again - * before they can fill it, and they will get -EIO at that - * time - */ - return 0; - } - bh->b_bdev = inode->i_sb->s_bdev; - bh->b_blocknr = iblock; - set_buffer_mapped(bh); - return 0; + int err; + struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; + err = map_bh_to_logical(root, bh, iblock); + return err; } int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, @@ -164,8 +186,8 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, return ret; if (verify) { if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) { - printk("checksum verify failed on %lu\n", - bh->b_blocknr); + printk("checksum verify failed on %Lu\n", + bh_blocknr(bh)); return 1; } } else { @@ -386,10 +408,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); int ret; struct btrfs_super_block *disk_super; + struct dev_lookup *dev_lookup; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); + INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; @@ -422,6 +446,13 @@ struct btrfs_root *open_ctree(struct super_block *sb) __setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + + dev_lookup = kmalloc(sizeof(*dev_lookup), GFP_NOFS); + dev_lookup->block_start = 0; + dev_lookup->num_blocks = (u32)-2; + dev_lookup->bdev = sb->s_bdev; + ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup); + BUG_ON(ret); fs_info->sb_buffer = read_tree_block(tree_root, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); @@ -432,6 +463,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) return NULL; + 
radix_tree_delete(&fs_info->dev_radix, (u32)-2); + dev_lookup->block_start = btrfs_super_device_block_start(disk_super); + dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); + ret = radix_tree_insert(&fs_info->dev_radix, + dev_lookup->block_start + + dev_lookup->num_blocks, dev_lookup); + BUG_ON(ret); + fs_info->disk_super = disk_super; dev_root->node = read_tree_block(tree_root, btrfs_super_device_root(disk_super)); @@ -459,7 +498,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *bh = root->fs_info->sb_buffer; btrfs_set_super_root(root->fs_info->disk_super, - root->fs_info->tree_root->node->b_blocknr); + bh_blocknr(root->fs_info->tree_root->node)); lock_buffer(bh); WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); @@ -506,6 +545,29 @@ int del_fs_roots(struct btrfs_fs_info *fs_info) } return 0; } +static int free_dev_radix(struct btrfs_fs_info *fs_info) +{ + struct dev_lookup *lookup[8]; + struct block_device *super_bdev = fs_info->sb->s_bdev; + int ret; + int i; + while(1) { + ret = radix_tree_gang_lookup(&fs_info->dev_radix, + (void **)lookup, 0, + ARRAY_SIZE(lookup)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + if (lookup[i]->bdev != super_bdev) + close_bdev_excl(lookup[i]->bdev); + radix_tree_delete(&fs_info->dev_radix, + lookup[i]->block_start + + lookup[i]->num_blocks); + kfree(lookup[i]); + } + } + return 0; +} int close_ctree(struct btrfs_root *root) { @@ -537,6 +599,8 @@ int close_ctree(struct btrfs_root *root) crypto_free_hash(fs_info->hash_tfm); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); + + free_dev_radix(fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 0ef6e6f714af..46cc5dc9d572 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -41,4 +41,5 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char 
*result); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location); +u64 bh_blocknr(struct buffer_head *bh); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index be9630df506c..d560831c10a7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -80,7 +80,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return inc_block_ref(trans, root, root->node->b_blocknr, 1); + return inc_block_ref(trans, root, bh_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -563,7 +563,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_block_ref(trans, root, path->nodes[*level]->b_blocknr, + ret = lookup_block_ref(trans, root, bh_blocknr(path->nodes[*level]), 1, &refs); BUG_ON(ret); if (refs > 1) @@ -607,7 +607,7 @@ out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, - path->nodes[*level]->b_blocknr, 1, 1); + bh_blocknr(path->nodes[*level]), 1, 1); btrfs_block_release(root, path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -635,7 +635,7 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } else { ret = btrfs_free_extent(trans, root, - path->nodes[*level]->b_blocknr, + bh_blocknr(path->nodes[*level]), 1, 1); BUG_ON(ret); btrfs_block_release(root, path->nodes[*level]); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2972aaa9eb7e..4ff0cc1efb13 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1700,7 +1700,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) leaf = btrfs_buffer_leaf(subvol); btrfs_set_header_nritems(&leaf->header, 0); btrfs_set_header_level(&leaf->header, 0); - 
btrfs_set_header_blocknr(&leaf->header, subvol->b_blocknr); + btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); btrfs_set_header_generation(&leaf->header, trans->transid); memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); @@ -1713,7 +1713,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_inode_nblocks(inode_item, 1); btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, subvol->b_blocknr); + btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); mark_buffer_dirty(subvol); @@ -1803,7 +1803,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.offset = 1; key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - btrfs_set_root_blocknr(&new_root_item, root->node->b_blocknr); + btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e15a072407bf..3a15943ea8ed 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -119,10 +119,10 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->b_blocknr) + if (old_extent_block == bh_blocknr(extent_root->node)) break; btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->b_blocknr); + bh_blocknr(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -176,7 +176,7 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(root->node->b_blocknr != + WARN_ON(bh_blocknr(root->node) != btrfs_root_blocknr(&root->root_item)); brelse(root->commit_root); root->commit_root 
= NULL; @@ -191,7 +191,7 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, dirty->root = root; root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, - root->node->b_blocknr); + bh_blocknr(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); -- cgit v1.2.3 From 8352d8a473ac84bf7a1c69690b626946d744ca58 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Apr 2007 10:43:05 -0400 Subject: Btrfs: add disk ioctl, mostly working Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/disk-io.h | 6 +++ fs/btrfs/ioctl.h | 2 + fs/btrfs/super.c | 96 +++++++++++++++++++++++++++++++++---- fs/btrfs/transaction.c | 6 +++ 5 files changed, 221 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c872a7e67abd..3ba4df20c913 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,6 +16,28 @@ struct dev_lookup { struct block_device *bdev; }; +int btrfs_insert_dev_radix(struct btrfs_root *root, + struct block_device *bdev, + u64 block_start, + u64 num_blocks) +{ + struct dev_lookup *lookup; + char b[BDEVNAME_SIZE]; + int ret; + + lookup = kmalloc(sizeof(*lookup), GFP_NOFS); + if (!lookup) + return -ENOMEM; + lookup->block_start = block_start; + lookup->num_blocks = num_blocks; + lookup->bdev = bdev; +printk("inserting %s into dev radix %Lu %Lu\n", bdevname(bdev, b), block_start, num_blocks); + + ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + + num_blocks - 1, lookup); + return ret; +} + u64 bh_blocknr(struct buffer_head *bh) { int blkbits = bh->b_page->mapping->host->i_blkbits; @@ -34,6 +56,8 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { + printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header 
is %Lu\n", + bh_blocknr(buf), btrfs_header_blocknr(&node->header)); BUG(); } return 0; @@ -76,11 +100,10 @@ out_unlock: return ret; } -static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, +int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical) { struct dev_lookup *lookup[2]; - char b[BDEVNAME_SIZE]; int ret; @@ -96,7 +119,6 @@ static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, } bh->b_bdev = lookup[0]->bdev; bh->b_blocknr = logical - lookup[0]->block_start; -printk("logical mapping %Lu to %lu bdev %s\n", logical, bh->b_blocknr, bdevname(bh->b_bdev, b)); set_buffer_mapped(bh); ret = 0; out: @@ -126,7 +148,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh = head; do { if (!buffer_mapped(bh)) { - err = map_bh_to_logical(root, bh, first_block); + err = btrfs_map_bh_to_logical(root, bh, first_block); BUG_ON(err); } if (bh_blocknr(bh) == blocknr) { @@ -150,7 +172,7 @@ static int btree_get_block(struct inode *inode, sector_t iblock, { int err; struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - err = map_bh_to_logical(root, bh, iblock); + err = btrfs_map_bh_to_logical(root, bh, iblock); return err; } @@ -396,6 +418,89 @@ printk("all worked\n"); return root; } +int btrfs_open_disk(struct btrfs_root *root, u64 block_start, u64 num_blocks, + char *filename, int name_len) +{ + char *null_filename; + struct block_device *bdev; + int ret; + + if (block_start == 0) { +printk("skipping disk with block_start == 0\n"); +return 0; + } + null_filename = kmalloc(name_len + 1, GFP_NOFS); + if (!null_filename) + return -ENOMEM; + memcpy(null_filename, filename, name_len); + null_filename[name_len] = '\0'; + + bdev = open_bdev_excl(null_filename, O_RDWR, root->fs_info->sb); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + set_blocksize(bdev, root->fs_info->sb->s_blocksize); + ret = btrfs_insert_dev_radix(root, bdev, block_start, 
num_blocks); + BUG_ON(ret); + ret = 0; +out: + kfree(null_filename); + return ret; +} + +static int read_device_info(struct btrfs_root *root) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + struct btrfs_device_item *dev_item; + int nritems; + int slot; + + root = root->fs_info->dev_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + key.objectid = 0; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + while(1) { + slot = path->slots[0]; + if (slot >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + } + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (btrfs_key_type(&key) != BTRFS_DEV_ITEM_KEY) { + path->slots[0]++; + continue; + } + dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item); +printk("found key %Lu %Lu\n", key.objectid, key.offset); + ret = btrfs_open_disk(root, key.objectid, key.offset, + (char *)(dev_item + 1), + btrfs_device_pathlen(dev_item)); + BUG_ON(ret); + path->slots[0]++; + } + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), @@ -463,17 +568,26 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) return NULL; + i_size_write(fs_info->btree_inode, + btrfs_super_total_blocks(disk_super) << + fs_info->btree_inode->i_blkbits); + radix_tree_delete(&fs_info->dev_radix, (u32)-2); dev_lookup->block_start = btrfs_super_device_block_start(disk_super); dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); ret = 
radix_tree_insert(&fs_info->dev_radix, dev_lookup->block_start + - dev_lookup->num_blocks, dev_lookup); + dev_lookup->num_blocks - 1, dev_lookup); BUG_ON(ret); fs_info->disk_super = disk_super; + dev_root->node = read_tree_block(tree_root, btrfs_super_device_root(disk_super)); + + ret = read_device_info(dev_root); + BUG_ON(ret); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -562,7 +676,7 @@ static int free_dev_radix(struct btrfs_fs_info *fs_info) close_bdev_excl(lookup[i]->bdev); radix_tree_delete(&fs_info->dev_radix, lookup[i]->block_start + - lookup[i]->num_blocks); + lookup[i]->num_blocks - 1); kfree(lookup[i]); } } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 46cc5dc9d572..aa9f56c38419 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -42,4 +42,10 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location); u64 bh_blocknr(struct buffer_head *bh); +int btrfs_insert_dev_radix(struct btrfs_root *root, + struct block_device *bdev, + u64 block_start, + u64 num_blocks); +int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, + u64 logical); #endif diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 201fb3270723..497fadd98c3a 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -10,4 +10,6 @@ struct btrfs_ioctl_vol_args { #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4ff0cc1efb13..c46d7eafcf62 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1073,6 +1073,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, } map_bh(result, inode->i_sb, blocknr); + btrfs_map_bh_to_logical(root, result, blocknr); goto out; } @@ -1092,7 +1093,8 @@ 
static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, extent_end = extent_start + btrfs_file_extent_num_blocks(item); if (iblock >= extent_start && iblock < extent_end) { err = 0; - map_bh(result, inode->i_sb, blocknr + iblock - extent_start); + btrfs_map_bh_to_logical(root, result, blocknr + iblock - + extent_start); goto out; } allocate: @@ -1112,6 +1114,7 @@ allocate: set_buffer_new(result); map_bh(result, inode->i_sb, blocknr); + btrfs_map_bh_to_logical(root, result, blocknr); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -1153,12 +1156,6 @@ static int btrfs_readpage(struct file *file, struct page *page) return mpage_readpage(page, btrfs_get_block); } -static int btrfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - return mpage_readpages(mapping, pages, nr_pages, btrfs_get_block); -} - static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { return nobh_writepage(page, btrfs_get_block, wbc); @@ -1831,12 +1828,81 @@ printk("adding snapshot name %.*s root %Lu %Lu %u\n", namelen, name, key.objecti return 0; } +static int add_disk(struct btrfs_root *root, char *name, int namelen) +{ + struct block_device *bdev; + struct btrfs_path *path; + struct super_block *sb = root->fs_info->sb; + struct btrfs_root *dev_root = root->fs_info->dev_root; + struct btrfs_trans_handle *trans; + struct btrfs_device_item *dev_item; + struct btrfs_key key; + u16 item_size; + u64 num_blocks; + u64 new_blocks; + int ret; +printk("adding disk %s\n", name); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super); + bdev = open_bdev_excl(name, O_RDWR, sb); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); +printk("open bdev excl failed ret %d\n", ret); + goto out_nolock; + } + set_blocksize(bdev, sb->s_blocksize); + new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits; + key.objectid = num_blocks; + 
key.offset = new_blocks; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); + + mutex_lock(&dev_root->fs_info->fs_mutex); + trans = btrfs_start_transaction(dev_root, 1); + item_size = sizeof(*dev_item) + namelen; +printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size); + ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size); + if (ret) { +printk("insert failed %d\n", ret); + close_bdev_excl(bdev); + if (ret > 0) + ret = -EEXIST; + goto out; + } + dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_device_item); + btrfs_set_device_pathlen(dev_item, namelen); + memcpy(dev_item + 1, name, namelen); + mark_buffer_dirty(path->nodes[0]); + + ret = btrfs_insert_dev_radix(root, bdev, num_blocks, new_blocks); + + if (!ret) { + btrfs_set_super_total_blocks(root->fs_info->disk_super, + num_blocks + new_blocks); + i_size_write(root->fs_info->btree_inode, + (num_blocks + new_blocks) << + root->fs_info->btree_inode->i_blkbits); + } + +out: + ret = btrfs_commit_transaction(trans, dev_root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); +out_nolock: + btrfs_free_path(path); + + return ret; +} + static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; - int ret; + int ret = 0; int namelen; struct btrfs_path *path; u64 root_dirid; @@ -1869,10 +1935,21 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int ret = create_snapshot(root, vol_args.name, namelen); WARN_ON(ret); break; + case BTRFS_IOC_ADD_DISK: + if (copy_from_user(&vol_args, + (struct btrfs_ioctl_vol_args __user *)arg, + sizeof(vol_args))) + return -EFAULT; + namelen = strlen(vol_args.name); + if (namelen > BTRFS_VOL_NAME_MAX) + return -EINVAL; + vol_args.name[namelen] = '\0'; + ret = add_disk(root, vol_args.name, namelen); + break; default: return 
-ENOTTY; } - return 0; + return ret; } static struct kmem_cache *btrfs_inode_cachep; @@ -2004,7 +2081,6 @@ static struct file_operations btrfs_dir_file_operations = { static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, - .readpages = btrfs_readpages, .writepage = btrfs_writepage, .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3a15943ea8ed..8740752f3845 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -116,7 +116,13 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *dev_root = fs_info->dev_root; + if (btrfs_super_device_root(fs_info->disk_super) != + bh_blocknr(dev_root->node)) { + btrfs_set_super_device_root(fs_info->disk_super, + bh_blocknr(dev_root->node)); + } while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == bh_blocknr(extent_root->node)) -- cgit v1.2.3 From b4100d64241fed93a3f821ddf59d11ab4443a3ba Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Apr 2007 12:14:00 -0400 Subject: Btrfs: add a device id to device items Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 36 ++++++++++++++++++++++++++++++++++++ fs/btrfs/disk-io.c | 31 ++++++++++++++++++++----------- fs/btrfs/disk-io.h | 1 + fs/btrfs/super.c | 9 ++++++++- 4 files changed, 65 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 454eb88611bb..ef3583cf74d6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -91,10 +91,12 @@ struct btrfs_super_block { __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; + __le64 last_device_id; /* fields below here vary with the underlying disk */ __le64 device_block_start; __le64 device_num_blocks; __le64 
device_root; + __le64 device_id; } __attribute__ ((__packed__)); /* @@ -230,6 +232,7 @@ struct btrfs_csum_item { struct btrfs_device_item { __le16 pathlen; + __le64 device_id; } __attribute__ ((__packed__)); struct crypto_hash; @@ -798,6 +801,28 @@ static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 s->root_dir_objectid = cpu_to_le64(val); } +static inline u64 btrfs_super_last_device_id(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->last_device_id); +} + +static inline void btrfs_set_super_last_device_id(struct btrfs_super_block *s, + u64 val) +{ + s->last_device_id = cpu_to_le64(val); +} + +static inline u64 btrfs_super_device_id(struct btrfs_super_block *s) +{ + return le64_to_cpu(s->device_id); +} + +static inline void btrfs_set_super_device_id(struct btrfs_super_block *s, + u64 val) +{ + s->device_id = cpu_to_le64(val); +} + static inline u64 btrfs_super_device_block_start(struct btrfs_super_block *s) { return le64_to_cpu(s->device_block_start); @@ -910,6 +935,17 @@ static inline void btrfs_set_device_pathlen(struct btrfs_device_item *d, d->pathlen = cpu_to_le16(val); } +static inline u64 btrfs_device_id(struct btrfs_device_item *d) +{ + return le64_to_cpu(d->device_id); +} + +static inline void btrfs_set_device_id(struct btrfs_device_item *d, + u64 val) +{ + d->device_id = cpu_to_le64(val); +} + static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3ba4df20c913..06b969c14625 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -13,11 +13,13 @@ struct dev_lookup { u64 block_start; u64 num_blocks; + u64 device_id; struct block_device *bdev; }; int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, + u64 device_id, u64 block_start, u64 num_blocks) { @@ -31,6 +33,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, lookup->block_start = block_start; lookup->num_blocks = num_blocks; lookup->bdev = bdev; 
+ lookup->device_id = device_id; printk("inserting %s into dev radix %Lu %Lu\n", bdevname(bdev, b), block_start, num_blocks); ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + @@ -418,17 +421,14 @@ printk("all worked\n"); return root; } -int btrfs_open_disk(struct btrfs_root *root, u64 block_start, u64 num_blocks, - char *filename, int name_len) +static int btrfs_open_disk(struct btrfs_root *root, u64 device_id, + u64 block_start, u64 num_blocks, + char *filename, int name_len) { char *null_filename; struct block_device *bdev; int ret; - if (block_start == 0) { -printk("skipping disk with block_start == 0\n"); -return 0; - } null_filename = kmalloc(name_len + 1, GFP_NOFS); if (!null_filename) return -ENOMEM; @@ -441,7 +441,8 @@ return 0; goto out; } set_blocksize(bdev, root->fs_info->sb->s_blocksize); - ret = btrfs_insert_dev_radix(root, bdev, block_start, num_blocks); + ret = btrfs_insert_dev_radix(root, bdev, device_id, + block_start, num_blocks); BUG_ON(ret); ret = 0; out: @@ -490,10 +491,14 @@ static int read_device_info(struct btrfs_root *root) } dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item); printk("found key %Lu %Lu\n", key.objectid, key.offset); - ret = btrfs_open_disk(root, key.objectid, key.offset, - (char *)(dev_item + 1), - btrfs_device_pathlen(dev_item)); - BUG_ON(ret); + if (btrfs_device_id(dev_item) != + btrfs_super_device_id(root->fs_info->disk_super)) { + ret = btrfs_open_disk(root, btrfs_device_id(dev_item), + key.objectid, key.offset, + (char *)(dev_item + 1), + btrfs_device_pathlen(dev_item)); + BUG_ON(ret); + } path->slots[0]++; } btrfs_free_path(path); @@ -556,6 +561,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) dev_lookup->block_start = 0; dev_lookup->num_blocks = (u32)-2; dev_lookup->bdev = sb->s_bdev; + dev_lookup->device_id = 0; ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup); BUG_ON(ret); fs_info->sb_buffer = read_tree_block(tree_root, @@ -575,6 +581,8 @@ struct btrfs_root 
*open_ctree(struct super_block *sb) radix_tree_delete(&fs_info->dev_radix, (u32)-2); dev_lookup->block_start = btrfs_super_device_block_start(disk_super); dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); + dev_lookup->device_id = btrfs_super_device_id(disk_super); + ret = radix_tree_insert(&fs_info->dev_radix, dev_lookup->block_start + dev_lookup->num_blocks - 1, dev_lookup); @@ -659,6 +667,7 @@ int del_fs_roots(struct btrfs_fs_info *fs_info) } return 0; } + static int free_dev_radix(struct btrfs_fs_info *fs_info) { struct dev_lookup *lookup[8]; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index aa9f56c38419..444ebb0141ae 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -44,6 +44,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, u64 bh_blocknr(struct buffer_head *bh); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, + u64 device_id, u64 block_start, u64 num_blocks); int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c46d7eafcf62..8dcf600e39ef 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1840,7 +1840,9 @@ static int add_disk(struct btrfs_root *root, char *name, int namelen) u16 item_size; u64 num_blocks; u64 new_blocks; + u64 device_id; int ret; + printk("adding disk %s\n", name); path = btrfs_alloc_path(); if (!path) @@ -1875,9 +1877,14 @@ printk("insert failed %d\n", ret); path->slots[0], struct btrfs_device_item); btrfs_set_device_pathlen(dev_item, namelen); memcpy(dev_item + 1, name, namelen); + + device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1; + btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id); + btrfs_set_device_id(dev_item, device_id); mark_buffer_dirty(path->nodes[0]); - ret = btrfs_insert_dev_radix(root, bdev, num_blocks, new_blocks); + ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks, + new_blocks); if (!ret) { 
btrfs_set_super_total_blocks(root->fs_info->disk_super, -- cgit v1.2.3 From 6567e837df07e43bffc08ac40858af8133a007bf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 16 Apr 2007 09:22:45 -0400 Subject: Btrfs: early work to file_write in big extents Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 54 +++++++++++++++++++++++++ fs/btrfs/ctree.h | 15 +++++++ fs/btrfs/file-item.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/super.c | 75 ++++++++++++++++++++++++++-------- 4 files changed, 232 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e2fc6f0d0243..6b76fd967153 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1249,6 +1249,60 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u32 data_size) +{ + int ret = 0; + int slot; + int slot_orig; + struct btrfs_leaf *leaf; + struct buffer_head *leaf_buf; + u32 nritems; + unsigned int data_end; + unsigned int old_data; + unsigned int old_size; + int i; + + slot_orig = path->slots[0]; + leaf_buf = path->nodes[0]; + leaf = btrfs_buffer_leaf(leaf_buf); + + nritems = btrfs_header_nritems(&leaf->header); + data_end = leaf_data_end(root, leaf); + + if (btrfs_leaf_free_space(root, leaf) < data_size) + BUG(); + slot = path->slots[0]; + old_data = btrfs_item_end(leaf->items + slot); + + BUG_ON(slot < 0); + BUG_ON(slot >= nritems); + + /* + * item0..itemN ... dataN.offset..dataN.size .. 
data0.size + */ + /* first correct the data pointers */ + for (i = slot; i < nritems; i++) { + u32 ioff = btrfs_item_offset(leaf->items + i); + btrfs_set_item_offset(leaf->items + i, + ioff - data_size); + } + /* shift the data */ + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end - data_size, btrfs_leaf_data(leaf) + + data_end, old_data - data_end); + data_end = old_data; + old_size = btrfs_item_size(leaf->items + slot); + btrfs_set_item_size(leaf->items + slot, old_size + data_size); + btrfs_mark_buffer_dirty(leaf_buf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) + BUG(); + check_leaf(root, path, 0); + return ret; +} + /* * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ef3583cf74d6..796f19d03ab5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -227,6 +227,7 @@ struct btrfs_file_extent_item { } __attribute__ ((__packed__)); struct btrfs_csum_item { + __le64 extent_offset; u8 csum[BTRFS_CSUM_SIZE]; } __attribute__ ((__packed__)); @@ -924,6 +925,17 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } +static inline u64 btrfs_csum_extent_offset(struct btrfs_csum_item *c) +{ + return le64_to_cpu(c->extent_offset); +} + +static inline void btrfs_set_csum_extent_offset(struct btrfs_csum_item *c, + u64 val) +{ + c->extent_offset = cpu_to_le64(val); +} + static inline u16 btrfs_device_pathlen(struct btrfs_device_item *d) { return le16_to_cpu(d->pathlen); @@ -1002,6 +1014,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); /* ctree.c */ +int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_path *path, u32 data_size); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, 
struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -1071,6 +1085,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, + u64 extent_offset, char *data, size_t len); int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d9fd7f4d3794..93d42d650820 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -3,6 +3,9 @@ #include "disk-io.h" #include "transaction.h" +#define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item)) / \ + sizeof(struct btrfs_csum_item)) - 1)) int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, @@ -43,6 +46,54 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, return 0; } +static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid, u64 offset) +{ + int ret; + struct btrfs_key file_key; + struct btrfs_key found_key; + struct btrfs_csum_item *item; + struct btrfs_leaf *leaf; + u64 csum_offset = 0; + + file_key.objectid = objectid; + file_key.offset = offset; + file_key.flags = 0; + btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); + if (ret < 0) + goto fail; + leaf = btrfs_buffer_leaf(path->nodes[0]); + if (ret > 0) { + ret = 1; + if (path->slots[0] == 0) + goto fail; + path->slots[0]--; + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || + found_key.objectid != objectid) { + goto fail; + } + csum_offset = (offset - found_key.offset) >> + root->fs_info->sb->s_blocksize_bits; + if (csum_offset >= + btrfs_item_size(leaf->items + path->slots[0]) / + sizeof(struct btrfs_csum_item)) { + goto fail; + } + } + item = 
btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); + item += csum_offset; + return item; +fail: + if (ret > 0) + ret = -EIO; + return ERR_PTR(ret); +} + + int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, @@ -52,11 +103,16 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_key file_key; int ins_len = mod < 0 ? -1 : 0; int cow = mod != 0; + struct btrfs_csum_item *csum_item; + csum_item = __lookup_csum_item(root, path, objectid, offset); + if (IS_ERR(csum_item)) + return PTR_ERR(csum_item); file_key.objectid = objectid; - file_key.offset = offset; + file_key.offset = btrfs_csum_extent_offset(csum_item); file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); + btrfs_release_path(root, path); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); return ret; } @@ -64,12 +120,16 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, + u64 extent_offset, char *data, size_t len) { int ret; struct btrfs_key file_key; + struct btrfs_key found_key; struct btrfs_path *path; struct btrfs_csum_item *item; + struct btrfs_leaf *leaf; + u64 csum_offset; path = btrfs_alloc_path(); BUG_ON(!path); @@ -78,14 +138,50 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + ret = btrfs_search_slot(trans, root, &file_key, path, + sizeof(struct btrfs_csum_item), 1); + if (ret < 0) + goto fail; + if (ret == 0) { + csum_offset = 0; + goto csum; + } + if (path->slots[0] == 0) { + btrfs_release_path(root, path); + goto insert; + } + path->slots[0]--; + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); + csum_offset = (offset - found_key.offset) >> + 
root->fs_info->sb->s_blocksize_bits; + if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || + found_key.objectid != objectid || + csum_offset >= MAX_CSUM_ITEMS(root)) { + btrfs_release_path(root, path); + goto insert; + } + if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / + sizeof(struct btrfs_csum_item)) { + ret = btrfs_extend_item(trans, root, path, + sizeof(struct btrfs_csum_item)); + BUG_ON(ret); + goto csum; + } + +insert: + csum_offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &file_key, - BTRFS_CSUM_SIZE); + sizeof(struct btrfs_csum_item)); if (ret != 0 && ret != -EEXIST) goto fail; +csum: item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; + item += csum_offset; ret = btrfs_csum_data(root, data, len, item->csum); + btrfs_set_csum_extent_offset(item, extent_offset); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); @@ -111,12 +207,13 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); - if (ret) + + item = __lookup_csum_item(root, path, objectid, offset); + if (IS_ERR(item)) { + ret = PTR_ERR(item); goto fail; - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_csum_item); - ret = 0; + } + ret = btrfs_csum_data(root, data, len, result); WARN_ON(ret); if (memcmp(result, item->csum, BTRFS_CSUM_SIZE)) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8dcf600e39ef..ec689992fdf4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1027,8 +1027,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - if (create) + if (create) { trans = btrfs_start_transaction(root, 1); + WARN_ON(1); + } ret = btrfs_lookup_file_extent(trans, root, path, 
inode->i_ino, @@ -1055,9 +1057,8 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, /* exact match found, use it, FIXME, deal with extents * other than the page size */ - if (ret == 0) { + if (0 && ret == 0) { err = 0; - BUG_ON(btrfs_file_extent_disk_num_blocks(item) != 1); if (create && btrfs_file_extent_generation(item) != trans->transid) { struct btrfs_key ins; @@ -1072,7 +1073,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, blocknr = ins.objectid; } - map_bh(result, inode->i_sb, blocknr); btrfs_map_bh_to_logical(root, result, blocknr); goto out; } @@ -1231,6 +1231,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct file *file, struct page **pages, size_t num_pages, + u64 extent_offset, loff_t pos, size_t write_bytes) { @@ -1250,6 +1251,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, trans = btrfs_start_transaction(root, 1); btrfs_csum_file_block(trans, root, inode->i_ino, pages[i]->index << PAGE_CACHE_SHIFT, + extent_offset, kmap(pages[i]), PAGE_CACHE_SIZE); kunmap(pages[i]); SetPageChecked(pages[i]); @@ -1279,7 +1281,8 @@ static int prepare_pages(struct btrfs_trans_handle *trans, loff_t pos, unsigned long first_index, unsigned long last_index, - size_t write_bytes) + size_t write_bytes, + u64 alloc_extent_start) { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; @@ -1288,6 +1291,8 @@ static int prepare_pages(struct btrfs_trans_handle *trans, int err = 0; int ret; int this_write; + struct buffer_head *bh; + struct buffer_head *head; loff_t isize = i_size_read(inode); memset(pages, 0, num_pages * sizeof(struct page *)); @@ -1307,14 +1312,20 @@ static int prepare_pages(struct btrfs_trans_handle *trans, BUG_ON(ret); lock_page(pages[i]); } - ret = nobh_prepare_write(pages[i], offset, - offset + this_write, - btrfs_get_block); + create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, + (1 << BH_Uptodate)); + head = page_buffers(pages[i]); + bh = head; 
+ do { + err = btrfs_map_bh_to_logical(root, bh, + alloc_extent_start); + BUG_ON(err); + if (err) + goto failed_truncate; + bh = bh->b_this_page; + alloc_extent_start++; + } while (bh != head); pos += this_write; - if (ret) { - err = ret; - goto failed_truncate; - } WARN_ON(this_write > write_bytes); write_bytes -= this_write; } @@ -1343,11 +1354,23 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct page *pages[1]; unsigned long first_index; unsigned long last_index; + u64 start_pos; + u64 num_blocks; + u64 alloc_extent_start; + u64 orig_extent_start; + struct btrfs_trans_handle *trans; if (file->f_flags & O_DIRECT) return -EINVAL; pos = *ppos; + start_pos = pos & ~(root->blocksize - 1); + /* FIXME */ + if (start_pos != pos) + return -EINVAL; + num_blocks = (count + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ -1362,20 +1385,41 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + goto out_unlock; + } + ret = btrfs_alloc_file_extent(trans, root, inode->i_ino, + start_pos, num_blocks, 1, + &alloc_extent_start); + BUG_ON(ret); + + orig_extent_start = start_pos; + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + while(count > 0) { size_t offset = pos & (PAGE_CACHE_SIZE - 1); size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; ret = prepare_pages(NULL, root, file, pages, num_pages, - pos, first_index, last_index, write_bytes); + pos, 
first_index, last_index, + write_bytes, alloc_extent_start); BUG_ON(ret); + /* FIXME blocks != pagesize */ + alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); ret = dirty_and_release_pages(NULL, root, file, pages, - num_pages, pos, write_bytes); + num_pages, orig_extent_start, + pos, write_bytes); BUG_ON(ret); btrfs_drop_pages(pages, num_pages); @@ -1387,6 +1431,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, balance_dirty_pages_ratelimited(inode->i_mapping); cond_resched(); } +out_unlock: mutex_unlock(&inode->i_mutex); out: *ppos = pos; @@ -1806,8 +1851,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) &new_root_item); BUG_ON(ret); -printk("adding snapshot name %.*s root %Lu %Lu %u\n", namelen, name, key.objectid, key.offset, key.flags); - /* * insert the directory item */ -- cgit v1.2.3 From b18c6685810af8e6763760711aece31ccc7a8ea8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 17 Apr 2007 13:26:50 -0400 Subject: Btrfs: progress on file_write Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 57 ++++++++++ fs/btrfs/ctree.h | 21 +++- fs/btrfs/extent-tree.c | 20 ++-- fs/btrfs/file-item.c | 61 +++++++---- fs/btrfs/super.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 394 insertions(+), 49 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6b76fd967153..43d4779abdd1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1249,6 +1249,63 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +int btrfs_truncate_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u32 new_size) +{ + int ret = 0; + int slot; + int slot_orig; + struct btrfs_leaf *leaf; + struct buffer_head *leaf_buf; + u32 nritems; + unsigned int data_end; + unsigned int old_data_start; + unsigned int old_size; + 
unsigned int size_diff; + int i; + + slot_orig = path->slots[0]; + leaf_buf = path->nodes[0]; + leaf = btrfs_buffer_leaf(leaf_buf); + + nritems = btrfs_header_nritems(&leaf->header); + data_end = leaf_data_end(root, leaf); + + slot = path->slots[0]; + old_data_start = btrfs_item_offset(leaf->items + slot); + old_size = btrfs_item_size(leaf->items + slot); + BUG_ON(old_size <= new_size); + size_diff = old_size - new_size; + + BUG_ON(slot < 0); + BUG_ON(slot >= nritems); + + /* + * item0..itemN ... dataN.offset..dataN.size .. data0.size + */ + /* first correct the data pointers */ + for (i = slot; i < nritems; i++) { + u32 ioff = btrfs_item_offset(leaf->items + i); + btrfs_set_item_offset(leaf->items + i, + ioff + size_diff); + } + /* shift the data */ +printk("truncate item, new_size %u old_size %u, diff %u, bufp %p, dst, %p, num %u, old_data_start %u, data_end %u\n", new_size, old_size, size_diff, leaf, btrfs_leaf_data(leaf) + data_end + size_diff, old_data_start-data_end, old_data_start, data_end); + btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + data_end + size_diff, btrfs_leaf_data(leaf) + + data_end, old_data_start + new_size - data_end); + btrfs_set_item_size(leaf->items + slot, new_size); + btrfs_mark_buffer_dirty(leaf_buf); + + ret = 0; + if (btrfs_leaf_free_space(root, leaf) < 0) + BUG(); + check_leaf(root, path, 0); + return ret; +} + int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 796f19d03ab5..ca3ab160f460 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -999,7 +999,7 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) -/* extent-item.c */ +/* extent-tree.c */ int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 
@@ -1013,9 +1013,16 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 blocknr, u64 num_blocks); /* ctree.c */ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); +int btrfs_truncate_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u32 new_size); int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); @@ -1073,11 +1080,10 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key *location, int mod); /* file-item.c */ -int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, +int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, u64 offset, - u64 num_blocks, u64 hint_block, - u64 *result); + u64 objectid, u64 pos, u64 offset, + u64 num_blocks); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, @@ -1090,6 +1096,11 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, char *data, size_t len); +struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid, u64 offset, + int cow); /* super.c */ extern struct subsystem btrfs_subsys; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d560831c10a7..2cee9df001f6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,8 +12,9 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, 
struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks) +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 blocknr, u64 num_blocks) { struct btrfs_path *path; int ret; @@ -50,8 +51,9 @@ static int inc_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } -static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, u32 *refs) +static int lookup_extent_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 blocknr, + u64 num_blocks, u32 *refs) { struct btrfs_path *path; int ret; @@ -80,7 +82,7 @@ static int lookup_block_ref(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return inc_block_ref(trans, root, bh_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -107,13 +109,13 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, continue; fi = btrfs_item_ptr(buf_leaf, i, struct btrfs_file_extent_item); - ret = inc_block_ref(trans, root, + ret = btrfs_inc_extent_ref(trans, root, btrfs_file_extent_disk_blocknr(fi), btrfs_file_extent_disk_num_blocks(fi)); BUG_ON(ret); } else { blocknr = btrfs_node_blockptr(buf_node, i); - ret = inc_block_ref(trans, root, blocknr, 1); + ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); BUG_ON(ret); } } @@ -563,7 +565,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_block_ref(trans, root, bh_blocknr(path->nodes[*level]), + ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]), 1, &refs); 
BUG_ON(ret); if (refs > 1) @@ -587,7 +589,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root } blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), path->slots[*level]); - ret = lookup_block_ref(trans, root, blocknr, 1, &refs); + ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 93d42d650820..f49968ad0a07 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -6,13 +6,11 @@ #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item)) / \ sizeof(struct btrfs_csum_item)) - 1)) -int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, +int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, u64 offset, - u64 num_blocks, u64 hint_block, - u64 *result) + u64 objectid, u64 pos, + u64 offset, u64 num_blocks) { - struct btrfs_key ins; int ret = 0; struct btrfs_file_extent_item *item; struct btrfs_key file_key; @@ -21,11 +19,13 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); + /* ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, (u64)-1, &ins); + */ BUG_ON(ret); file_key.objectid = objectid; - file_key.offset = offset; + file_key.offset = pos; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); @@ -34,21 +34,22 @@ int btrfs_alloc_file_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(item, ins.objectid); - btrfs_set_file_extent_disk_num_blocks(item, ins.offset); + btrfs_set_file_extent_disk_blocknr(item, offset); + btrfs_set_file_extent_disk_num_blocks(item, num_blocks); btrfs_set_file_extent_offset(item, 0); - btrfs_set_file_extent_num_blocks(item, ins.offset); + 
btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); btrfs_mark_buffer_dirty(path->nodes[0]); - *result = ins.objectid; btrfs_release_path(root, path); btrfs_free_path(path); return 0; } -static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, - struct btrfs_path *path, - u64 objectid, u64 offset) +struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + u64 objectid, u64 offset, + int cow) { int ret; struct btrfs_key file_key; @@ -61,19 +62,23 @@ static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - ret = btrfs_search_slot(NULL, root, &file_key, path, 0, 0); +printk("__lookup for %Lu\n", offset); + ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; leaf = btrfs_buffer_leaf(path->nodes[0]); if (ret > 0) { ret = 1; - if (path->slots[0] == 0) + if (path->slots[0] == 0) { +printk("fail1\n"); goto fail; + } path->slots[0]--; btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid) { +printk("fail2 type %u %Lu %Lu\n", btrfs_key_type(&found_key), found_key.objectid, objectid); goto fail; } csum_offset = (offset - found_key.offset) >> @@ -81,6 +86,7 @@ static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)) { +printk("fail3, csum offset %lu size %u\n", csum_offset, btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)); goto fail; } } @@ -89,7 +95,7 @@ static struct btrfs_csum_item *__lookup_csum_item(struct btrfs_root *root, return item; fail: if (ret > 0) - ret = -EIO; + ret = -ENOENT; return ERR_PTR(ret); } @@ -105,7 +111,7 @@ 
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int cow = mod != 0; struct btrfs_csum_item *csum_item; - csum_item = __lookup_csum_item(root, path, objectid, offset); + csum_item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); if (IS_ERR(csum_item)) return PTR_ERR(csum_item); file_key.objectid = objectid; @@ -113,7 +119,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); +printk("lookup file extent searches for %Lu\n", file_key.offset); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); +printk("ret is %d\n", ret); return ret; } @@ -134,17 +142,23 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); + + item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); + if (!IS_ERR(item)) + goto found; + btrfs_release_path(root, path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); +printk("searching for csum %Lu %Lu\n", objectid, offset); ret = btrfs_search_slot(trans, root, &file_key, path, sizeof(struct btrfs_csum_item), 1); +printk("ret %d\n", ret); if (ret < 0) goto fail; if (ret == 0) { - csum_offset = 0; - goto csum; + BUG(); } if (path->slots[0] == 0) { btrfs_release_path(root, path); @@ -153,12 +167,15 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, path->slots[0]--; leaf = btrfs_buffer_leaf(path->nodes[0]); btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); +printk("found key %Lu %Lu %u\n", found_key.objectid, found_key.offset, found_key.flags); csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; +printk("csum_offset %Lu\n", csum_offset); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { 
btrfs_release_path(root, path); +printk("insert1\n"); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / @@ -166,11 +183,13 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, ret = btrfs_extend_item(trans, root, path, sizeof(struct btrfs_csum_item)); BUG_ON(ret); +printk("item extended\n"); goto csum; } insert: csum_offset = 0; +printk("inserting item %Lu %Lu %u\n", file_key.objectid, file_key.offset, file_key.flags); ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(struct btrfs_csum_item)); if (ret != 0 && ret != -EEXIST) @@ -180,12 +199,14 @@ csum: struct btrfs_csum_item); ret = 0; item += csum_offset; +found: ret = btrfs_csum_data(root, data, len, item->csum); btrfs_set_csum_extent_offset(item, extent_offset); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); +printk("return ret %d\n", ret); return ret; } @@ -208,7 +229,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); mutex_lock(&root->fs_info->fs_mutex); - item = __lookup_csum_item(root, path, objectid, offset); + item = btrfs_lookup_csum(NULL, root, path, objectid, offset, 0); if (IS_ERR(item)) { ret = PTR_ERR(item); goto fail; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ec689992fdf4..6a56416147e6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1089,7 +1089,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); extent_start = extent_start >> inode->i_blkbits; - extent_start += btrfs_file_extent_offset(item); extent_end = extent_start + btrfs_file_extent_num_blocks(item); if (iblock >= extent_start && iblock < extent_end) { err = 0; @@ -1103,6 +1102,7 @@ allocate: err = 0; goto out; } +#if 0 ret = btrfs_alloc_file_extent(trans, root, objectid, iblock << inode->i_blkbits, 1, extent_end, &blocknr); @@ -1115,9 +1115,11 @@ 
allocate: map_bh(result, inode->i_sb, blocknr); btrfs_map_bh_to_logical(root, result, blocknr); +#endif out: btrfs_release_path(root, path); btrfs_free_path(path); +printk("mapping iblock %lu to %lu\n", iblock, result->b_blocknr); if (trans) btrfs_end_transaction(trans, root); return err; @@ -1273,8 +1275,244 @@ failed: return err; } -static int prepare_pages(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +static int drop_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 start, u64 end) +{ + struct btrfs_path *path; + struct btrfs_leaf *leaf; + struct btrfs_key key; + int slot; + struct btrfs_csum_item *item; + char *old_block = NULL; + u64 cur = start; + u64 found_end; + u64 num_csums; + u64 item_size; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + while(cur < end) { + item = btrfs_lookup_csum(trans, root, path, + inode->i_ino, cur, 1); + if (IS_ERR(item)) { + cur += root->blocksize; + continue; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + item_size = btrfs_item_size(leaf->items + slot); + num_csums = item_size / sizeof(struct btrfs_csum_item); + found_end = key.offset + (num_csums << inode->i_blkbits); + cur = found_end; + + if (found_end > end) { + char *src; + old_block = kmalloc(root->blocksize, GFP_NOFS); + src = btrfs_item_ptr(leaf, slot, char); + memcpy(old_block, src, item_size); + } + if (key.offset < start) { + u64 new_size = (start - key.offset) >> + inode->i_blkbits; + new_size *= sizeof(struct btrfs_csum_item); + ret = btrfs_truncate_item(trans, root, path, new_size); + BUG_ON(ret); + } else { + btrfs_del_item(trans, root, path); + } + btrfs_release_path(root, path); + if (found_end > end) { + char *dst; + int i; + int new_size; + + num_csums = (found_end - end) >> inode->i_blkbits; + new_size = num_csums * sizeof(struct btrfs_csum_item); + key.offset = end; + ret = 
btrfs_insert_empty_item(trans, root, path, + &key, new_size); + BUG_ON(ret); + dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], char); + memcpy(dst, old_block + item_size - new_size, + new_size); + item = (struct btrfs_csum_item *)dst; + for (i = 0; i < num_csums; i++) { + btrfs_set_csum_extent_offset(item, end); + item++; + } + mark_buffer_dirty(path->nodes[0]); + kfree(old_block); + break; + } + } + btrfs_free_path(path); + return 0; +} + +static int drop_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 start, u64 end) +{ + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + int slot; + struct btrfs_file_extent_item *extent; + u64 extent_end; + int keep; + struct btrfs_file_extent_item old; + struct btrfs_path *path; + u64 search_start = start; + int bookend; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; +search_again: +printk("drop extent inode %lu start %Lu end %Lu\n", inode->i_ino, start, end); + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + search_start, -1); + if (ret != 0) { +printk("lookup failed\n"); + goto out; + } + while(1) { + keep = 0; + bookend = 0; + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + +printk("found key %Lu %Lu %u\n", key.objectid, key.offset, key.flags); + + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); +printk("extent end is %Lu\n", extent_end); + if (key.offset >= end || key.objectid != inode->i_ino) { + ret = 0; + goto out; + } + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + goto next_leaf; + + if (end < extent_end && end >= key.offset) { + memcpy(&old, extent, sizeof(old)); + ret = btrfs_inc_extent_ref(trans, root, + btrfs_file_extent_disk_blocknr(&old), + btrfs_file_extent_disk_num_blocks(&old)); + BUG_ON(ret); + 
bookend = 1; + } + + if (start > key.offset) { + u64 new_num; + /* truncate existing extent */ + keep = 1; + WARN_ON(start & (root->blocksize - 1)); + new_num = (start - key.offset) >> inode->i_blkbits; +printk("truncating existing extent, was %Lu ", btrfs_file_extent_num_blocks(extent)); + btrfs_set_file_extent_num_blocks(extent, new_num); +printk("now %Lu\n", btrfs_file_extent_num_blocks(extent)); + + mark_buffer_dirty(path->nodes[0]); + } + if (!keep) { + u64 disk_blocknr; + u64 disk_num_blocks; +printk("del old\n"); + disk_blocknr = btrfs_file_extent_disk_blocknr(extent); + disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + search_start = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_release_path(root, path); + + ret = btrfs_free_extent(trans, root, disk_blocknr, + disk_num_blocks, 0); + + BUG_ON(ret); + if (!bookend && search_start >= end) { + ret = 0; + goto out; + } + if (!bookend) + goto search_again; + } + if (bookend) { + /* create bookend */ + struct btrfs_key ins; +printk("bookend! 
extent end %Lu\n", extent_end); + ins.objectid = inode->i_ino; + ins.offset = end; + ins.flags = 0; + btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); + + btrfs_release_path(root, path); + ret = drop_csums(trans, root, inode, start, end); + BUG_ON(ret); + ret = btrfs_insert_empty_item(trans, root, path, &ins, + sizeof(*extent)); + BUG_ON(ret); + extent = btrfs_item_ptr( + btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_disk_blocknr(extent, + btrfs_file_extent_disk_blocknr(&old)); + btrfs_set_file_extent_disk_num_blocks(extent, + btrfs_file_extent_disk_num_blocks(&old)); + + btrfs_set_file_extent_offset(extent, + btrfs_file_extent_offset(&old) + + ((end - key.offset) >> inode->i_blkbits)); + WARN_ON(btrfs_file_extent_num_blocks(&old) < + (end - key.offset) >> inode->i_blkbits); + btrfs_set_file_extent_num_blocks(extent, + btrfs_file_extent_num_blocks(&old) - + ((end - key.offset) >> inode->i_blkbits)); + + btrfs_set_file_extent_generation(extent, + btrfs_file_extent_generation(&old)); +printk("new bookend at offset %Lu, file_extent_offset %Lu, file_extent_num_blocks %Lu\n", end, btrfs_file_extent_offset(extent), btrfs_file_extent_num_blocks(extent)); + btrfs_mark_buffer_dirty(path->nodes[0]); + ret = 0; + goto out_nocsum; + } +next_leaf: + if (slot >= btrfs_header_nritems(&leaf->header) - 1) { + ret = btrfs_next_leaf(root, path); + if (ret) { + ret = 0; + goto out; + } + } else { + path->slots[0]++; + } + } + +out: + ret = drop_csums(trans, root, inode, start, end); + BUG_ON(ret); + +out_nocsum: + btrfs_free_path(path); + return ret; +} + +static int prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, @@ -1289,7 +1527,6 @@ static int prepare_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; int offset; int err = 0; - int ret; int this_write; struct buffer_head *bh; struct buffer_head *head; @@ -1305,18 +1542,21 @@ 
static int prepare_pages(struct btrfs_trans_handle *trans, } offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); - if (!PageUptodate(pages[i]) && - (pages[i]->index == first_index || - pages[i]->index == last_index) && pos < isize) { +#if 0 + if ((pages[i]->index == first_index || + pages[i]->index == last_index) && pos < isize && + !PageUptodate(pages[i])) { ret = mpage_readpage(pages[i], btrfs_get_block); BUG_ON(ret); lock_page(pages[i]); } +#endif create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, (1 << BH_Uptodate)); head = page_buffers(pages[i]); bh = head; do { +printk("mapping page %lu to block %Lu\n", pages[i]->index, alloc_extent_start); err = btrfs_map_bh_to_logical(root, bh, alloc_extent_start); BUG_ON(err); @@ -1351,7 +1591,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, int ret = 0; struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *pages[1]; + struct page *pages[8]; unsigned long first_index; unsigned long last_index; u64 start_pos; @@ -1359,6 +1599,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, u64 alloc_extent_start; u64 orig_extent_start; struct btrfs_trans_handle *trans; + struct btrfs_key ins; if (file->f_flags & O_DIRECT) return -EINVAL; @@ -1390,16 +1631,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; + mutex_unlock(&root->fs_info->fs_mutex); goto out_unlock; } - ret = btrfs_alloc_file_extent(trans, root, inode->i_ino, - start_pos, num_blocks, 1, - &alloc_extent_start); - BUG_ON(ret); - + if (start_pos < inode->i_size) { + ret = drop_extents(trans, root, inode, + start_pos, + (pos + count + root->blocksize -1) & + ~(root->blocksize - 1)); + } orig_extent_start = start_pos; - ret = btrfs_end_transaction(trans, root); + ret = btrfs_alloc_extent(trans, root, 
num_blocks, 1, + (u64)-1, &ins); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset); BUG_ON(ret); + alloc_extent_start = ins.objectid; + ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); while(count > 0) { @@ -1407,16 +1656,21 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - ret = prepare_pages(NULL, root, file, pages, num_pages, +printk("num_pages is %lu\n", num_pages); + + memset(pages, 0, sizeof(pages)); + ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes, alloc_extent_start); BUG_ON(ret); + /* FIXME blocks != pagesize */ alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); +printk("2num_pages is %lu\n", num_pages); ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, orig_extent_start, pos, write_bytes); -- cgit v1.2.3 From 70b2befd0c8a4064715d8b340270650cc9d15af8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 17 Apr 2007 15:39:32 -0400 Subject: Btrfs: rework csums and extent item ordering Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 9 ++-- fs/btrfs/ctree.h | 17 +------ fs/btrfs/dir-item.c | 28 ++++++++---- fs/btrfs/disk-io.c | 2 - fs/btrfs/file-item.c | 26 +---------- fs/btrfs/super.c | 123 +++++---------------------------------------------- 6 files changed, 37 insertions(+), 168 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 43d4779abdd1..5c160c236773 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -115,14 +115,14 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.offset > k2->offset) - return 1; - if (k1.offset < k2->offset) - 
return -1; if (k1.flags > k2->flags) return 1; if (k1.flags < k2->flags) return -1; + if (k1.offset > k2->offset) + return 1; + if (k1.offset < k2->offset) + return -1; return 0; } @@ -1292,7 +1292,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, ioff + size_diff); } /* shift the data */ -printk("truncate item, new_size %u old_size %u, diff %u, bufp %p, dst, %p, num %u, old_data_start %u, data_end %u\n", new_size, old_size, size_diff, leaf, btrfs_leaf_data(leaf) + data_end + size_diff, old_data_start-data_end, old_data_start, data_end); btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + data_end, old_data_start + new_size - data_end); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ca3ab160f460..d75a4d5bc010 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -44,14 +44,14 @@ extern struct kmem_cache *btrfs_path_cachep; */ struct btrfs_disk_key { __le64 objectid; - __le64 offset; __le32 flags; + __le64 offset; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u64 offset; u32 flags; + u64 offset; } __attribute__ ((__packed__)); /* @@ -227,7 +227,6 @@ struct btrfs_file_extent_item { } __attribute__ ((__packed__)); struct btrfs_csum_item { - __le64 extent_offset; u8 csum[BTRFS_CSUM_SIZE]; } __attribute__ ((__packed__)); @@ -925,17 +924,6 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } -static inline u64 btrfs_csum_extent_offset(struct btrfs_csum_item *c) -{ - return le64_to_cpu(c->extent_offset); -} - -static inline void btrfs_set_csum_extent_offset(struct btrfs_csum_item *c, - u64 val) -{ - c->extent_offset = cpu_to_le64(val); -} - static inline u16 btrfs_device_pathlen(struct btrfs_device_item *d) { return le16_to_cpu(d->pathlen); @@ -1091,7 +1079,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, - u64 
extent_offset, char *data, size_t len); int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 0ba46bc0da9a..cd4137a8b87e 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -103,7 +103,7 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); + // btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); while(1) { @@ -146,19 +146,29 @@ int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, int cow = mod != 0; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; + int overflow = 0; key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); key.offset = objectid; - ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - if (ret < 0) - return ret; - if (ret > 0) { - if (path->slots[0] == 0) - return 1; - path->slots[0]--; + + while(1) { + btrfs_set_key_overflow(&key, overflow); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ret; + if (ret > 0) { + if (overflow >= BTRFS_KEY_OVERFLOW_MAX) + return 1; + overflow++; + btrfs_set_key_overflow(&key, overflow); + btrfs_release_path(root, path); + continue; + } else { + /* found */ + break; + } } leaf = btrfs_buffer_leaf(path->nodes[0]); found_key = &leaf->items[path->slots[0]].key; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 06b969c14625..a2a3f529cada 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -24,7 +24,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, u64 num_blocks) { struct dev_lookup *lookup; - char b[BDEVNAME_SIZE]; int ret; lookup = kmalloc(sizeof(*lookup), GFP_NOFS); @@ -34,7 +33,6 @@ int 
btrfs_insert_dev_radix(struct btrfs_root *root, lookup->num_blocks = num_blocks; lookup->bdev = bdev; lookup->device_id = device_id; -printk("inserting %s into dev radix %Lu %Lu\n", bdevname(bdev, b), block_start, num_blocks); ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + num_blocks - 1, lookup); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index f49968ad0a07..ff8f3339c684 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -62,23 +62,19 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); -printk("__lookup for %Lu\n", offset); ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; leaf = btrfs_buffer_leaf(path->nodes[0]); if (ret > 0) { ret = 1; - if (path->slots[0] == 0) { -printk("fail1\n"); + if (path->slots[0] == 0) goto fail; - } path->slots[0]--; btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid) { -printk("fail2 type %u %Lu %Lu\n", btrfs_key_type(&found_key), found_key.objectid, objectid); goto fail; } csum_offset = (offset - found_key.offset) >> @@ -86,7 +82,6 @@ printk("fail2 type %u %Lu %Lu\n", btrfs_key_type(&found_key), found_key.objectid if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)) { -printk("fail3, csum offset %lu size %u\n", csum_offset, btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)); goto fail; } } @@ -109,26 +104,18 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_key file_key; int ins_len = mod < 0 ? 
-1 : 0; int cow = mod != 0; - struct btrfs_csum_item *csum_item; - csum_item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); - if (IS_ERR(csum_item)) - return PTR_ERR(csum_item); file_key.objectid = objectid; - file_key.offset = btrfs_csum_extent_offset(csum_item); + file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); - btrfs_release_path(root, path); -printk("lookup file extent searches for %Lu\n", file_key.offset); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); -printk("ret is %d\n", ret); return ret; } int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, - u64 extent_offset, char *data, size_t len) { int ret; @@ -151,10 +138,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); -printk("searching for csum %Lu %Lu\n", objectid, offset); ret = btrfs_search_slot(trans, root, &file_key, path, sizeof(struct btrfs_csum_item), 1); -printk("ret %d\n", ret); if (ret < 0) goto fail; if (ret == 0) { @@ -167,15 +152,12 @@ printk("ret %d\n", ret); path->slots[0]--; leaf = btrfs_buffer_leaf(path->nodes[0]); btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); -printk("found key %Lu %Lu %u\n", found_key.objectid, found_key.offset, found_key.flags); csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; -printk("csum_offset %Lu\n", csum_offset); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { btrfs_release_path(root, path); -printk("insert1\n"); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / @@ -183,13 +165,11 @@ printk("insert1\n"); ret = btrfs_extend_item(trans, root, path, sizeof(struct btrfs_csum_item)); BUG_ON(ret); -printk("item extended\n"); goto csum; } insert: 
csum_offset = 0; -printk("inserting item %Lu %Lu %u\n", file_key.objectid, file_key.offset, file_key.flags); ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(struct btrfs_csum_item)); if (ret != 0 && ret != -EEXIST) @@ -201,12 +181,10 @@ csum: item += csum_offset; found: ret = btrfs_csum_data(root, data, len, item->csum); - btrfs_set_csum_extent_offset(item, extent_offset); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); -printk("return ret %d\n", ret); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6a56416147e6..b2a2220d1352 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -524,7 +524,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int advance; unsigned char d_type = DT_UNKNOWN; int over = 0; - int key_type = BTRFS_DIR_INDEX_KEY; + int key_type = BTRFS_DIR_ITEM_KEY; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -561,9 +561,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) item = leaf->items + slot; if (btrfs_disk_key_objectid(&item->key) != key.objectid) break; - if (key_type == BTRFS_DIR_INDEX_KEY && - btrfs_disk_key_offset(&item->key) > root->highest_inode) - break; if (btrfs_disk_key_type(&item->key) != key_type) continue; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) @@ -1119,7 +1116,6 @@ allocate: out: btrfs_release_path(root, path); btrfs_free_path(path); -printk("mapping iblock %lu to %lu\n", iblock, result->b_blocknr); if (trans) btrfs_end_transaction(trans, root); return err; @@ -1233,7 +1229,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct file *file, struct page **pages, size_t num_pages, - u64 extent_offset, loff_t pos, size_t write_bytes) { @@ -1253,7 +1248,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, trans = btrfs_start_transaction(root, 1); 
btrfs_csum_file_block(trans, root, inode->i_ino, pages[i]->index << PAGE_CACHE_SHIFT, - extent_offset, kmap(pages[i]), PAGE_CACHE_SIZE); kunmap(pages[i]); SetPageChecked(pages[i]); @@ -1275,86 +1269,6 @@ failed: return err; } -static int drop_csums(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 start, u64 end) -{ - struct btrfs_path *path; - struct btrfs_leaf *leaf; - struct btrfs_key key; - int slot; - struct btrfs_csum_item *item; - char *old_block = NULL; - u64 cur = start; - u64 found_end; - u64 num_csums; - u64 item_size; - int ret; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - while(cur < end) { - item = btrfs_lookup_csum(trans, root, path, - inode->i_ino, cur, 1); - if (IS_ERR(item)) { - cur += root->blocksize; - continue; - } - leaf = btrfs_buffer_leaf(path->nodes[0]); - slot = path->slots[0]; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - item_size = btrfs_item_size(leaf->items + slot); - num_csums = item_size / sizeof(struct btrfs_csum_item); - found_end = key.offset + (num_csums << inode->i_blkbits); - cur = found_end; - - if (found_end > end) { - char *src; - old_block = kmalloc(root->blocksize, GFP_NOFS); - src = btrfs_item_ptr(leaf, slot, char); - memcpy(old_block, src, item_size); - } - if (key.offset < start) { - u64 new_size = (start - key.offset) >> - inode->i_blkbits; - new_size *= sizeof(struct btrfs_csum_item); - ret = btrfs_truncate_item(trans, root, path, new_size); - BUG_ON(ret); - } else { - btrfs_del_item(trans, root, path); - } - btrfs_release_path(root, path); - if (found_end > end) { - char *dst; - int i; - int new_size; - - num_csums = (found_end - end) >> inode->i_blkbits; - new_size = num_csums * sizeof(struct btrfs_csum_item); - key.offset = end; - ret = btrfs_insert_empty_item(trans, root, path, - &key, new_size); - BUG_ON(ret); - dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], char); - memcpy(dst, old_block + item_size - new_size, - 
new_size); - item = (struct btrfs_csum_item *)dst; - for (i = 0; i < num_csums; i++) { - btrfs_set_csum_extent_offset(item, end); - item++; - } - mark_buffer_dirty(path->nodes[0]); - kfree(old_block); - break; - } - } - btrfs_free_path(path); - return 0; -} - static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, @@ -1376,12 +1290,16 @@ static int drop_extents(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; search_again: -printk("drop extent inode %lu start %Lu end %Lu\n", inode->i_ino, start, end); ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, search_start, -1); - if (ret != 0) { -printk("lookup failed\n"); + if (ret < 0) goto out; + if (ret > 0) { + if (path->slots[0] == 0) { + ret = -ENOENT; + goto out; + } + path->slots[0]--; } while(1) { keep = 0; @@ -1390,14 +1308,11 @@ printk("lookup failed\n"); slot = path->slots[0]; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); -printk("found key %Lu %Lu %u\n", key.objectid, key.offset, key.flags); - extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); extent_end = key.offset + (btrfs_file_extent_num_blocks(extent) << inode->i_blkbits); -printk("extent end is %Lu\n", extent_end); if (key.offset >= end || key.objectid != inode->i_ino) { ret = 0; goto out; @@ -1420,16 +1335,12 @@ printk("extent end is %Lu\n", extent_end); keep = 1; WARN_ON(start & (root->blocksize - 1)); new_num = (start - key.offset) >> inode->i_blkbits; -printk("truncating existing extent, was %Lu ", btrfs_file_extent_num_blocks(extent)); btrfs_set_file_extent_num_blocks(extent, new_num); -printk("now %Lu\n", btrfs_file_extent_num_blocks(extent)); - mark_buffer_dirty(path->nodes[0]); } if (!keep) { u64 disk_blocknr; u64 disk_num_blocks; -printk("del old\n"); disk_blocknr = btrfs_file_extent_disk_blocknr(extent); disk_num_blocks = btrfs_file_extent_disk_num_blocks(extent); @@ -1454,15 +1365,12 @@ printk("del old\n"); if (bookend) { /* create bookend */ 
struct btrfs_key ins; -printk("bookend! extent end %Lu\n", extent_end); ins.objectid = inode->i_ino; ins.offset = end; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); - ret = drop_csums(trans, root, inode, start, end); - BUG_ON(ret); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); BUG_ON(ret); @@ -1486,10 +1394,9 @@ printk("bookend! extent end %Lu\n", extent_end); btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); -printk("new bookend at offset %Lu, file_extent_offset %Lu, file_extent_num_blocks %Lu\n", end, btrfs_file_extent_offset(extent), btrfs_file_extent_num_blocks(extent)); btrfs_mark_buffer_dirty(path->nodes[0]); ret = 0; - goto out_nocsum; + goto out; } next_leaf: if (slot >= btrfs_header_nritems(&leaf->header) - 1) { @@ -1504,10 +1411,6 @@ next_leaf: } out: - ret = drop_csums(trans, root, inode, start, end); - BUG_ON(ret); - -out_nocsum: btrfs_free_path(path); return ret; } @@ -1556,7 +1459,6 @@ static int prepare_pages(struct btrfs_root *root, head = page_buffers(pages[i]); bh = head; do { -printk("mapping page %lu to block %Lu\n", pages[i]->index, alloc_extent_start); err = btrfs_map_bh_to_logical(root, bh, alloc_extent_start); BUG_ON(err); @@ -1597,7 +1499,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, u64 start_pos; u64 num_blocks; u64 alloc_extent_start; - u64 orig_extent_start; struct btrfs_trans_handle *trans; struct btrfs_key ins; @@ -1640,7 +1541,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, (pos + count + root->blocksize -1) & ~(root->blocksize - 1)); } - orig_extent_start = start_pos; ret = btrfs_alloc_extent(trans, root, num_blocks, 1, (u64)-1, &ins); BUG_ON(ret); @@ -1656,7 +1556,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> 
PAGE_CACHE_SHIFT; -printk("num_pages is %lu\n", num_pages); memset(pages, 0, sizeof(pages)); ret = prepare_pages(root, file, pages, num_pages, @@ -1670,10 +1569,8 @@ printk("num_pages is %lu\n", num_pages); write_bytes, pages, buf); BUG_ON(ret); -printk("2num_pages is %lu\n", num_pages); ret = dirty_and_release_pages(NULL, root, file, pages, - num_pages, orig_extent_start, - pos, write_bytes); + num_pages, pos, write_bytes); BUG_ON(ret); btrfs_drop_pages(pages, num_pages); -- cgit v1.2.3 From a429e51371eee3c989160c003ee40bc3947c6a76 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 18 Apr 2007 16:15:28 -0400 Subject: Btrfs: working file_write, reorganized key flags Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 30 +++++++++++-- fs/btrfs/ctree.h | 52 +++++++++++----------- fs/btrfs/dir-item.c | 70 ----------------------------- fs/btrfs/extent-tree.c | 4 +- fs/btrfs/file-item.c | 66 +++++++++++++++++++-------- fs/btrfs/print-tree.c | 2 +- fs/btrfs/super.c | 118 ++++++++++++++++++++++++++----------------------- 7 files changed, 167 insertions(+), 175 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5c160c236773..4efcd1bd63e5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -893,7 +893,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } left_nritems = btrfs_header_nritems(&left->header); - for (i = left_nritems - 1; i >= 0; i--) { + if (left_nritems == 0) { + btrfs_block_release(root, right_buf); + return 1; + } + for (i = left_nritems - 1; i >= 1; i--) { item = left->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -907,6 +911,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_block_release(root, right_buf); return 1; } + if (push_items == left_nritems) + WARN_ON(1); right_nritems = btrfs_header_nritems(&right->header); /* push left to right */ push_space = btrfs_item_end(left->items + left_nritems - 
push_items); @@ -943,6 +949,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(left_buf); btrfs_mark_buffer_dirty(right_buf); + btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, &right->items[0].key, sizeof(struct btrfs_disk_key)); btrfs_mark_buffer_dirty(upper); @@ -1004,7 +1011,12 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { + if (btrfs_header_nritems(&right->header) == 0) { + btrfs_block_release(root, t); + return 1; + } + + for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) { item = right->items + i; if (path->slots[0] == i) push_space += data_size + sizeof(*item); @@ -1018,6 +1030,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_block_release(root, t); return 1; } + if (push_items == btrfs_header_nritems(&right->header)) + WARN_ON(1); /* push data from right to left */ btrfs_memcpy(root, left, left->items + btrfs_header_nritems(&left->header), @@ -1064,7 +1078,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(t); btrfs_mark_buffer_dirty(right_buf); - wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) ret = wret; @@ -1181,6 +1194,12 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root path->nodes[0] = right_buffer; path->slots[0] = 0; path->slots[1] -= 1; + if (path->slots[1] == 0) { + wret = fixup_low_keys(trans, root, + path, &disk_key, 1); + if (wret) + ret = wret; + } return ret; } mid = slot; @@ -1241,6 +1260,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[1], 1); if (wret) ret = wret; + if (path->slots[1] == 0) { + wret = fixup_low_keys(trans, root, path, &disk_key, 1); + if (wret) + ret = wret; + } btrfs_block_release(root, path->nodes[0]); path->nodes[0] = right_buffer; 
path->slots[0] = 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d75a4d5bc010..8a329d3901a2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -282,11 +282,12 @@ struct btrfs_root { /* the lower bits in the key flags defines the item type */ #define BTRFS_KEY_TYPE_MAX 256 -#define BTRFS_KEY_TYPE_MASK (BTRFS_KEY_TYPE_MAX - 1) +#define BTRFS_KEY_TYPE_SHIFT 24 +#define BTRFS_KEY_TYPE_MASK (((u32)BTRFS_KEY_TYPE_MAX - 1) << \ + BTRFS_KEY_TYPE_SHIFT) #define BTRFS_KEY_OVERFLOW_MAX 128 -#define BTRFS_KEY_OVERFLOW_SHIFT 8 -#define BTRFS_KEY_OVERFLOW_MASK (0x7FULL << BTRFS_KEY_OVERFLOW_SHIFT) +#define BTRFS_KEY_OVERFLOW_MASK ((u32)BTRFS_KEY_OVERFLOW_MAX - 1) /* * inode items have the data typically returned from stat and store other @@ -586,56 +587,55 @@ static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, disk->flags = cpu_to_le32(val); } -static inline u32 btrfs_key_overflow(struct btrfs_key *key) +static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) { - u32 over = key->flags & BTRFS_KEY_OVERFLOW_MASK; - return over >> BTRFS_KEY_OVERFLOW_SHIFT; + return le32_to_cpu(key->flags) >> BTRFS_KEY_TYPE_SHIFT; } -static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) +static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, + u32 val) { - BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); - over = over << BTRFS_KEY_OVERFLOW_SHIFT; - key->flags = (key->flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; + u32 flags = btrfs_disk_key_flags(key); + BUG_ON(val >= BTRFS_KEY_TYPE_MAX); + val = val << BTRFS_KEY_TYPE_SHIFT; + flags = (flags & ~BTRFS_KEY_TYPE_MASK) | val; + btrfs_set_disk_key_flags(key, flags); } static inline u32 btrfs_key_type(struct btrfs_key *key) { - return key->flags & BTRFS_KEY_TYPE_MASK; + return key->flags >> BTRFS_KEY_TYPE_SHIFT; } -static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) +static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val) { - return 
le32_to_cpu(key->flags) & BTRFS_KEY_TYPE_MASK; + BUG_ON(val >= BTRFS_KEY_TYPE_MAX); + val = val << BTRFS_KEY_TYPE_SHIFT; + key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val; } -static inline void btrfs_set_key_type(struct btrfs_key *key, u32 type) +static inline u32 btrfs_key_overflow(struct btrfs_key *key) { - BUG_ON(type >= BTRFS_KEY_TYPE_MAX); - key->flags = (key->flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type; + return key->flags & BTRFS_KEY_OVERFLOW_MASK; } -static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, u32 type) +static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) { - u32 flags = btrfs_disk_key_flags(key); - BUG_ON(type >= BTRFS_KEY_TYPE_MAX); - flags = (flags & ~((u64)BTRFS_KEY_TYPE_MASK)) | type; - btrfs_set_disk_key_flags(key, flags); + BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); + key->flags = (key->flags & ~BTRFS_KEY_OVERFLOW_MASK) | over; } static inline u32 btrfs_disk_key_overflow(struct btrfs_disk_key *key) { - u32 over = le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK; - return over >> BTRFS_KEY_OVERFLOW_SHIFT; + return le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK; } -static inline void btrfs_set_disK_key_overflow(struct btrfs_disk_key *key, +static inline void btrfs_set_disk_key_overflow(struct btrfs_disk_key *key, u32 over) { u32 flags = btrfs_disk_key_flags(key); BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); - over = over << BTRFS_KEY_OVERFLOW_SHIFT; - flags = (flags & ~((u64)BTRFS_KEY_OVERFLOW_MASK)) | over; + flags = (flags & ~BTRFS_KEY_OVERFLOW_MASK) | over; btrfs_set_disk_key_flags(key, flags); } diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index cd4137a8b87e..a43deb726486 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -58,30 +58,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); - - /* FIXME, use some real flag for selecting the 
extra index */ - if (root == root->fs_info->tree_root) - goto out; - - btrfs_release_path(root, path); - - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - key.offset = location->objectid; - ret = insert_with_overflow(trans, root, path, &key, data_size); - // FIXME clear the dirindex bit - if (ret) - goto out; - - dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_dir_item); - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -135,52 +111,6 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } -int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 dir, - u64 objectid, int mod) -{ - int ret; - struct btrfs_key key; - int ins_len = mod < 0 ? 
-1 : 0; - int cow = mod != 0; - struct btrfs_disk_key *found_key; - struct btrfs_leaf *leaf; - int overflow = 0; - - key.objectid = dir; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); - key.offset = objectid; - - while(1) { - btrfs_set_key_overflow(&key, overflow); - ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - if (ret < 0) - return ret; - if (ret > 0) { - if (overflow >= BTRFS_KEY_OVERFLOW_MAX) - return 1; - overflow++; - btrfs_set_key_overflow(&key, overflow); - btrfs_release_path(root, path); - continue; - } else { - /* found */ - break; - } - } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - - if (btrfs_disk_key_objectid(found_key) != dir || - btrfs_disk_key_type(found_key) != BTRFS_DIR_INDEX_KEY) - return 1; - if (btrfs_disk_key_offset(found_key) == objectid) - return 0; - return 1; -} - int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2cee9df001f6..cb04a70eb7e8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -35,8 +35,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); - if (ret != 0) + if (ret != 0) { +printk("can't find block %Lu %Lu\n", blocknr, num_blocks); BUG(); + } BUG_ON(ret != 0); l = btrfs_buffer_leaf(path->nodes[0]); item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ff8f3339c684..8cc3c1d15412 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -4,7 +4,7 @@ #include "transaction.h" #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ - sizeof(struct btrfs_item)) / \ + sizeof(struct btrfs_item) * 2) / \ sizeof(struct btrfs_csum_item)) - 1)) int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct 
btrfs_root *root, @@ -19,11 +19,6 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - /* - ret = btrfs_alloc_extent(trans, root, num_blocks, hint_block, - (u64)-1, &ins); - */ - BUG_ON(ret); file_key.objectid = objectid; file_key.offset = pos; file_key.flags = 0; @@ -40,6 +35,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(root, path); btrfs_free_path(path); return 0; @@ -57,6 +53,7 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_csum_item *item; struct btrfs_leaf *leaf; u64 csum_offset = 0; + int csums_in_item; file_key.objectid = objectid; file_key.offset = offset; @@ -79,9 +76,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, } csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; - if (csum_offset >= - btrfs_item_size(leaf->items + path->slots[0]) / - sizeof(struct btrfs_csum_item)) { + csums_in_item = btrfs_item_size(leaf->items + path->slots[0]); + csums_in_item /= sizeof(struct btrfs_csum_item); + + if (csum_offset >= csums_in_item) { + ret = -EFBIG; goto fail; } } @@ -128,16 +127,36 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_init_path(path); - item = btrfs_lookup_csum(trans, root, path, objectid, offset, 0); - if (!IS_ERR(item)) - goto found; - btrfs_release_path(root, path); file_key.objectid = objectid; file_key.offset = offset; file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); + + item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1); + if (!IS_ERR(item)) + goto found; + ret = PTR_ERR(item); + if (ret == -EFBIG) { + u32 item_size; + /* we found one, but it isn't big enough yet */ + 
leaf = btrfs_buffer_leaf(path->nodes[0]); + item_size = btrfs_item_size(leaf->items + path->slots[0]); + if ((item_size / sizeof(struct btrfs_csum_item)) >= + MAX_CSUM_ITEMS(root)) { + /* already at max size, make a new one */ + goto insert; + } + } else { + /* we didn't find a csum item, insert one */ + goto insert; + } + + /* + * at this point, we know the tree has an item, but it isn't big + * enough yet to put our csum in. Grow it + */ + btrfs_release_path(root, path); ret = btrfs_search_slot(trans, root, &file_key, path, sizeof(struct btrfs_csum_item), 1); if (ret < 0) @@ -146,7 +165,6 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, BUG(); } if (path->slots[0] == 0) { - btrfs_release_path(root, path); goto insert; } path->slots[0]--; @@ -157,29 +175,36 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { - btrfs_release_path(root, path); + WARN_ON(1); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / sizeof(struct btrfs_csum_item)) { - ret = btrfs_extend_item(trans, root, path, - sizeof(struct btrfs_csum_item)); + u32 diff = (csum_offset + 1) * sizeof(struct btrfs_csum_item); + diff = diff - btrfs_item_size(leaf->items + path->slots[0]); + WARN_ON(diff != sizeof(struct btrfs_csum_item)); + ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); goto csum; } insert: + btrfs_release_path(root, path); csum_offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(struct btrfs_csum_item)); - if (ret != 0 && ret != -EEXIST) + if (ret != 0) { + printk("at insert for %Lu %u %Lu ret is %d\n", file_key.objectid, file_key.flags, file_key.offset, ret); + WARN_ON(1); goto fail; + } csum: item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_csum_item); ret = 0; item += csum_offset; found: + btrfs_check_bounds(item->csum, 
BTRFS_CSUM_SIZE, path->nodes[0]->b_data, root->fs_info->sb->s_blocksize); ret = btrfs_csum_data(root, data, len, item->csum); btrfs_mark_buffer_dirty(path->nodes[0]); fail: @@ -210,6 +235,9 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, item = btrfs_lookup_csum(NULL, root, path, objectid, offset, 0); if (IS_ERR(item)) { ret = PTR_ERR(item); + /* a csum that isn't present is a preallocated region. */ + if (ret == -ENOENT || ret == -EFBIG) + ret = 1; goto fail; } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 0732a2fbb23a..1e7038b070ae 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -19,7 +19,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) for (i = 0 ; i < nr ; i++) { item = l->items + i; type = btrfs_disk_key_type(&item->key); - printk("\titem %d key (%Lu %u %Lu) itemoff %d itemsize %d\n", + printk("\titem %d key (%Lu %x %Lu) itemoff %d itemsize %d\n", i, btrfs_disk_key_objectid(&item->key), btrfs_disk_key_flags(&item->key), diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b2a2220d1352..583cd87a62e2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -150,11 +150,6 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_release_path(root, path); - ret = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - objectid, -1); - BUG_ON(ret); - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; err: btrfs_release_path(root, path); @@ -329,8 +324,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); + /* FIXME blocksize != 4096 */ inode->i_blocks -= - btrfs_file_extent_num_blocks(fi) >> 9; + btrfs_file_extent_num_blocks(fi) << 3; found_extent = 1; } else { found_extent = 0; @@ -562,7 +558,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if 
(btrfs_disk_key_objectid(&item->key) != key.objectid) break; if (btrfs_disk_key_type(&item->key) != key_type) - continue; + break; if (btrfs_disk_key_offset(&item->key) < filp->f_pos) continue; filp->f_pos = btrfs_disk_key_offset(&item->key); @@ -1285,29 +1281,27 @@ static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_path *path; u64 search_start = start; int bookend; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; -search_again: - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, - search_start, -1); - if (ret < 0) - goto out; - if (ret > 0) { - if (path->slots[0] == 0) { - ret = -ENOENT; + while(1) { + btrfs_release_path(root, path); + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + search_start, -1); + if (ret < 0) goto out; + if (ret > 0) { + if (path->slots[0] == 0) { + ret = -ENOENT; + goto out; + } + path->slots[0]--; } - path->slots[0]--; - } - while(1) { keep = 0; bookend = 0; leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); extent_end = key.offset + @@ -1318,7 +1312,10 @@ search_again: goto out; } if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) - goto next_leaf; + goto out; + if (search_start >= extent_end) + goto out; + search_start = extent_end; if (end < extent_end && end >= key.offset) { memcpy(&old, extent, sizeof(old)); @@ -1331,10 +1328,13 @@ search_again: if (start > key.offset) { u64 new_num; + u64 old_num; /* truncate existing extent */ keep = 1; WARN_ON(start & (root->blocksize - 1)); new_num = (start - key.offset) >> inode->i_blkbits; + old_num = btrfs_file_extent_num_blocks(extent); + inode->i_blocks -= (old_num - new_num) << 3; btrfs_set_file_extent_num_blocks(extent, new_num); mark_buffer_dirty(path->nodes[0]); } @@ -1344,13 +1344,11 @@ search_again: disk_blocknr = btrfs_file_extent_disk_blocknr(extent); disk_num_blocks = 
btrfs_file_extent_disk_num_blocks(extent); - search_start = key.offset + - (btrfs_file_extent_num_blocks(extent) << - inode->i_blkbits); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); + inode->i_blocks -= + btrfs_file_extent_num_blocks(extent) << 3; btrfs_release_path(root, path); - ret = btrfs_free_extent(trans, root, disk_blocknr, disk_num_blocks, 0); @@ -1360,7 +1358,7 @@ search_again: goto out; } if (!bookend) - goto search_again; + continue; } if (bookend) { /* create bookend */ @@ -1395,21 +1393,12 @@ search_again: btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); + inode->i_blocks += + btrfs_file_extent_num_blocks(extent) << 3; ret = 0; goto out; } -next_leaf: - if (slot >= btrfs_header_nritems(&leaf->header) - 1) { - ret = btrfs_next_leaf(root, path); - if (ret) { - ret = 0; - goto out; - } - } else { - path->slots[0]++; - } } - out: btrfs_free_path(path); return ret; @@ -1445,15 +1434,6 @@ static int prepare_pages(struct btrfs_root *root, } offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); -#if 0 - if ((pages[i]->index == first_index || - pages[i]->index == last_index) && pos < isize && - !PageUptodate(pages[i])) { - ret = mpage_readpage(pages[i], btrfs_get_block); - BUG_ON(ret); - lock_page(pages[i]); - } -#endif create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, (1 << BH_Uptodate)); head = page_buffers(pages[i]); @@ -1494,6 +1474,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page *pages[8]; + struct page *pinned[2] = { NULL, NULL }; unsigned long first_index; unsigned long last_index; u64 start_pos; @@ -1505,14 +1486,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (file->f_flags & O_DIRECT) return -EINVAL; pos = *ppos; - - start_pos = pos & 
~(root->blocksize - 1); - /* FIXME */ - if (start_pos != pos) - return -EINVAL; - num_blocks = (count + pos - start_pos + root->blocksize - 1) >> - inode->i_blkbits; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); @@ -1524,10 +1497,37 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (err) goto out; file_update_time(file); + + start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + num_blocks = (count + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; + mutex_lock(&inode->i_mutex); first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; + if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size && + (pos & (PAGE_CACHE_SIZE - 1))) { + pinned[0] = grab_cache_page(inode->i_mapping, first_index); + if (!PageUptodate(pinned[0])) { + ret = mpage_readpage(pinned[0], btrfs_get_block); + BUG_ON(ret); + } else { + unlock_page(pinned[0]); + } + } + if (first_index != last_index && + (last_index << PAGE_CACHE_SHIFT) < inode->i_size && + (count & (PAGE_CACHE_SIZE - 1))) { + pinned[1] = grab_cache_page(inode->i_mapping, last_index); + if (!PageUptodate(pinned[1])) { + ret = mpage_readpage(pinned[1], btrfs_get_block); + BUG_ON(ret); + } else { + unlock_page(pinned[1]); + } + } + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -1535,11 +1535,14 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, mutex_unlock(&root->fs_info->fs_mutex); goto out_unlock; } + /* FIXME blocksize != 4096 */ + inode->i_blocks += num_blocks << 3; if (start_pos < inode->i_size) { + /* FIXME blocksize != pagesize */ ret = drop_extents(trans, root, inode, start_pos, (pos + count + root->blocksize -1) & - ~(root->blocksize - 1)); + ~((u64)root->blocksize - 1)); } ret = btrfs_alloc_extent(trans, root, num_blocks, 1, (u64)-1, &ins); @@ 
-1585,8 +1588,13 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, out_unlock: mutex_unlock(&inode->i_mutex); out: + if (pinned[0]) + page_cache_release(pinned[0]); + if (pinned[1]) + page_cache_release(pinned[1]); *ppos = pos; current->backing_dev_info = NULL; + mark_inode_dirty(inode); return num_written ? num_written : err; } -- cgit v1.2.3 From 236454dfffb64a95ee01c50a215153f5de61c475 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 19 Apr 2007 13:37:44 -0400 Subject: Btrfs: many file_write fixes, inline data Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 35 ++++ fs/btrfs/disk-io.c | 6 + fs/btrfs/extent-tree.c | 5 + fs/btrfs/file-item.c | 1 + fs/btrfs/super.c | 553 ++++++++++++++++++++++++++++++++++++++----------- 5 files changed, 484 insertions(+), 116 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8a329d3901a2..f1800008be34 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -73,6 +73,9 @@ struct btrfs_header { (sizeof(struct btrfs_disk_key) + sizeof(u64))) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) #define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) +#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ + sizeof(struct btrfs_item) - \ + sizeof(struct btrfs_file_extent_item)) struct buffer_head; /* @@ -204,8 +207,12 @@ struct btrfs_root_item { __le32 refs; } __attribute__ ((__packed__)); +#define BTRFS_FILE_EXTENT_REG 0 +#define BTRFS_FILE_EXTENT_INLINE 1 + struct btrfs_file_extent_item { __le64 generation; + u8 type; /* * disk space consumed by the extent, checksum blocks are included * in these numbers @@ -862,6 +869,34 @@ static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) return (u8 *)l->items; } +static inline int btrfs_file_extent_type(struct btrfs_file_extent_item *e) +{ + return e->type; +} +static inline void btrfs_set_file_extent_type(struct btrfs_file_extent_item *e, + u8 val) +{ + e->type = 
val; +} + +static inline char *btrfs_file_extent_inline_start(struct + btrfs_file_extent_item *e) +{ + return (char *)(&e->disk_blocknr); +} + +static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) +{ + return (unsigned long)(&((struct + btrfs_file_extent_item *)NULL)->disk_blocknr) + datasize; +} + +static inline u32 btrfs_file_extent_inline_len(struct btrfs_item *e) +{ + struct btrfs_file_extent_item *fe = NULL; + return btrfs_item_size(e) - (unsigned long)(&fe->disk_blocknr); +} + static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item *e) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a2a3f529cada..11e17a2f736b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -108,6 +108,12 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, int ret; + if (logical == 0) { + bh->b_bdev = NULL; + bh->b_blocknr = 0; + set_buffer_mapped(bh); + return 0; + } root = root->fs_info->dev_root; ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, (void **)lookup, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cb04a70eb7e8..b2faad3e8791 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -111,6 +111,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, continue; fi = btrfs_item_ptr(buf_leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; ret = btrfs_inc_extent_ref(trans, root, btrfs_file_extent_disk_blocknr(fi), btrfs_file_extent_disk_num_blocks(fi)); @@ -539,6 +542,8 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) + continue; /* * FIXME make sure to insert a trans record that * repeats the snapshot del on crash diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 
8cc3c1d15412..10e4cf08e9ed 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,6 +34,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_offset(item, 0); btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); + btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root, path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 583cd87a62e2..1b286bb26f12 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -317,19 +317,22 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, break; if (btrfs_disk_key_offset(found_key) < inode->i_size) break; + found_extent = 0; if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); - extent_start = btrfs_file_extent_disk_blocknr(fi); - extent_num_blocks = - btrfs_file_extent_disk_num_blocks(fi); - /* FIXME blocksize != 4096 */ - inode->i_blocks -= - btrfs_file_extent_num_blocks(fi) << 3; - found_extent = 1; - } else { - found_extent = 0; + if (btrfs_file_extent_type(fi) != + BTRFS_FILE_EXTENT_INLINE) { + extent_start = + btrfs_file_extent_disk_blocknr(fi); + extent_num_blocks = + btrfs_file_extent_disk_num_blocks(fi); + /* FIXME blocksize != 4096 */ + inode->i_blocks -= + btrfs_file_extent_num_blocks(fi) << 3; + found_extent = 1; + } } ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -1010,9 +1013,9 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, u64 extent_start = 0; u64 extent_end = 0; u64 objectid = inode->i_ino; + u32 found_type; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans = NULL; struct btrfs_file_extent_item *item; struct btrfs_leaf *leaf; struct btrfs_disk_key *found_key; @@ -1021,13 +1024,12 @@ static int 
btrfs_get_block_lock(struct inode *inode, sector_t iblock, BUG_ON(!path); btrfs_init_path(path); if (create) { - trans = btrfs_start_transaction(root, 1); WARN_ON(1); } - ret = btrfs_lookup_file_extent(trans, root, path, + ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, - iblock << inode->i_blkbits, create); + iblock << inode->i_blkbits, 0); if (ret < 0) { err = ret; goto out; @@ -1036,7 +1038,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (ret != 0) { if (path->slots[0] == 0) { btrfs_release_path(root, path); - goto allocate; + goto out; } path->slots[0]--; } @@ -1047,73 +1049,51 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, blocknr = btrfs_file_extent_disk_blocknr(item); blocknr += btrfs_file_extent_offset(item); - /* exact match found, use it, FIXME, deal with extents - * other than the page size - */ - if (0 && ret == 0) { - err = 0; - if (create && - btrfs_file_extent_generation(item) != trans->transid) { - struct btrfs_key ins; - ret = btrfs_alloc_extent(trans, root, 1, - blocknr, (u64)-1, &ins); - BUG_ON(ret); - btrfs_set_file_extent_disk_blocknr(item, ins.objectid); - mark_buffer_dirty(path->nodes[0]); - ret = btrfs_free_extent(trans, root, - blocknr, 1, 0); - BUG_ON(ret); - blocknr = ins.objectid; - - } - btrfs_map_bh_to_logical(root, result, blocknr); - goto out; - } - /* are we inside the extent that was found? 
*/ found_key = &leaf->items[path->slots[0]].key; + found_type = btrfs_disk_key_type(found_key); if (btrfs_disk_key_objectid(found_key) != objectid || - btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) { + found_type != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; btrfs_release_path(root, path); - goto allocate; - } - - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); - extent_start = extent_start >> inode->i_blkbits; - extent_end = extent_start + btrfs_file_extent_num_blocks(item); - if (iblock >= extent_start && iblock < extent_end) { - err = 0; - btrfs_map_bh_to_logical(root, result, blocknr + iblock - - extent_start); - goto out; - } -allocate: - /* ok, create a new extent */ - if (!create) { - err = 0; goto out; } -#if 0 - ret = btrfs_alloc_file_extent(trans, root, objectid, - iblock << inode->i_blkbits, - 1, extent_end, &blocknr); - if (ret) { - err = ret; - goto out; + found_type = btrfs_file_extent_type(item); + extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_start = extent_start >> inode->i_blkbits; + extent_end = extent_start + btrfs_file_extent_num_blocks(item); + if (iblock >= extent_start && iblock < extent_end) { + err = 0; + btrfs_map_bh_to_logical(root, result, blocknr + + iblock - extent_start); + goto out; + } + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + char *ptr; + char *map; + u32 size; + size = btrfs_file_extent_inline_len(leaf->items + + path->slots[0]); + extent_end = (extent_start + size) >> inode->i_blkbits; + extent_start >>= inode->i_blkbits; + if (iblock < extent_start || iblock > extent_end) { + goto out; + } + ptr = btrfs_file_extent_inline_start(item); + map = kmap(result->b_page); + memcpy(map, ptr, size); + memset(map + size, 0, PAGE_CACHE_SIZE - size); + flush_dcache_page(result->b_page); + kunmap(result->b_page); + set_buffer_uptodate(result); + SetPageChecked(result->b_page); + 
btrfs_map_bh_to_logical(root, result, 0); } - inode->i_blocks += inode->i_sb->s_blocksize >> 9; - set_buffer_new(result); - map_bh(result, inode->i_sb, blocknr); - - btrfs_map_bh_to_logical(root, result, blocknr); -#endif out: btrfs_release_path(root, path); btrfs_free_path(path); - if (trans) - btrfs_end_transaction(trans, root); return err; } @@ -1124,7 +1104,6 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, struct btrfs_root *root = BTRFS_I(inode)->root; mutex_lock(&root->fs_info->fs_mutex); err = btrfs_get_block_lock(inode, iblock, result, create); - // err = btrfs_get_block_inline(inode, iblock, result, create); mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -1134,11 +1113,6 @@ static int btrfs_prepare_write(struct file *file, struct page *page, { return nobh_prepare_write(page, from, to, btrfs_get_block); } -static int btrfs_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - return nobh_commit_write(file, page, from, to); -} static void btrfs_write_super(struct super_block *sb) { @@ -1150,9 +1124,227 @@ static int btrfs_readpage(struct file *file, struct page *page) return mpage_readpage(page, btrfs_get_block); } +/* + * While block_write_full_page is writing back the dirty buffers under + * the page lock, whoever dirtied the buffers may decide to clean them + * again at any time. We handle that by only looking at the buffer + * state inside lock_buffer(). + * + * If block_write_full_page() is called for regular writeback + * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a + * locked buffer. This only can happen if someone has written the buffer + * directly, with submit_bh(). At the address_space level PageWriteback + * prevents this contention from occurring. 
+ */ +static int __btrfs_write_full_page(struct inode *inode, struct page *page, + struct writeback_control *wbc) +{ + int err; + sector_t block; + sector_t last_block; + struct buffer_head *bh, *head; + const unsigned blocksize = 1 << inode->i_blkbits; + int nr_underway = 0; + + BUG_ON(!PageLocked(page)); + + last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; + + if (!page_has_buffers(page)) { + create_empty_buffers(page, blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + + /* + * Be very careful. We have no exclusion from __set_page_dirty_buffers + * here, and the (potentially unmapped) buffers may become dirty at + * any time. If a buffer becomes dirty here after we've inspected it + * then we just miss that fact, and the page stays dirty. + * + * Buffers outside i_size may be dirtied by __set_page_dirty_buffers; + * handle that here by just cleaning them. + */ + + block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + head = page_buffers(page); + bh = head; + + /* + * Get all the dirty buffers mapped to disk addresses and + * handle any aliases from the underlying blockdev's mapping. + */ + do { + if (block > last_block) { + /* + * mapped buffers outside i_size will occur, because + * this page can be outside i_size when there is a + * truncate in progress. + */ + /* + * The buffer was zeroed by block_write_full_page() + */ + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { + WARN_ON(bh->b_size != blocksize); + err = btrfs_get_block(inode, block, bh, 0); + if (err) + goto recover; + if (buffer_new(bh)) { + /* blockdev mappings never come here */ + clear_buffer_new(bh); + unmap_underlying_metadata(bh->b_bdev, + bh->b_blocknr); + } + } + bh = bh->b_this_page; + block++; + } while (bh != head); + + do { + if (!buffer_mapped(bh)) + continue; + /* + * If it's a fully non-blocking write attempt and we cannot + * lock the buffer then redirty the page. 
Note that this can + * potentially cause a busy-wait loop from pdflush and kswapd + * activity, but those code paths have their own higher-level + * throttling. + */ + if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { + lock_buffer(bh); + } else if (test_set_buffer_locked(bh)) { + redirty_page_for_writepage(wbc, page); + continue; + } + if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); + } + } while ((bh = bh->b_this_page) != head); + + /* + * The page and its buffers are protected by PageWriteback(), so we can + * drop the bh refcounts early. + */ + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(WRITE, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); + + err = 0; +done: + if (nr_underway == 0) { + /* + * The page was marked dirty, but the buffers were + * clean. Someone wrote them back by hand with + * ll_rw_block/submit_bh. A rare case. + */ + int uptodate = 1; + do { + if (!buffer_uptodate(bh)) { + uptodate = 0; + break; + } + bh = bh->b_this_page; + } while (bh != head); + if (uptodate) + SetPageUptodate(page); + end_page_writeback(page); + /* + * The page and buffer_heads can be released at any time from + * here on. + */ + wbc->pages_skipped++; /* We didn't write this page */ + } + return err; + +recover: + /* + * ENOSPC, or some other error. We may already have added some + * blocks to the file, so we need to write these out to avoid + * exposing stale data. + * The page is currently locked and not marked for writeback + */ + bh = head; + /* Recovery: lock and submit the mapped buffers */ + do { + if (buffer_mapped(bh) && buffer_dirty(bh)) { + lock_buffer(bh); + mark_buffer_async_write(bh); + } else { + /* + * The buffer may have been set dirty during + * attachment to a dirty page. 
+ */ + clear_buffer_dirty(bh); + } + } while ((bh = bh->b_this_page) != head); + SetPageError(page); + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + clear_buffer_dirty(bh); + submit_bh(WRITE, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); + goto done; +} + +/* + * The generic ->writepage function for buffer-backed address_spaces + */ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - return nobh_writepage(page, btrfs_get_block, wbc); + struct inode * const inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset; + void *kaddr; + + /* Is the page fully inside i_size? */ + if (page->index < end_index) + return __btrfs_write_full_page(inode, page, wbc); + + /* Is the page fully outside i_size? (truncate in progress) */ + offset = i_size & (PAGE_CACHE_SIZE-1); + if (page->index >= end_index+1 || !offset) { + /* + * The page may have dirty, unmapped buffers. For example, + * they may have been added in ext3_writepage(). Make them + * freeable here, so the page does not leak. + */ + block_invalidatepage(page, 0); + unlock_page(page); + return 0; /* don't care */ + } + + /* + * The page straddles i_size. It must be zeroed out on each and every + * writepage invokation because it may be mmapped. "A file is mapped + * in multiples of the page size. For a file that is not a multiple of + * the page size, the remaining memory is zeroed when mapped, and + * writes to that region are not written out to the file." 
+ */ + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + return __btrfs_write_full_page(inode, page, wbc); } static void btrfs_truncate(struct inode *inode) @@ -1179,6 +1371,29 @@ static void btrfs_truncate(struct inode *inode) mark_inode_dirty(inode); } +/* + * Make sure any changes to nobh_commit_write() are reflected in + * nobh_truncate_page(), since it doesn't call commit_write(). + */ +static int btrfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *bh; + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + SetPageUptodate(page); + bh = page_buffers(page); + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + set_page_dirty(page); + } + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} + static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, const char __user * buf) @@ -1234,6 +1449,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, int ret; int this_write; struct inode *inode = file->f_path.dentry->d_inode; + struct buffer_head *bh; + struct btrfs_file_extent_item *ei; for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); @@ -1242,16 +1459,47 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_csum_file_block(trans, root, inode->i_ino, + + bh = page_buffers(pages[i]); + if (buffer_mapped(bh) && bh->b_blocknr == 0) { + struct btrfs_key key; + struct btrfs_path *path; + char *ptr; + u32 datasize; + + path = btrfs_alloc_path(); + BUG_ON(!path); + key.objectid = inode->i_ino; + key.offset = pages[i]->index << PAGE_CACHE_SHIFT; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + 
BUG_ON(write_bytes >= PAGE_CACHE_SIZE); + datasize = offset + + btrfs_file_extent_calc_inline_size(write_bytes); + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(ret); + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + memcpy(ptr, bh->b_data, offset + write_bytes); + mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + } else { + btrfs_csum_file_block(trans, root, inode->i_ino, pages[i]->index << PAGE_CACHE_SHIFT, kmap(pages[i]), PAGE_CACHE_SIZE); - kunmap(pages[i]); + kunmap(pages[i]); + } SetPageChecked(pages[i]); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - ret = nobh_commit_write(file, pages[i], offset, + ret = btrfs_commit_write(file, pages[i], offset, offset + this_write); pos += this_write; if (ret) { @@ -1275,12 +1523,16 @@ static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_leaf *leaf; int slot; struct btrfs_file_extent_item *extent; - u64 extent_end; + u64 extent_end = 0; int keep; struct btrfs_file_extent_item old; struct btrfs_path *path; u64 search_start = start; int bookend; + int found_type; + int found_extent; + int found_inline; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1292,37 +1544,62 @@ static int drop_extents(struct btrfs_trans_handle *trans, goto out; if (ret > 0) { if (path->slots[0] == 0) { - ret = -ENOENT; + ret = 0; goto out; } path->slots[0]--; } keep = 0; bookend = 0; + found_extent = 0; + found_inline = 0; + extent = NULL; leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << - 
inode->i_blkbits); if (key.offset >= end || key.objectid != inode->i_ino) { ret = 0; goto out; } - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { + ret = 0; + goto out; + } + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + found_extent = 1; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_inline = 1; + extent_end = key.offset + + btrfs_file_extent_inline_len(leaf->items + slot); + } + + if (!found_extent && !found_inline) { + ret = 0; goto out; - if (search_start >= extent_end) + } + + if (search_start >= extent_end) { + ret = 0; goto out; + } + search_start = extent_end; if (end < extent_end && end >= key.offset) { - memcpy(&old, extent, sizeof(old)); - ret = btrfs_inc_extent_ref(trans, root, - btrfs_file_extent_disk_blocknr(&old), - btrfs_file_extent_disk_num_blocks(&old)); - BUG_ON(ret); + if (found_extent) { + memcpy(&old, extent, sizeof(old)); + ret = btrfs_inc_extent_ref(trans, root, + btrfs_file_extent_disk_blocknr(&old), + btrfs_file_extent_disk_num_blocks(&old)); + BUG_ON(ret); + } + WARN_ON(found_inline); bookend = 1; } @@ -1332,25 +1609,45 @@ static int drop_extents(struct btrfs_trans_handle *trans, /* truncate existing extent */ keep = 1; WARN_ON(start & (root->blocksize - 1)); - new_num = (start - key.offset) >> inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); - inode->i_blocks -= (old_num - new_num) << 3; - btrfs_set_file_extent_num_blocks(extent, new_num); - mark_buffer_dirty(path->nodes[0]); + if (found_extent) { + new_num = (start - key.offset) >> + inode->i_blkbits; + old_num = btrfs_file_extent_num_blocks(extent); + inode->i_blocks -= (old_num - new_num) << 3; + btrfs_set_file_extent_num_blocks(extent, + new_num); + mark_buffer_dirty(path->nodes[0]); + 
} else { + WARN_ON(1); + /* + ret = btrfs_truncate_item(trans, root, path, + start - key.offset); + BUG_ON(ret); + */ + } } if (!keep) { - u64 disk_blocknr; - u64 disk_num_blocks; - disk_blocknr = btrfs_file_extent_disk_blocknr(extent); - disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); + u64 disk_blocknr = 0; + u64 disk_num_blocks = 0; + u64 extent_num_blocks = 0; + if (found_extent) { + disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + extent_num_blocks = + btrfs_file_extent_num_blocks(extent); + } ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - inode->i_blocks -= - btrfs_file_extent_num_blocks(extent) << 3; btrfs_release_path(root, path); - ret = btrfs_free_extent(trans, root, disk_blocknr, - disk_num_blocks, 0); + if (found_extent) { + inode->i_blocks -= + btrfs_file_extent_num_blocks(extent) << 3; + ret = btrfs_free_extent(trans, root, + disk_blocknr, + disk_num_blocks, 0); + } BUG_ON(ret); if (!bookend && search_start >= end) { @@ -1360,7 +1657,7 @@ static int drop_extents(struct btrfs_trans_handle *trans, if (!bookend) continue; } - if (bookend) { + if (bookend && found_extent) { /* create bookend */ struct btrfs_key ins; ins.objectid = inode->i_ino; @@ -1390,6 +1687,8 @@ static int drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_num_blocks(&old) - ((end - key.offset) >> inode->i_blkbits)); + btrfs_set_file_extent_type(extent, + BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); @@ -1445,7 +1744,8 @@ static int prepare_pages(struct btrfs_root *root, if (err) goto failed_truncate; bh = bh->b_this_page; - alloc_extent_start++; + if (alloc_extent_start) + alloc_extent_start++; } while (bh != head); pos += this_write; WARN_ON(this_write > write_bytes); @@ -1543,12 +1843,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, start_pos, 
(pos + count + root->blocksize -1) & ~((u64)root->blocksize - 1)); + BUG_ON(ret); } - ret = btrfs_alloc_extent(trans, root, num_blocks, 1, + if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || + pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + ret = btrfs_alloc_extent(trans, root, num_blocks, 1, (u64)-1, &ins); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); + BUG_ON(ret); + } else { + ins.offset = 0; + ins.objectid = 0; + } BUG_ON(ret); alloc_extent_start = ins.objectid; ret = btrfs_end_transaction(trans, root); @@ -1567,7 +1875,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, BUG_ON(ret); /* FIXME blocks != pagesize */ - alloc_extent_start += num_pages; + if (alloc_extent_start) + alloc_extent_start += num_pages; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); BUG_ON(ret); @@ -1779,10 +2088,11 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, if (!PageChecked(page)) { /* FIXME, do it per block */ struct btrfs_root *root = BTRFS_I(inode)->root; + int ret = btrfs_csum_verify_file_block(root, - page->mapping->host->i_ino, - page->index << PAGE_CACHE_SHIFT, - kmap(page), PAGE_CACHE_SIZE); + page->mapping->host->i_ino, + page->index << PAGE_CACHE_SHIFT, + kmap(page), PAGE_CACHE_SIZE); if (ret) { printk("failed to verify ino %lu page %lu\n", page->mapping->host->i_ino, @@ -2249,6 +2559,16 @@ static int btrfs_get_sb(struct file_system_type *fs_type, btrfs_fill_super, mnt); } + +static int btrfs_getattr(struct vfsmount *mnt, + struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blksize = 256 * 1024; + return 0; +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2298,6 +2618,7 @@ static struct 
address_space_operations btrfs_aops = { static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, + .getattr = btrfs_getattr, }; static struct file_operations btrfs_file_operations = { -- cgit v1.2.3 From 7e38180e207c8707691c12395e990a5d583c5e71 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 19 Apr 2007 15:36:27 -0400 Subject: Btrfs: directory inode index is back Signed-off-by: Chris Mason --- fs/btrfs/TODO | 4 +- fs/btrfs/ctree.h | 58 +++------- fs/btrfs/dir-item.c | 204 ++++++++++++++++++++++++----------- fs/btrfs/super.c | 302 ++++++++-------------------------------------------- 4 files changed, 206 insertions(+), 362 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 1c3acfcf146e..8d7e5a3ed629 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,12 +1,12 @@ * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling -* Add block mapping tree (simple dm layer) * Make allocator smarter * add a block group to struct inode -* Make sure nobh stuff is working properly for cows * Do actual block accounting * Check compat and incompat flags on the inode * Get rid of struct ctree_path, limiting tree levels held at one time +* Add generation number to key pointer in nodes +* Add parent pointer back to extent map. 
* Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f1800008be34..d628a3b9441c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -185,11 +185,6 @@ struct btrfs_inode_item { struct btrfs_inode_timespec otime; } __attribute__ ((__packed__)); -/* inline data is just a blob of bytes */ -struct btrfs_inline_data_item { - u8 data; -} __attribute__ ((__packed__)); - struct btrfs_dir_item { struct btrfs_disk_key location; __le16 flags; @@ -293,9 +288,6 @@ struct btrfs_root { #define BTRFS_KEY_TYPE_MASK (((u32)BTRFS_KEY_TYPE_MAX - 1) << \ BTRFS_KEY_TYPE_SHIFT) -#define BTRFS_KEY_OVERFLOW_MAX 128 -#define BTRFS_KEY_OVERFLOW_MASK ((u32)BTRFS_KEY_OVERFLOW_MAX - 1) - /* * inode items have the data typically returned from stat and store other * info about object characteristics. There is one for every file and dir in @@ -621,31 +613,6 @@ static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val) key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val; } -static inline u32 btrfs_key_overflow(struct btrfs_key *key) -{ - return key->flags & BTRFS_KEY_OVERFLOW_MASK; -} - -static inline void btrfs_set_key_overflow(struct btrfs_key *key, u32 over) -{ - BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); - key->flags = (key->flags & ~BTRFS_KEY_OVERFLOW_MASK) | over; -} - -static inline u32 btrfs_disk_key_overflow(struct btrfs_disk_key *key) -{ - return le32_to_cpu(key->flags) & BTRFS_KEY_OVERFLOW_MASK; -} - -static inline void btrfs_set_disk_key_overflow(struct btrfs_disk_key *key, - u32 over) -{ - u32 flags = btrfs_disk_key_flags(key); - BUG_ON(over >= BTRFS_KEY_OVERFLOW_MAX); - flags = (flags & ~BTRFS_KEY_OVERFLOW_MASK) | over; - btrfs_set_disk_key_flags(key, flags); -} - static inline u64 btrfs_header_blocknr(struct btrfs_header *h) { return le64_to_cpu(h->blocknr); @@ -1079,15 +1046,24 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct int btrfs_insert_dir_item(struct 
btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, struct btrfs_key *location, u8 type); -int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, - const char *name, int name_len, int mod); -int btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, u64 dir, - u64 objectid, int mod); -int btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, +struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, int name_len, + int mod); +struct btrfs_dir_item * +btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + u64 objectid, const char *name, int name_len, + int mod); +struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, + struct btrfs_path *path, const char *name, int name_len); +int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_dir_item *di); /* inode-map.c */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index a43deb726486..7a7e9846860a 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -4,24 +4,31 @@ #include "hash.h" #include "transaction.h" -int insert_with_overflow(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_key - *cpu_key, u32 data_size) +struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, + u32 data_size) { - int overflow; int ret; + char *ptr; + struct btrfs_item *item; + struct btrfs_leaf *leaf; ret = 
btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); - overflow = btrfs_key_overflow(cpu_key); - - while(ret == -EEXIST && overflow < BTRFS_KEY_OVERFLOW_MAX) { - overflow++; - btrfs_set_key_overflow(cpu_key, overflow); - btrfs_release_path(root, path); - ret = btrfs_insert_empty_item(trans, root, path, cpu_key, - data_size); + if (ret == -EEXIST) { + ret = btrfs_extend_item(trans, root, path, data_size); + WARN_ON(ret > 0); + if (ret) + return ERR_PTR(ret); } - return ret; + WARN_ON(ret > 0); + leaf = btrfs_buffer_leaf(path->nodes[0]); + item = leaf->items + path->slots[0]; + ptr = btrfs_item_ptr(leaf, path->slots[0], char); + BUG_ON(data_size > btrfs_item_size(item)); + ptr += btrfs_item_size(item) - data_size; + return (struct btrfs_dir_item *)ptr; } int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root @@ -43,13 +50,12 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; - ret = insert_with_overflow(trans, root, path, &key, data_size); - if (ret) + dir_item = insert_with_overflow(trans, root, path, &key, data_size); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); goto out; + } - dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_dir_item); btrfs_cpu_key_to_disk(&dir_item->location, location); btrfs_set_dir_type(dir_item, type); btrfs_set_dir_flags(dir_item, 0); @@ -58,15 +64,39 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); -out: + + /* FIXME, use some real flag for selecting the extra index */ + if (root == root->fs_info->tree_root) { + ret = 0; + goto out; + } + btrfs_release_path(root, path); + + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = location->objectid; + dir_item = insert_with_overflow(trans, 
root, path, &key, data_size); + if (IS_ERR(dir_item)) { + ret = PTR_ERR(dir_item); + goto out; + } + btrfs_cpu_key_to_disk(&dir_item->location, location); + btrfs_set_dir_type(dir_item, type); + btrfs_set_dir_flags(dir_item, 0); + btrfs_set_dir_name_len(dir_item, name_len); + name_ptr = (char *)(dir_item + 1); + btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); + btrfs_mark_buffer_dirty(path->nodes[0]); +out: btrfs_free_path(path); return ret; } -int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u64 dir, - const char *name, int name_len, int mod) +struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, int name_len, + int mod) { int ret; struct btrfs_key key; @@ -74,57 +104,111 @@ int btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int cow = mod != 0; struct btrfs_disk_key *found_key; struct btrfs_leaf *leaf; - u32 overflow; key.objectid = dir; key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); - // btrfs_set_key_overflow(&key, BTRFS_KEY_OVERFLOW_MAX - 1); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); - while(1) { - ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); - if (ret < 0) - return ret; - if (ret > 0) { - if (path->slots[0] == 0) - return 1; - path->slots[0]--; - } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - - if (btrfs_disk_key_objectid(found_key) != dir || - btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || - btrfs_disk_key_offset(found_key) != key.offset) - return 1; - - if (btrfs_match_dir_item_name(root, path, name, name_len)) - return 0; - - overflow = btrfs_disk_key_overflow(found_key); - if (overflow == 0) - return 1; - btrfs_set_key_overflow(&key, overflow - 1); - btrfs_release_path(root, path); + ret = btrfs_search_slot(trans, root, &key, 
path, ins_len, cow); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) { + if (path->slots[0] == 0) + return NULL; + path->slots[0]--; } - return 1; + leaf = btrfs_buffer_leaf(path->nodes[0]); + found_key = &leaf->items[path->slots[0]].key; + + if (btrfs_disk_key_objectid(found_key) != dir || + btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || + btrfs_disk_key_offset(found_key) != key.offset) + return NULL; + + return btrfs_match_dir_item_name(root, path, name, name_len); } -int btrfs_match_dir_item_name(struct btrfs_root *root, +struct btrfs_dir_item * +btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + u64 objectid, const char *name, int name_len, + int mod) +{ + int ret; + struct btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + + key.objectid = dir; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); + key.offset = objectid; + + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) + return ERR_PTR(-ENOENT); + return btrfs_match_dir_item_name(root, path, name, name_len); +} + +struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) { struct btrfs_dir_item *dir_item; char *name_ptr; + u32 total_len; + u32 cur = 0; + u32 this_len; + struct btrfs_leaf *leaf; - dir_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_dir_item); - if (btrfs_dir_name_len(dir_item) != name_len) - return 0; - name_ptr = (char *)(dir_item + 1); - if (memcmp(name_ptr, name, name_len)) - return 0; - return 1; + leaf = btrfs_buffer_leaf(path->nodes[0]); + dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); + total_len = btrfs_item_size(leaf->items + path->slots[0]); + while(cur < total_len) { + this_len = sizeof(*dir_item) + btrfs_dir_name_len(dir_item); + name_ptr = (char 
*)(dir_item + 1); + + if (btrfs_dir_name_len(dir_item) == name_len && + memcmp(name_ptr, name, name_len) == 0) + return dir_item; + + cur += this_len; + dir_item = (struct btrfs_dir_item *)((char *)dir_item + + this_len); + } + return NULL; } + +int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_dir_item *di) +{ + + struct btrfs_leaf *leaf; + u32 sub_item_len; + u32 item_len; + int ret; + + leaf = btrfs_buffer_leaf(path->nodes[0]); + sub_item_len = sizeof(*di) + btrfs_dir_name_len(di); + item_len = btrfs_item_size(leaf->items + path->slots[0]); + if (sub_item_len == btrfs_item_size(leaf->items + path->slots[0])) { + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + } else { + char *ptr = (char *)di; + char *start = btrfs_item_ptr(leaf, path->slots[0], char); + btrfs_memmove(root, leaf, ptr, ptr + sub_item_len, + item_len - (ptr + sub_item_len - start)); + ret = btrfs_truncate_item(trans, root, path, + item_len - sub_item_len); + BUG_ON(ret); + } + return 0; +} + diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1b286bb26f12..dff2db0d1dc2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -127,32 +127,43 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_path *path; const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; - int ret; + int ret = 0; u64 objectid; struct btrfs_dir_item *di; path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - ret = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); - if (ret < 0) + if (IS_ERR(di)) { + ret = PTR_ERR(di); goto err; - if (ret > 0) { + } + if (!di) { ret = -ENOENT; goto err; } - di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_dir_item); objectid = btrfs_disk_key_objectid(&di->location); + ret = btrfs_delete_one_dir_name(trans, root, path, di); + 
BUG_ON(ret); + btrfs_release_path(root, path); - ret = btrfs_del_item(trans, root, path); + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + objectid, name, name_len, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto err; + } + if (!di) { + ret = -ENOENT; + goto err; + } + ret = btrfs_delete_one_dir_name(trans, root, path, di); BUG_ON(ret); - btrfs_release_path(root, path); dentry->d_inode->i_ctime = dir->i_ctime; err: - btrfs_release_path(root, path); btrfs_free_path(path); if (ret == 0) { inode_dec_link_count(dentry->d_inode); @@ -388,15 +399,13 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - ret = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, + di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, namelen, 0); - if (ret || !btrfs_match_dir_item_name(root, path, name, namelen)) { + if (!di || IS_ERR(di)) { location->objectid = 0; ret = 0; goto out; } - di = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_dir_item); btrfs_disk_key_to_cpu(location, &di->location); out: btrfs_release_path(root, path); @@ -523,7 +532,10 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int advance; unsigned char d_type = DT_UNKNOWN; int over = 0; - int key_type = BTRFS_DIR_ITEM_KEY; + u32 di_cur; + u32 di_total; + u32 di_len; + int key_type = BTRFS_DIR_INDEX_KEY; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) @@ -567,12 +579,20 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) filp->f_pos = btrfs_disk_key_offset(&item->key); advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); - over = filldir(dirent, (const char *)(di + 1), - btrfs_dir_name_len(di), - btrfs_disk_key_offset(&item->key), - btrfs_disk_key_objectid(&di->location), d_type); - if (over) - goto nopos; + di_cur = 0; + di_total = 
btrfs_item_size(leaf->items + slot); + while(di_cur < di_total) { + over = filldir(dirent, (const char *)(di + 1), + btrfs_dir_name_len(di), + btrfs_disk_key_offset(&item->key), + btrfs_disk_key_objectid(&di->location), + d_type); + if (over) + goto nopos; + di_len = btrfs_dir_name_len(di) + sizeof(*di); + di_cur += di_len; + di = (struct btrfs_dir_item *)((char *)di + di_len); + } } filp->f_pos++; nopos: @@ -935,75 +955,6 @@ printk("btrfs sync_fs\n"); return 0; } -#if 0 -static int btrfs_get_block_inline(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - struct btrfs_root *root = btrfs_sb(inode->i_sb); - struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_leaf *leaf; - int num_bytes = result->b_size; - int item_size; - int ret; - u64 pos; - char *ptr; - int copy_size; - int err = 0; - char *safe_ptr; - char *data_ptr; - - path = btrfs_alloc_path(); - BUG_ON(!path); - - WARN_ON(create); - if (create) { - return 0; - } - pos = iblock << inode->i_blkbits; - key.objectid = inode->i_ino; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INLINE_DATA_KEY); - ptr = kmap(result->b_page); - safe_ptr = ptr; - ptr += (pos & (PAGE_CACHE_SIZE -1)); -again: - key.offset = pos; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret) { - if (ret < 0) - err = ret; - else - err = 0; - goto out; - } - leaf = btrfs_buffer_leaf(path->nodes[0]); - item_size = btrfs_item_size(leaf->items + path->slots[0]); - copy_size = min(num_bytes, item_size); - data_ptr = btrfs_item_ptr(leaf, path->slots[0], char); - WARN_ON(safe_ptr + PAGE_CACHE_SIZE < ptr + copy_size); - memcpy(ptr, data_ptr, copy_size); - pos += copy_size; - num_bytes -= copy_size; - WARN_ON(num_bytes < 0); - ptr += copy_size; - btrfs_release_path(root, path); - if (num_bytes != 0) { - if (pos >= i_size_read(inode)) - memset(ptr, 0, num_bytes); - else - goto again; - } - set_buffer_uptodate(result); - map_bh(result, inode->i_sb, 0); - err = 0; -out: - btrfs_free_path(path); 
- kunmap(result->b_page); - return err; -} -#endif - static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -1907,174 +1858,6 @@ out: return num_written ? num_written : err; } -#if 0 -static ssize_t inline_one_page(struct btrfs_root *root, struct inode *inode, - struct page *page, loff_t pos, - size_t offset, size_t write_bytes) -{ - struct btrfs_path *path; - struct btrfs_trans_handle *trans; - struct btrfs_key key; - struct btrfs_leaf *leaf; - struct btrfs_key found_key; - int ret; - size_t copy_size = 0; - char *dst = NULL; - int err = 0; - size_t num_written = 0; - - path = btrfs_alloc_path(); - BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - key.objectid = inode->i_ino; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INLINE_DATA_KEY); - -again: - key.offset = pos; - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) { - err = ret; - goto out; - } - if (ret == 0) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); - copy_size = btrfs_item_size(leaf->items + path->slots[0]); - dst = btrfs_item_ptr(leaf, path->slots[0], char); - copy_size = min(write_bytes, copy_size); - goto copyit; - } else { - int slot = path->slots[0]; - if (slot > 0) { - slot--; - } - // FIXME find max key - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[slot].key); - if (found_key.objectid != inode->i_ino) - goto insert; - if (btrfs_key_type(&found_key) != BTRFS_INLINE_DATA_KEY) - goto insert; - copy_size = btrfs_item_size(leaf->items + slot); - if (found_key.offset + copy_size <= pos) - goto insert; - dst = btrfs_item_ptr(leaf, path->slots[0], char); - dst += pos - found_key.offset; - copy_size = copy_size - (pos - found_key.offset); - BUG_ON(copy_size < 0); - copy_size = min(write_bytes, copy_size); - WARN_ON(copy_size == 0); - goto copyit; - } 
-insert: - btrfs_release_path(root, path); - copy_size = min(write_bytes, - (size_t)BTRFS_LEAF_DATA_SIZE(root) - - sizeof(struct btrfs_item) * 4); - ret = btrfs_insert_empty_item(trans, root, path, &key, copy_size); - BUG_ON(ret); - dst = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], char); -copyit: - WARN_ON(copy_size == 0); - WARN_ON(dst + copy_size > - btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], char) + - btrfs_item_size(btrfs_buffer_leaf(path->nodes[0])->items + - path->slots[0])); - btrfs_memcpy(root, path->nodes[0]->b_data, dst, - page_address(page) + offset, copy_size); - mark_buffer_dirty(path->nodes[0]); - btrfs_release_path(root, path); - pos += copy_size; - offset += copy_size; - num_written += copy_size; - write_bytes -= copy_size; - if (write_bytes) - goto again; -out: - btrfs_free_path(path); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - return num_written ? num_written : err; -} - -static ssize_t btrfs_file_inline_write(struct file *file, - const char __user *buf, - size_t count, loff_t *ppos) -{ - loff_t pos; - size_t num_written = 0; - int err = 0; - int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - unsigned long page_index; - - if (file->f_flags & O_DIRECT) - return -EINVAL; - pos = *ppos; - - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - current->backing_dev_info = inode->i_mapping->backing_dev_info; - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - if (count == 0) - goto out; - err = remove_suid(file->f_path.dentry); - if (err) - goto out; - file_update_time(file); - mutex_lock(&inode->i_mutex); - while(count > 0) { - size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); - struct page *page; - - page_index = pos >> PAGE_CACHE_SHIFT; - page = 
grab_cache_page(inode->i_mapping, page_index); - if (!PageUptodate(page)) { - ret = mpage_readpage(page, btrfs_get_block); - BUG_ON(ret); - lock_page(page); - } - ret = btrfs_copy_from_user(pos, 1, - write_bytes, &page, buf); - BUG_ON(ret); - write_bytes = inline_one_page(root, inode, page, pos, - offset, write_bytes); - SetPageUptodate(page); - if (write_bytes > 0 && pos + write_bytes > inode->i_size) { - i_size_write(inode, pos + write_bytes); - mark_inode_dirty(inode); - } - page_cache_release(page); - unlock_page(page); - if (write_bytes < 0) - goto out_unlock; - buf += write_bytes; - count -= write_bytes; - pos += write_bytes; - num_written += write_bytes; - - balance_dirty_pages_ratelimited(inode->i_mapping); - cond_resched(); - } -out_unlock: - mutex_unlock(&inode->i_mutex); -out: - *ppos = pos; - current->backing_dev_info = NULL; - return num_written ? num_written : err; -} -#endif - static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset, unsigned long size) { @@ -2422,6 +2205,7 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; int ret = 0; + struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; u64 root_dirid; @@ -2440,12 +2224,12 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int return -ENOMEM; root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, + di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, root_dirid, vol_args.name, namelen, 0); mutex_unlock(&root->fs_info->fs_mutex); btrfs_free_path(path); - if (ret == 0) + if (di && !IS_ERR(di)) return -EEXIST; if (root == root->fs_info->tree_root) -- cgit v1.2.3 From 8fd17795b2261ecb1bad2a6df09ef14c4957a3fb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 19 Apr 2007 21:01:03 -0400 Subject: Btrfs: 
early fsync support Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/root-tree.c | 1 - fs/btrfs/super.c | 37 ++++++++++++++++++++++++++++++++++++- fs/btrfs/transaction.c | 25 ++++++++++++++++++++++--- fs/btrfs/transaction.h | 1 + 7 files changed, 62 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 8d7e5a3ed629..5ff2cef9f028 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -7,6 +7,7 @@ * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes * Add parent pointer back to extent map. +* Implement real list of pending transactions * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d628a3b9441c..047a84762d93 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -256,6 +256,7 @@ struct btrfs_fs_info { struct inode *btree_inode; struct mutex trans_mutex; struct mutex fs_mutex; + struct list_head trans_list; struct crypto_hash *hash_tfm; spinlock_t hash_lock; struct kobject kobj; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 11e17a2f736b..b666c3571aee 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -528,6 +528,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); + INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 72be9836932f..a42943bd9179 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -100,7 +100,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, BUG_ON(refs == 0); if (refs == 1) { ret = btrfs_del_item(trans, root, path); -printk("deleting 
root %Lu %Lu %u\n", key->objectid, key->offset, key->flags); } else { btrfs_set_root_refs(ri, refs - 1); printk("ref now %u root %Lu %Lu %u\n", refs -1, key->objectid, key->offset, key->flags); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index dff2db0d1dc2..f99c764a59db 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -932,6 +933,26 @@ out_unlock: return err; } +static int btrfs_sync_file(struct file *file, + struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct btrfs_trans_handle *trans; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto out; + } + ret = btrfs_commit_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); +out: + return ret > 0 ? EIO : ret; +} + static int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; @@ -2353,6 +2374,19 @@ static int btrfs_getattr(struct vfsmount *mnt, return 0; } +static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct btrfs_root *root = btrfs_sb(dentry->d_sb); + struct btrfs_super_block *disk_super = root->fs_info->disk_super; + + buf->f_namelen = BTRFS_NAME_LEN; + buf->f_blocks = btrfs_super_total_blocks(disk_super); + buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super); + buf->f_bavail = buf->f_bfree; + buf->f_bsize = dentry->d_sb->s_blocksize; + buf->f_type = BTRFS_SUPER_MAGIC; + return 0; +} static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2362,7 +2396,6 @@ static struct file_system_type btrfs_fs_type = { }; static struct super_operations btrfs_super_ops = { - .statfs = simple_statfs, .delete_inode = btrfs_delete_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, @@ 
-2371,6 +2404,7 @@ static struct super_operations btrfs_super_ops = { .write_inode = btrfs_write_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, + .statfs = btrfs_statfs, }; static struct inode_operations btrfs_dir_inode_operations = { @@ -2413,6 +2447,7 @@ static struct file_operations btrfs_file_operations = { .mmap = generic_file_mmap, .open = generic_file_open, .ioctl = btrfs_ioctl, + .fsync = btrfs_sync_file, }; static int __init init_btrfs_fs(void) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8740752f3845..078cb9cbf9dd 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -19,6 +19,7 @@ static void put_transaction(struct btrfs_transaction *transaction) if (transaction->use_count == 0) { WARN_ON(total_trans == 0); total_trans--; + list_del_init(&transaction->list); memset(transaction, 0, sizeof(*transaction)); kmem_cache_free(btrfs_transaction_cachep, transaction); } @@ -43,6 +44,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 0; + list_add_tail(&cur_trans->list, &root->fs_info->trans_list); } cur_trans->num_writers++; return 0; @@ -236,6 +238,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, { int ret = 0; struct btrfs_transaction *cur_trans; + struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; DEFINE_WAIT(wait); @@ -272,13 +275,29 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, BUG_ON(ret); cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; - btrfs_set_super_generation(root->fs_info->disk_super, - root->fs_info->generation + 1); + if (cur_trans->list.prev != &root->fs_info->trans_list) { + prev_trans = list_entry(cur_trans->list.prev, + struct btrfs_transaction, list); + if (prev_trans->commit_done) + prev_trans = NULL; + else + prev_trans->use_count++; + } mutex_unlock(&root->fs_info->trans_mutex); + 
mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); + if (prev_trans) { + mutex_lock(&root->fs_info->trans_mutex); + wait_for_commit(root, prev_trans); + put_transaction(prev_trans); + mutex_unlock(&root->fs_info->trans_mutex); + } + btrfs_set_super_generation(root->fs_info->disk_super, + cur_trans->transid); BUG_ON(ret); - write_ctree_super(trans, root); + + mutex_lock(&root->fs_info->fs_mutex); btrfs_finish_extent_commit(trans, root); mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 4f1496ae6f24..3cc29900a074 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -8,6 +8,7 @@ struct btrfs_transaction { int use_count; int commit_done; int magic; + struct list_head list; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; -- cgit v1.2.3 From 4d775673091d43b39fa9d086071009f98dec289e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 20 Apr 2007 20:23:12 -0400 Subject: Btrfs: add owner and type fields to the extents aand block headers Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 +++++ fs/btrfs/ctree.h | 45 ++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 15 ++++++++++----- fs/btrfs/super.c | 13 +++++++------ 5 files changed, 65 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1bae035f5171..b4783bf8bf4f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -68,6 +68,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); btrfs_set_header_generation(&cow_node->header, trans->transid); + btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); btrfs_inc_ref(trans, root, buf); if (buf == root->node) { root->node = cow; @@ -806,6 +807,7 
@@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&c->header, level); btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); btrfs_set_header_generation(&c->header, trans->transid); + btrfs_set_header_owner(&c->header, root->root_key.objectid); lower = btrfs_buffer_node(path->nodes[level-1]); memcpy(c->header.fsid, root->fs_info->disk_super->fsid, sizeof(c->header.fsid)); @@ -909,6 +911,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); btrfs_set_header_generation(&split->header, trans->transid); + btrfs_set_header_owner(&split->header, root->root_key.objectid); memcpy(split->header.fsid, root->fs_info->disk_super->fsid, sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; @@ -1280,6 +1283,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); + btrfs_set_header_owner(&right->header, root->root_key.objectid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, sizeof(right->header.fsid)); @@ -1376,6 +1380,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); btrfs_set_header_generation(&right->header, trans->transid); + btrfs_set_header_owner(&right->header, root->root_key.objectid); btrfs_set_header_level(&right->header, 0); memcpy(right->header.fsid, root->fs_info->disk_super->fsid, sizeof(right->header.fsid)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 681b23933d9b..78248d577290 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -62,6 
+62,7 @@ struct btrfs_header { u8 fsid[16]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; + __le64 owner; __le16 nritems; __le16 flags; u8 level; @@ -151,12 +152,17 @@ struct btrfs_path { int slots[BTRFS_MAX_LEVEL]; }; +/* values for the type field in btrfs_extent_item */ +#define BTRFS_EXTENT_TREE 1 +#define BTRFS_EXTENT_FILE 2 /* * items in the extent btree are used to record the objectid of the * owner of the block and the number of references */ struct btrfs_extent_item { __le32 refs; + __le64 owner; + u8 type; } __attribute__ ((__packed__)); struct btrfs_inode_timespec { @@ -473,11 +479,32 @@ static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) ei->refs = cpu_to_le32(val); } +static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) +{ + return le64_to_cpu(ei->owner); +} + +static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) +{ + ei->owner = cpu_to_le64(val); +} + +static inline u8 btrfs_extent_type(struct btrfs_extent_item *ei) +{ + return ei->type; +} + +static inline void btrfs_set_extent_type(struct btrfs_extent_item *ei, u8 val) +{ + ei->type = val; +} + static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) { return le64_to_cpu(n->ptrs[nr].blockptr); } + static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, u64 val) { @@ -636,6 +663,17 @@ static inline void btrfs_set_header_generation(struct btrfs_header *h, h->generation = cpu_to_le64(val); } +static inline u64 btrfs_header_owner(struct btrfs_header *h) +{ + return le64_to_cpu(h->owner); +} + +static inline void btrfs_set_header_owner(struct btrfs_header *h, + u64 val) +{ + h->owner = cpu_to_le64(val); +} + static inline u16 btrfs_header_nritems(struct btrfs_header *h) { return le16_to_cpu(h->nritems); @@ -996,9 +1034,10 @@ int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head 
*btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins); +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 owner, + u8 type, u64 num_blocks, u64 search_start, + u64 search_end, struct btrfs_key *ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 13046295bf7a..7aff6bb55d91 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -319,6 +319,7 @@ static int __setup_root(int blocksize, root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); + root->root_key.objectid = objectid; return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b2faad3e8791..49f7cd6e067c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -460,9 +460,10 @@ error: * * returns 0 if everything worked, non-zero otherwise. 
*/ -int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins) +int btrfs_alloc_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 owner, + u8 type, u64 num_blocks, u64 search_start, + u64 search_end, struct btrfs_key *ins) { int ret; int pending_ret; @@ -472,6 +473,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item extent_item; btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_extent_owner(&extent_item, owner); + btrfs_set_extent_type(&extent_item, type); if (root == extent_root) { BUG_ON(extent_root->fs_info->current_insert.offset == 0); @@ -508,13 +511,15 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root * returns the tree buffer or NULL. */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root) { struct btrfs_key ins; int ret; struct buffer_head *buf; - ret = btrfs_alloc_extent(trans, root, 1, 0, (unsigned long)-1, &ins); + ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, + BTRFS_EXTENT_TREE, + 1, 0, (unsigned long)-1, &ins); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f99c764a59db..eba239cce212 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1819,8 +1819,9 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - ret = btrfs_alloc_extent(trans, root, num_blocks, 1, - (u64)-1, &ins); + ret = btrfs_alloc_extent(trans, root, inode->i_ino, + BTRFS_EXTENT_FILE, num_blocks, 1, + (u64)-1, &ins); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); @@ -2017,8 +2018,12 @@ static int create_subvol(struct btrfs_root *root, 
char *name, int namelen) btrfs_set_header_level(&leaf->header, 0); btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); btrfs_set_header_generation(&leaf->header, trans->transid); + btrfs_set_header_owner(&leaf->header, root->root_key.objectid); memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); + mark_buffer_dirty(subvol); + brelse(subvol); + subvol = NULL; inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); @@ -2031,10 +2036,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); - mark_buffer_dirty(subvol); - brelse(subvol); - subvol = NULL; - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); BUG_ON(ret); -- cgit v1.2.3 From 5e82849e63723afe720d4b325d04a989f6247cce Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 23 Apr 2007 19:10:49 -0400 Subject: Btrfs: new subvolume oops fix Signed-off-by: Chris Mason --- fs/btrfs/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index eba239cce212..6940a907654d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2013,6 +2013,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) BUG_ON(!trans); subvol = btrfs_alloc_free_block(trans, root); + if (subvol == NULL) + return -ENOSPC; leaf = btrfs_buffer_leaf(subvol); btrfs_set_header_nritems(&leaf->header, 0); btrfs_set_header_level(&leaf->header, 0); @@ -2022,8 +2024,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, sizeof(leaf->header.fsid)); mark_buffer_dirty(subvol); - brelse(subvol); - subvol = NULL; inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); @@ -2035,6 +2035,8 @@ static int create_subvol(struct btrfs_root *root, char 
*name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); + brelse(subvol); + subvol = NULL; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); -- cgit v1.2.3 From b51338628f604d9b4ed3c776839a484a30e12fb7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 24 Apr 2007 11:52:22 -0400 Subject: Btrfs: add dirty_inode call Signed-off-by: Chris Mason --- fs/btrfs/super.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6940a907654d..43f081593c45 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -722,17 +722,27 @@ static int btrfs_write_inode(struct inode *inode, int wait) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - int ret; + int ret = 0; + + if (wait) { + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + } + return ret; +} + +static void btrfs_dirty_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - ret = btrfs_update_inode(trans, root, inode); - if (wait) - btrfs_commit_transaction(trans, root); - else - btrfs_end_transaction(trans, root); + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - return ret; } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -2390,6 +2400,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = BTRFS_SUPER_MAGIC; return 0; } + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2405,6 +2416,7 @@ static struct super_operations btrfs_super_ops = { .write_super = 
btrfs_write_super, .sync_fs = btrfs_sync_fs, .write_inode = btrfs_write_inode, + .dirty_inode = btrfs_dirty_inode, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs, -- cgit v1.2.3 From c62a1920ced752e86f57ab1d4ad0ec65012bce4d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 24 Apr 2007 12:07:39 -0400 Subject: Btrfs: get rid of the extent_item type field Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 16 +--------------- fs/btrfs/extent-tree.c | 5 +---- fs/btrfs/super.c | 3 +-- 3 files changed, 3 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 78248d577290..2d166ca8b104 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -152,9 +152,6 @@ struct btrfs_path { int slots[BTRFS_MAX_LEVEL]; }; -/* values for the type field in btrfs_extent_item */ -#define BTRFS_EXTENT_TREE 1 -#define BTRFS_EXTENT_FILE 2 /* * items in the extent btree are used to record the objectid of the * owner of the block and the number of references @@ -162,7 +159,6 @@ struct btrfs_path { struct btrfs_extent_item { __le32 refs; __le64 owner; - u8 type; } __attribute__ ((__packed__)); struct btrfs_inode_timespec { @@ -489,16 +485,6 @@ static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) ei->owner = cpu_to_le64(val); } -static inline u8 btrfs_extent_type(struct btrfs_extent_item *ei) -{ - return ei->type; -} - -static inline void btrfs_set_extent_type(struct btrfs_extent_item *ei, u8 val) -{ - ei->type = val; -} - static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) { return le64_to_cpu(n->ptrs[nr].blockptr); @@ -1036,7 +1022,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u8 type, u64 num_blocks, u64 search_start, + u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key 
*ins); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index aac3eccffb8e..116519503d0c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -167,7 +167,6 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct ins.offset = 1; ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - btrfs_set_extent_type(&extent_item, BTRFS_EXTENT_TREE); btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); for (i = 0; i < extent_root->fs_info->current_insert.flags; i++) { @@ -464,7 +463,7 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u8 type, u64 num_blocks, u64 search_start, + u64 num_blocks, u64 search_start, u64 search_end, struct btrfs_key *ins) { int ret; @@ -476,7 +475,6 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_extent_refs(&extent_item, 1); btrfs_set_extent_owner(&extent_item, owner); - btrfs_set_extent_type(&extent_item, type); if (root == extent_root) { BUG_ON(extent_root->fs_info->current_insert.offset == 0); @@ -520,7 +518,6 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - BTRFS_EXTENT_TREE, 1, 0, (unsigned long)-1, &ins); if (ret) { BUG(); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 43f081593c45..c076474c8bff 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1830,8 +1830,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, - BTRFS_EXTENT_FILE, num_blocks, 1, - (u64)-1, &ins); + num_blocks, 1, (u64)-1, &ins); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, 
root, inode->i_ino, start_pos, ins.objectid, ins.offset); -- cgit v1.2.3 From f68cad0f9eb3c3cc100635bd0ea191ee093cf887 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 24 Apr 2007 12:44:26 -0400 Subject: Btrfs: fixup dirty_inode related deadlocks Signed-off-by: Chris Mason --- fs/btrfs/super.c | 109 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 53 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c076474c8bff..5bbccbc7e3cc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -120,6 +120,58 @@ make_bad: make_bad_inode(inode); } +static void fill_inode_item(struct btrfs_inode_item *item, + struct inode *inode) +{ + btrfs_set_inode_uid(item, inode->i_uid); + btrfs_set_inode_gid(item, inode->i_gid); + btrfs_set_inode_size(item, inode->i_size); + btrfs_set_inode_mode(item, inode->i_mode); + btrfs_set_inode_nlink(item, inode->i_nlink); + btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); + btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); + btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); + btrfs_set_inode_nblocks(item, inode->i_blocks); + btrfs_set_inode_generation(item, inode->i_generation); +} + + +static int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_inode_item *inode_item; + struct btrfs_path *path; + int ret; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto failed; + } + + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_inode_item); + + fill_inode_item(inode_item, 
inode); + btrfs_mark_buffer_dirty(path->nodes[0]); + ret = 0; +failed: + btrfs_release_path(root, path); + btrfs_free_path(path); + return ret; +} + + static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, @@ -166,10 +218,11 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, dentry->d_inode->i_ctime = dir->i_ctime; err: btrfs_free_path(path); - if (ret == 0) { - inode_dec_link_count(dentry->d_inode); + if (!ret) { dir->i_size -= name_len * 2; - mark_inode_dirty(dir); + btrfs_update_inode(trans, root, dir); + drop_nlink(dentry->d_inode); + btrfs_update_inode(trans, root, dentry->d_inode); } return ret; } @@ -668,56 +721,6 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return 0; } -static void fill_inode_item(struct btrfs_inode_item *item, - struct inode *inode) -{ - btrfs_set_inode_uid(item, inode->i_uid); - btrfs_set_inode_gid(item, inode->i_gid); - btrfs_set_inode_size(item, inode->i_size); - btrfs_set_inode_mode(item, inode->i_mode); - btrfs_set_inode_nlink(item, inode->i_nlink); - btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); - btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); - btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); - btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); - btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); - btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(item, inode->i_blocks); - btrfs_set_inode_generation(item, inode->i_generation); -} - -static int btrfs_update_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) -{ - struct btrfs_inode_item *inode_item; - struct btrfs_path *path; - int ret; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, - &BTRFS_I(inode)->location, 1); - if (ret) { - if (ret > 0) - ret = -ENOENT; - goto 
failed; - } - - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - - fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path->nodes[0]); - ret = 0; -failed: - btrfs_release_path(root, path); - btrfs_free_path(path); - return ret; -} - static int btrfs_write_inode(struct inode *inode, int wait) { struct btrfs_root *root = BTRFS_I(inode)->root; -- cgit v1.2.3 From 9078a3e1e4e489dddc636c7bb8780349d4298743 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 26 Apr 2007 16:46:15 -0400 Subject: Btrfs: start of block group code Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 66 ++++++++++++---- fs/btrfs/disk-io.c | 4 + fs/btrfs/extent-tree.c | 204 +++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/print-tree.c | 7 ++ fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 2 + 6 files changed, 252 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 26d0cdd46f49..419917279e65 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -239,6 +239,19 @@ struct btrfs_device_item { __le64 device_id; } __attribute__ ((__packed__)); +/* tag for the radix tree of block groups in ram */ +#define BTRFS_BLOCK_GROUP_DIRTY 0 +#define BTRFS_BLOCK_GROUP_HINTS 8 +#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) +struct btrfs_block_group_item { + __le64 used; +} __attribute__ ((__packed__)); + +struct btrfs_block_group_cache { + struct btrfs_key key; + struct btrfs_block_group_item item; +}; + struct crypto_hash; struct btrfs_fs_info { struct btrfs_root *extent_root; @@ -249,6 +262,7 @@ struct btrfs_fs_info { struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; struct radix_tree_root dev_radix; + struct radix_tree_root block_group_radix; u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; int extent_tree_insert_nr; @@ -301,49 +315,67 @@ struct btrfs_root { * info about object characteristics. 
There is one for every file and dir in * the FS */ -#define BTRFS_INODE_ITEM_KEY 1 +#define BTRFS_INODE_ITEM_KEY 1 + +/* reserve 2-15 close to the inode for later flexibility */ /* * dir items are the name -> inode pointers in a directory. There is one * for every name in a directory. */ -#define BTRFS_DIR_ITEM_KEY 2 -#define BTRFS_DIR_INDEX_KEY 3 +#define BTRFS_DIR_ITEM_KEY 16 +#define BTRFS_DIR_INDEX_KEY 17 /* - * inline data is file data that fits in the btree. + * extent data is for file data */ -#define BTRFS_INLINE_DATA_KEY 4 -/* - * extent data is for data that can't fit in the btree. It points to - * a (hopefully) huge chunk of disk - */ -#define BTRFS_EXTENT_DATA_KEY 5 +#define BTRFS_EXTENT_DATA_KEY 18 /* * csum items have the checksums for data in the extents */ -#define BTRFS_CSUM_ITEM_KEY 6 +#define BTRFS_CSUM_ITEM_KEY 19 + +/* reserve 20-31 for other file stuff */ /* * root items point to tree roots. There are typically in the root * tree used by the super block to find all the other trees */ -#define BTRFS_ROOT_ITEM_KEY 7 +#define BTRFS_ROOT_ITEM_KEY 32 /* * extent items are in the extent map tree. These record which blocks * are used, and how many references there are to each block */ -#define BTRFS_EXTENT_ITEM_KEY 8 +#define BTRFS_EXTENT_ITEM_KEY 33 + +/* + * block groups give us hints into the extent allocation trees. Which + * blocks are free etc etc + */ +#define BTRFS_BLOCK_GROUP_ITEM_KEY 34 /* * dev items list the devices that make up the FS */ -#define BTRFS_DEV_ITEM_KEY 9 +#define BTRFS_DEV_ITEM_KEY 35 /* * string items are for debugging. 
They just store a short string of * data in the FS */ -#define BTRFS_STRING_ITEM_KEY 10 +#define BTRFS_STRING_ITEM_KEY 253 + + +static inline u64 btrfs_block_group_used(struct btrfs_block_group_item *bi) +{ + return le64_to_cpu(bi->used); +} + +static inline void btrfs_set_block_group_used(struct + btrfs_block_group_item *bi, + u64 val) +{ + bi->used = cpu_to_le64(val); +} static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) { @@ -1037,6 +1069,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks); +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_free_block_groups(struct btrfs_fs_info *info); +int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 956727f015a5..1c27eb645510 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -529,6 +529,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); + INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; @@ -613,6 +614,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); + btrfs_read_block_groups(extent_root); + fs_info->generation = btrfs_super_generation(disk_super) + 1; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); kobj_set_kset_s(fs_info, btrfs_subsys); @@ -741,6 +744,7 @@ int close_ctree(struct btrfs_root *root) iput(fs_info->btree_inode); free_dev_radix(fs_info); + 
btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e6fe3fd38819..0bb4fc83cfd6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -127,6 +127,105 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } +static int write_one_cache_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_block_group_cache *cache) +{ + int ret; + int pending_ret; + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_block_group_item *bi; + struct btrfs_key ins; + + find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins); + ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); + BUG_ON(ret); + bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + struct btrfs_block_group_item); + memcpy(bi, &cache->item, sizeof(*bi)); + mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(extent_root, path); + + finish_current_insert(trans, extent_root); + pending_ret = del_pending_extents(trans, extent_root); + if (ret) + return ret; + if (pending_ret) + return pending_ret; + return 0; + +} + +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache[8]; + int ret; + int err = 0; + int werr = 0; + struct radix_tree_root *radix = &root->fs_info->block_group_radix; + int i; + struct btrfs_path *path; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(1) { + ret = radix_tree_gang_lookup_tag(radix, (void **)cache, + 0, ARRAY_SIZE(cache), + BTRFS_BLOCK_GROUP_DIRTY); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_tag_clear(radix, cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_DIRTY); + err = write_one_cache_group(trans, root, + path, cache[i]); + if (err) + werr = 
err; + } + } + btrfs_free_path(path); + return werr; +} + +static int update_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 blocknr, u64 num, int alloc) +{ + struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + u64 total = num; + u64 old_val; + u64 block_in_group; + int ret; + while(total) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&cache, blocknr, 1); + if (!ret) + return -1; + block_in_group = blocknr - cache->key.objectid; + WARN_ON(block_in_group > cache->key.offset); + radix_tree_tag_set(&info->block_group_radix, + cache->key.objectid + cache->key.offset - 1, + BTRFS_BLOCK_GROUP_DIRTY); + + old_val = btrfs_block_group_used(&cache->item); + num = min(total, cache->key.offset - block_in_group); + total -= num; + blocknr += num; + if (alloc) + old_val += num; + else + old_val -= num; + btrfs_set_block_group_used(&cache->item, old_val); + } + return 0; +} + int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -264,6 +363,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_del_item(trans, extent_root, path); if (ret) BUG(); + ret = update_block_group(trans, root, blocknr, num_blocks, 0); + BUG_ON(ret); } btrfs_release_path(extent_root, path); btrfs_free_path(path); @@ -365,21 +466,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3; } - if (info->last_insert.objectid == 0 && search_end == (u64)-1) { - struct btrfs_disk_key *last_key; - btrfs_init_path(path); - ins->objectid = (u64)-1; - ins->offset = (u64)-1; - ret = btrfs_search_slot(trans, root, ins, path, 0, 0); - if (ret < 0) - goto error; - BUG_ON(ret == 0); - if (path->slots[0] > 0) - path->slots[0]--; - l = btrfs_buffer_leaf(path->nodes[0]); - last_key = &l->items[path->slots[0]].key; - search_start = btrfs_disk_key_objectid(last_key); 
- } if (info->last_insert.objectid > search_start) search_start = info->last_insert.objectid; @@ -420,6 +506,8 @@ check_failed: goto check_pending; } btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) + goto next; if (key.objectid >= search_start) { if (start_found) { if (last_block < search_start) @@ -434,6 +522,7 @@ check_failed: } start_found = 1; last_block = key.objectid + key.offset; +next: path->slots[0]++; } // FIXME -ENOSPC @@ -498,7 +587,6 @@ error: btrfs_free_path(path); return ret; } - /* * finds a free extent and does all the dirty work required for allocation * returns the key for the extent through ins, and a tree buffer for @@ -532,6 +620,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ins->objectid = info->extent_tree_prealloc[nr]; info->extent_tree_insert[info->extent_tree_insert_nr++] = ins->objectid; + ret = update_block_group(trans, root, + ins->objectid, ins->offset, 1); + BUG_ON(ret); return 0; } /* do the real allocation */ @@ -558,6 +649,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, return ret; if (pending_ret) return pending_ret; + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); return 0; } @@ -578,6 +670,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG(); return NULL; } + BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); return buf; @@ -758,3 +851,82 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_free_path(path); return ret; } + +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + int ret; + struct btrfs_block_group_cache *cache[8]; + int i; + + while(1) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)cache, 0, + ARRAY_SIZE(cache)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_delete(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1); + 
kfree(cache[i]); + } + } + return 0; +} + +int btrfs_read_block_groups(struct btrfs_root *root) +{ + struct btrfs_path *path; + int ret; + int err = 0; + struct btrfs_block_group_item *bi; + struct btrfs_block_group_cache *cache; + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_leaf *leaf; + u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + + root = root->fs_info->extent_root; + key.objectid = 0; + key.offset = group_size_blocks; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(1) { + ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + &key, path, 0, 0); + if (ret != 0) { + err = ret; + break; + } + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + cache = kmalloc(sizeof(*cache), GFP_NOFS); + if (!cache) { + err = -1; + break; + } + bi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_block_group_item); + memcpy(&cache->item, bi, sizeof(*bi)); + memcpy(&cache->key, &found_key, sizeof(found_key)); + key.objectid = found_key.objectid + found_key.offset; + btrfs_release_path(root, path); + ret = radix_tree_insert(&root->fs_info->block_group_radix, + found_key.objectid + + found_key.offset - 1, + (void *)cache); + BUG_ON(ret); + if (key.objectid >= + btrfs_super_total_blocks(root->fs_info->disk_super)) + break; + } + + btrfs_free_path(path); + return 0; +} diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1e7038b070ae..2f95fc67a036 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -11,6 +11,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_root_item *ri; struct btrfs_dir_item *di; struct btrfs_inode_item *ii; + struct btrfs_block_group_item *bi; u32 type; printk("leaf %Lu total ptrs %d free space %d\n", @@ -53,6 +54,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) 
printk("\t\textent data refs %u\n", btrfs_extent_refs(ei)); break; + case BTRFS_BLOCK_GROUP_ITEM_KEY: + bi = btrfs_item_ptr(l, i, + struct btrfs_block_group_item); + printk("\t\tblock group used %Lu\n", + btrfs_block_group_used(bi)); + break; case BTRFS_STRING_ITEM_KEY: printk("\t\titem data %.*s\n", btrfs_item_size(item), btrfs_leaf_data(l) + btrfs_item_offset(item)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5bbccbc7e3cc..edcebf79b04a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -377,7 +377,6 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && - btrfs_disk_key_type(found_key) != BTRFS_INLINE_DATA_KEY && btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) break; if (btrfs_disk_key_offset(found_key) < inode->i_size) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 078cb9cbf9dd..8a2545f6a5b6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -125,6 +125,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_set_super_device_root(fs_info->disk_super, bh_blocknr(dev_root->node)); } + btrfs_write_dirty_block_groups(trans, extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == bh_blocknr(extent_root->node)) @@ -135,6 +136,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, &extent_root->root_key, &extent_root->root_item); BUG_ON(ret); + btrfs_write_dirty_block_groups(trans, extent_root); } return 0; } -- cgit v1.2.3 From cd1bc4653dc37f6390f4d6df4f987044c64f700b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 27 Apr 2007 10:08:34 -0400 Subject: Btrfs: more block allocator work Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 95 +++++++++++++++++++++++++++++++++++++++++++------- fs/btrfs/super.c | 3 ++ 4 files 
changed, 90 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 419917279e65..c432222d40e3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -250,6 +250,8 @@ struct btrfs_block_group_item { struct btrfs_block_group_cache { struct btrfs_key key; struct btrfs_block_group_item item; + u64 first_free; + u64 last_alloc; }; struct crypto_hash; @@ -257,7 +259,7 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *dev_root; - struct btrfs_key last_insert; + struct btrfs_block_group_cache *block_group_cache; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c27eb645510..2489ffa5fb38 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -558,7 +558,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); + fs_info->block_group_cache = NULL; __setup_root(sb->s_blocksize, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0bb4fc83cfd6..71e3b311fc42 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,6 +12,63 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static int find_search_start(struct btrfs_root *root, int data) +{ + struct btrfs_block_group_cache *cache[8]; + struct btrfs_fs_info *info = root->fs_info; + u64 used; + u64 last; + int i; + int ret; + + cache[0] = info->block_group_cache; + if (!cache[0]) + goto find_new; + used = btrfs_block_group_used(&cache[0]->item); + if (used < (cache[0]->key.offset * 3 / 2)) + return 0; +find_new: + last = 0; + while(1) { + ret = 
radix_tree_gang_lookup_tag(&info->block_group_radix, + (void **)cache, + last, ARRAY_SIZE(cache), + BTRFS_BLOCK_GROUP_DIRTY); + if (!ret) + break; + for (i = 0; i < ret; i++) { + used = btrfs_block_group_used(&cache[i]->item); + if (used < (cache[i]->key.offset * 3 / 2)) { + info->block_group_cache = cache[i]; + cache[i]->last_alloc = cache[i]->first_free; + return 0; + } + last = cache[i]->key.objectid + + cache[i]->key.offset - 1; + } + } + last = 0; + while(1) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)cache, + last, ARRAY_SIZE(cache)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + used = btrfs_block_group_used(&cache[i]->item); + if (used < (cache[i]->key.offset * 3 / 2)) { + info->block_group_cache = cache[i]; + cache[i]->last_alloc = cache[i]->first_free; + return 0; + } + last = cache[i]->key.objectid + + cache[i]->key.offset - 1; + } + } + info->block_group_cache = NULL; + return 0; +} + int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks) @@ -205,8 +262,11 @@ static int update_block_group(struct btrfs_trans_handle *trans, while(total) { ret = radix_tree_gang_lookup(&info->block_group_radix, (void **)&cache, blocknr, 1); - if (!ret) + if (!ret) { + printk(KERN_CRIT "blocknr %Lu lookup failed\n", + blocknr); return -1; + } block_in_group = blocknr - cache->key.objectid; WARN_ON(block_in_group > cache->key.offset); radix_tree_tag_set(&info->block_group_radix, @@ -217,10 +277,15 @@ static int update_block_group(struct btrfs_trans_handle *trans, num = min(total, cache->key.offset - block_in_group); total -= num; blocknr += num; - if (alloc) + if (alloc) { old_val += num; - else + if (blocknr > cache->last_alloc) + cache->last_alloc = blocknr; + } else { old_val -= num; + if (blocknr < cache->first_free) + cache->first_free = blocknr; + } btrfs_set_block_group_used(&cache->item, old_val); } return 0; @@ -246,9 +311,7 @@ int btrfs_finish_extent_commit(struct 
btrfs_trans_handle *trans, struct clear_radix_bit(pinned_radix, gang[i]); } } - if (root->fs_info->last_insert.objectid > first) - root->fs_info->last_insert.objectid = first; - root->fs_info->last_insert.offset = 0; + root->fs_info->block_group_cache = NULL; return 0; } @@ -466,8 +529,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3; } - if (info->last_insert.objectid > search_start) - search_start = info->last_insert.objectid; + find_search_start(root, 0); + if (info->block_group_cache && + info->block_group_cache->last_alloc > search_start) + search_start = info->block_group_cache->last_alloc; check_failed: btrfs_init_path(path); @@ -567,8 +632,7 @@ check_pending: total_found < total_needed) { nr = total_needed - total_found - 1; BUG_ON(nr < 0); - root->fs_info->extent_tree_prealloc[nr] = - test_block; + info->extent_tree_prealloc[nr] = test_block; total_found++; test_block++; } @@ -576,9 +640,14 @@ check_pending: search_start = test_block; goto check_failed; } - root->fs_info->extent_tree_prealloc_nr = total_found; + info->extent_tree_prealloc_nr = total_found; + } + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&info->block_group_cache, + ins->objectid, 1); + if (ret) { + info->block_group_cache->last_alloc = ins->objectid; } - root->fs_info->last_insert.objectid = ins->objectid; ins->offset = num_blocks; btrfs_free_path(path); return 0; @@ -915,6 +984,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_block_group_item); memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); + cache->last_alloc = 0; + cache->first_free = 0; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); ret = radix_tree_insert(&root->fs_info->block_group_radix, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index edcebf79b04a..a10e902d3102 100644 --- a/fs/btrfs/super.c +++ 
b/fs/btrfs/super.c @@ -223,6 +223,7 @@ err: btrfs_update_inode(trans, root, dir); drop_nlink(dentry->d_inode); btrfs_update_inode(trans, root, dentry->d_inode); + dir->i_sb->s_dirt = 1; } return ret; } @@ -411,6 +412,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, error: btrfs_release_path(root, path); btrfs_free_path(path); + inode->i_sb->s_dirt = 1; return ret; } @@ -935,6 +937,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_fail; d_instantiate(dentry, inode); drop_on_err = 0; + dir->i_sb->s_dirt = 1; out_fail: btrfs_end_transaction(trans, root); -- cgit v1.2.3 From 7c4452b9a6ca7aabe37ea2e43d443110bdc08cd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 28 Apr 2007 09:29:35 -0400 Subject: Btrfs: smarter transaction writeback Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 37 +++++++++++++++++++++++++++++++++++-- fs/btrfs/transaction.h | 1 + 4 files changed, 37 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index afc5267515e2..652cf305a967 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -762,6 +762,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); + set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a10e902d3102..1890e8648dbd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -980,7 +980,6 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); diff --git 
a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8a2545f6a5b6..f9b8864dcc40 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -45,6 +45,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->use_count = 1; cur_trans->commit_done = 0; list_add_tail(&cur_trans->list, &root->fs_info->trans_list); + init_bit_radix(&cur_trans->dirty_pages); } cur_trans->num_writers++; return 0; @@ -106,8 +107,40 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - filemap_write_and_wait(root->fs_info->btree_inode->i_mapping); - return 0; + unsigned long gang[16]; + int ret; + int i; + int err; + int werr = 0; + struct page *page; + struct radix_tree_root *dirty_pages; + struct inode *btree_inode = root->fs_info->btree_inode; + + if (!trans || !trans->transaction) { + return filemap_write_and_wait(btree_inode->i_mapping); + } + dirty_pages = &trans->transaction->dirty_pages; + while(1) { + ret = find_first_radix_bit(dirty_pages, gang, ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + /* FIXME EIO */ + clear_radix_bit(dirty_pages, gang[i]); + page = find_lock_page(btree_inode->i_mapping, + gang[i]); + if (!page) + continue; + err = write_one_page(page, 0); + if (err) + werr = err; + page_cache_release(page); + } + } + err = filemap_fdatawait(btree_inode->i_mapping); + if (err) + werr = err; + return werr; } int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 3cc29900a074..afe42d167cee 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -9,6 +9,7 @@ struct btrfs_transaction { int commit_done; int magic; struct list_head list; + struct radix_tree_root dirty_pages; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; -- cgit v1.2.3 From 31f3c99b73483f7b738a886c552050cbd6128ff3 Mon Sep 17 00:00:00 2001 From: Chris Mason 
Date: Mon, 30 Apr 2007 15:25:45 -0400 Subject: Btrfs: allocator improvements, inode block groups Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 1 + fs/btrfs/ctree.c | 10 ++-- fs/btrfs/ctree.h | 18 ++++++- fs/btrfs/extent-tree.c | 138 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/super.c | 53 +++++++++++++++---- fs/btrfs/transaction.c | 1 + fs/btrfs/transaction.h | 15 ++++++ 7 files changed, 181 insertions(+), 55 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 43a4f1ddb375..6b50076b02d5 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -3,6 +3,7 @@ struct btrfs_inode { struct btrfs_root *root; + struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct inode vfs_inode; }; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b4783bf8bf4f..dbd3f636dd3d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -61,7 +61,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *cow_ret = buf; return 0; } - cow = btrfs_alloc_free_block(trans, root); + cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); @@ -800,7 +800,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root); + t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); @@ -905,7 +905,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root); + split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); split = btrfs_buffer_node(split_buffer); 
btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); @@ -1277,7 +1277,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); @@ -1374,7 +1374,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root); + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); BUG_ON(!right_buffer); right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c432222d40e3..e6bf9919536a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -174,6 +174,7 @@ struct btrfs_inode_item { __le64 generation; __le64 size; __le64 nblocks; + __le64 block_group; __le32 nlink; __le32 uid; __le32 gid; @@ -241,6 +242,7 @@ struct btrfs_device_item { /* tag for the radix tree of block groups in ram */ #define BTRFS_BLOCK_GROUP_DIRTY 0 +#define BTRFS_BLOCK_GROUP_AVAIL 1 #define BTRFS_BLOCK_GROUP_HINTS 8 #define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024) struct btrfs_block_group_item { @@ -410,6 +412,17 @@ static inline void btrfs_set_inode_nblocks(struct btrfs_inode_item *i, u64 val) i->nblocks = cpu_to_le64(val); } +static inline u64 btrfs_inode_block_group(struct btrfs_inode_item *i) +{ + return le64_to_cpu(i->block_group); +} + +static inline void btrfs_set_inode_block_group(struct btrfs_inode_item *i, + u64 val) +{ + i->block_group = cpu_to_le64(val); +} + static inline u32 btrfs_inode_nlink(struct btrfs_inode_item *i) { return le32_to_cpu(i->nlink); @@ 
-1054,10 +1067,13 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache + *hint, int data); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, u64 hint); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, u64 num_blocks, u64 search_start, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 62051a36664a..8b8cbe25fffb 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,42 +12,57 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); -static int find_search_start(struct btrfs_root *root, int data) +struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache + *hint, int data) { struct btrfs_block_group_cache *cache[8]; + struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; u64 used; - u64 last; + u64 last = 0; + u64 hint_last; int i; int ret; - - cache[0] = info->block_group_cache; - if (!cache[0]) - goto find_new; - used = btrfs_block_group_used(&cache[0]->item); - if (used < (cache[0]->key.offset * 3 / 2)) - return 0; -find_new: - last = 0; + int full_search = 0; + if (hint) { + used = btrfs_block_group_used(&hint->item); + if (used < (hint->key.offset * 2) / 3) { + return hint; + } + radix_tree_tag_clear(&info->block_group_radix, + hint->key.objectid + hint->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + last = hint->key.objectid + hint->key.offset; + hint_last = last; + } else { + hint_last = 0; + last = 0; + } while(1) { ret 
= radix_tree_gang_lookup_tag(&info->block_group_radix, (void **)cache, last, ARRAY_SIZE(cache), - BTRFS_BLOCK_GROUP_DIRTY); + BTRFS_BLOCK_GROUP_AVAIL); if (!ret) break; for (i = 0; i < ret; i++) { used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 3 / 2)) { + if (used < (cache[i]->key.offset * 2) / 3) { info->block_group_cache = cache[i]; - cache[i]->last_alloc = cache[i]->first_free; - return 0; + found_group = cache[i]; + goto found; } + radix_tree_tag_clear(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); last = cache[i]->key.objectid + - cache[i]->key.offset - 1; + cache[i]->key.offset; } } - last = 0; + last = hint_last; +again: while(1) { ret = radix_tree_gang_lookup(&info->block_group_radix, (void **)cache, @@ -56,17 +71,32 @@ find_new: break; for (i = 0; i < ret; i++) { used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 3 / 2)) { + if (used < cache[i]->key.offset) { info->block_group_cache = cache[i]; - cache[i]->last_alloc = cache[i]->first_free; - return 0; + found_group = cache[i]; + goto found; } + radix_tree_tag_clear(&info->block_group_radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); last = cache[i]->key.objectid + - cache[i]->key.offset - 1; + cache[i]->key.offset; } } info->block_group_cache = NULL; - return 0; + if (!full_search) { + last = 0; + full_search = 1; + goto again; + } +found: + if (!found_group) { + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&found_group, 0, 1); + BUG_ON(ret != 1); + } + return found_group; } int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, @@ -243,6 +273,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, path, cache[i]); if (err) werr = err; + cache[i]->last_alloc = cache[i]->first_free; } } btrfs_free_path(path); @@ -322,10 +353,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 
struct btree_inode->i_blkbits)); } } - if (root->fs_info->block_group_cache) { - root->fs_info->block_group_cache->last_alloc = - root->fs_info->block_group_cache->first_free; - } return 0; } @@ -532,22 +559,43 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int total_found = 0; int fill_prealloc = 0; int level; + int update_block_group = 0; + struct btrfs_block_group_cache *hint_block_group; path = btrfs_alloc_path(); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); + /* find search start here */ + if (0 && search_start && num_blocks) { + u64 used; + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&hint_block_group, + search_start, 1); + if (ret) { + used = btrfs_block_group_used(&hint_block_group->item); + if (used > (hint_block_group->key.offset * 9) / 10) + search_start = 0; + else if (search_start < hint_block_group->last_alloc) + search_start = hint_block_group->last_alloc; + } else { + search_start = 0; + } + } if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } - find_search_start(root, 0); - if (info->block_group_cache && - info->block_group_cache->last_alloc > search_start) - search_start = info->block_group_cache->last_alloc; - + if (1 || !search_start) { + trans->block_group = btrfs_find_block_group(root, + trans->block_group, + 0); + if (trans->block_group->last_alloc > search_start) + search_start = trans->block_group->last_alloc; + update_block_group = 1; + } check_failed: btrfs_init_path(path); ins->objectid = search_start; @@ -662,11 +710,13 @@ check_pending: } info->extent_tree_prealloc_nr = total_found; } - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&info->block_group_cache, - ins->objectid, 1); - if (ret) { - info->block_group_cache->last_alloc = ins->objectid; + if (update_block_group) { + ret = 
radix_tree_gang_lookup(&info->block_group_radix, + (void **)&trans->block_group, + ins->objectid, 1); + if (ret) { + trans->block_group->last_alloc = ins->objectid; + } } ins->offset = num_blocks; btrfs_free_path(path); @@ -747,14 +797,14 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * returns the tree buffer or NULL. */ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root) + struct btrfs_root *root, u64 hint) { struct btrfs_key ins; int ret; struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, 0, (unsigned long)-1, &ins); + 1, hint, (unsigned long)-1, &ins); if (ret) { BUG(); return NULL; @@ -975,6 +1025,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key found_key; struct btrfs_leaf *leaf; u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; + u64 used; root = root->fs_info->extent_root; key.objectid = 0; @@ -1005,8 +1056,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_block_group_item); memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); - cache->last_alloc = 0; - cache->first_free = 0; + cache->last_alloc = cache->key.objectid; + cache->first_free = cache->key.objectid; key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); ret = radix_tree_insert(&root->fs_info->block_group_radix, @@ -1014,6 +1065,13 @@ int btrfs_read_block_groups(struct btrfs_root *root) found_key.offset - 1, (void *)cache); BUG_ON(ret); + used = btrfs_block_group_used(bi); + if (used < (key.offset * 2) / 3) { + radix_tree_tag_set(&root->fs_info->block_group_radix, + found_key.objectid + + found_key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } if (key.objectid >= btrfs_super_total_blocks(root->fs_info->disk_super)) break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1890e8648dbd..7ecbe7c86186 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -52,6 
+52,8 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; + struct btrfs_block_group_cache *alloc_group; + u64 alloc_group_block; int ret; path = btrfs_alloc_path(); @@ -82,6 +84,12 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); + alloc_group_block = btrfs_inode_block_group(inode_item); + ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix, + (void **)&alloc_group, + alloc_group_block, 1); + BUG_ON(!ret); + BTRFS_I(inode)->block_group = alloc_group; btrfs_free_path(path); inode_item = NULL; @@ -136,6 +144,8 @@ static void fill_inode_item(struct btrfs_inode_item *item, btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); btrfs_set_inode_nblocks(item, inode->i_blocks); btrfs_set_inode_generation(item, inode->i_generation); + btrfs_set_inode_block_group(item, + BTRFS_I(inode)->block_group->key.objectid); } @@ -237,6 +247,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -262,6 +273,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) btrfs_init_path(path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); key.objectid = inode->i_ino; key.offset = (u64)-1; key.flags = (u32)-1; @@ -429,6 +441,7 @@ static void btrfs_delete_inode(struct inode *inode) inode->i_size = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + 
btrfs_set_trans_block_group(trans, inode); if (S_ISREG(inode->i_mode)) { ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); @@ -731,6 +744,7 @@ static int btrfs_write_inode(struct inode *inode, int wait) if (wait) { mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); } @@ -744,6 +758,7 @@ static void btrfs_dirty_inode(struct inode *inode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -751,7 +766,9 @@ static void btrfs_dirty_inode(struct inode *inode) static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 objectid, int mode) + u64 objectid, + struct btrfs_block_group_cache *group, + int mode) { struct inode *inode; struct btrfs_inode_item inode_item; @@ -763,6 +780,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = root; + group = btrfs_find_block_group(root, group, 0); + BTRFS_I(inode)->block_group = group; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; @@ -832,6 +851,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); if (err) { @@ -839,11 +859,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, mode); + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_unlock; - // FIXME mark the inode dirty + + 
btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dentry, inode); if (err) drop_inode = 1; @@ -853,6 +875,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; } dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -904,6 +928,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_unlock; @@ -915,7 +940,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_unlock; } - inode = btrfs_new_inode(trans, root, objectid, S_IFDIR | mode); + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, S_IFDIR | mode); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out_fail; @@ -923,6 +949,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; + btrfs_set_trans_block_group(trans, inode); err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); if (err) @@ -938,6 +965,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) d_instantiate(dentry, inode); drop_on_err = 0; dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); out_fail: btrfs_end_transaction(trans, root); @@ -1349,6 +1378,7 @@ static void btrfs_truncate(struct inode *inode) /* FIXME, add redo link to tree so we don't leak on crash */ mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); 
BUG_ON(ret); ret = btrfs_end_transaction(trans, root); @@ -1445,6 +1475,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); bh = page_buffers(pages[i]); if (buffer_mapped(bh) && bh->b_blocknr == 0) { @@ -1481,6 +1512,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, kunmap(pages[i]); } SetPageChecked(pages[i]); + btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1821,6 +1853,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, mutex_unlock(&root->fs_info->fs_mutex); goto out_unlock; } + btrfs_set_trans_block_group(trans, inode); /* FIXME blocksize != 4096 */ inode->i_blocks += num_blocks << 3; if (start_pos < inode->i_size) { @@ -1845,6 +1878,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } BUG_ON(ret); alloc_extent_start = ins.objectid; + btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -2017,6 +2051,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct btrfs_leaf *leaf; struct btrfs_root *new_root; struct inode *inode; + struct inode *dir; int ret; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; @@ -2025,7 +2060,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root); + subvol = btrfs_alloc_free_block(trans, root, 0); if (subvol == NULL) return -ENOSPC; leaf = btrfs_buffer_leaf(subvol); @@ -2069,10 +2104,9 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) * insert the directory item */ key.offset = (u64)-1; + dir = root->fs_info->sb->s_root->d_inode; ret = 
btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, - root->fs_info->sb->s_root->d_inode->i_ino, - &key, 0); + name, namelen, dir->i_ino, &key, 0); BUG_ON(ret); ret = btrfs_commit_transaction(trans, root); @@ -2084,7 +2118,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(new_root, 1); BUG_ON(!trans); - inode = btrfs_new_inode(trans, new_root, new_dirid, S_IFDIR | 0700); + inode = btrfs_new_inode(trans, new_root, new_dirid, + BTRFS_I(dir)->block_group, S_IFDIR | 0700); inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f9b8864dcc40..bdbf514c111f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -77,6 +77,7 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, h->transaction = root->fs_info->running_transaction; h->blocks_reserved = num_blocks; h->blocks_used = 0; + h->block_group = NULL; root->fs_info->running_transaction->use_count++; mutex_unlock(&root->fs_info->trans_mutex); h->magic = h->magic2 = TRANS_MAGIC; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index afe42d167cee..b5378119e76c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,5 +1,6 @@ #ifndef __TRANSACTION__ #define __TRANSACTION__ +#include "btrfs_inode.h" struct btrfs_transaction { u64 transid; @@ -20,10 +21,24 @@ struct btrfs_trans_handle { unsigned long blocks_reserved; unsigned long blocks_used; struct btrfs_transaction *transaction; + struct btrfs_block_group_cache *block_group; int magic2; }; +static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans, + struct inode *inode) +{ + trans->block_group = BTRFS_I(inode)->block_group; +} + +static inline void btrfs_update_inode_block_group(struct + btrfs_trans_handle *trans, + struct inode *inode) +{ + BTRFS_I(inode)->block_group = trans->block_group; +} + int 
btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, -- cgit v1.2.3 From 090d18753c7fb73f7d846c3a89a50cd35136d144 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 May 2007 08:53:32 -0400 Subject: Btrfs: directory readahead Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 29 ++++++++++++++++++++++++++++- fs/btrfs/disk-io.h | 6 ++++++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/super.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1b6e13a5ae8..354524adf984 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -253,6 +254,29 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; +int readahead_tree_block(struct btrfs_root *root, u64 blocknr) +{ + struct buffer_head *bh = NULL; + + bh = btrfs_find_create_tree_block(root, blocknr); + if (!bh) + return 0; + if (buffer_uptodate(bh)) + goto done; + if (test_set_buffer_locked(bh)) + goto done; + if (!buffer_uptodate(bh)) { + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + } else { + unlock_buffer(bh); + } +done: + brelse(bh); + return 0; +} + struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { struct buffer_head *bh = NULL; @@ -270,11 +294,14 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto fail; - csum_tree_block(root, bh, 1); } else { unlock_buffer(bh); } uptodate: + if (!buffer_checked(bh)) { + csum_tree_block(root, bh, 1); + set_buffer_checked(bh); + } if (check_tree_block(root, bh)) BUG(); return bh; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 444ebb0141ae..1ee7d2a55b5b 100644 --- 
a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -5,6 +5,11 @@ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) +enum btrfs_bh_state_bits { + BH_Checked = BH_PrivateStart, +}; +BUFFER_FNS(Checked, checked); + static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) { return (struct btrfs_node *)bh->b_data; @@ -21,6 +26,7 @@ static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) } struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +int readahead_tree_block(struct btrfs_root *root, u64 blocknr); struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr); int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8b8cbe25fffb..2837fddf7b1c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -812,6 +812,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); set_buffer_uptodate(buf); + set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7ecbe7c86186..a29a781b86c2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -585,6 +585,31 @@ printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_r return d_splice_alias(inode, dentry); } +static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 objectid; + u64 item_objectid; + u64 blocknr; + int slot; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); + nritems = btrfs_header_nritems(&node->header); + for (i = slot; i < nritems; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != 
objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + readahead_tree_block(root, blocknr); + } +} + static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; @@ -619,6 +644,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (ret < 0) goto err; advance = 0; + reada_leaves(root, path); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); @@ -631,6 +657,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; + if (path->slots[1] == 0) + reada_leaves(root, path); } else { slot++; path->slots[0]++; -- cgit v1.2.3 From 35b7e476107e3d54f03384e0f2fa3dfd68933353 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 2 May 2007 15:53:43 -0400 Subject: Btrfs: fix page cache memory leak Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 2 -- fs/btrfs/ctree.h | 3 +++ fs/btrfs/dir-item.c | 11 +++++----- fs/btrfs/disk-io.c | 8 ++++++-- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/extent-tree.c | 12 +---------- fs/btrfs/hash.c | 1 + fs/btrfs/print-tree.c | 1 + fs/btrfs/super.c | 56 ++++++++++++++++++++++++++++++-------------------- fs/btrfs/transaction.c | 8 +++++--- 10 files changed, 59 insertions(+), 45 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 9fc42e99c7df..e9ace32d7bae 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -22,9 +22,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) return -ENOMEM; memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); bits[0] = slot; - radix_tree_preload(GFP_NOFS); ret = radix_tree_insert(radix, slot, bits); - radix_tree_preload_end(); if (ret) return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e6bf9919536a..b5855a5365ef 100644 --- 
a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -8,6 +8,9 @@ struct btrfs_trans_handle; struct btrfs_transaction; +extern struct kmem_cache *btrfs_trans_handle_cachep; +extern struct kmem_cache *btrfs_transaction_cachep; +extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_BtRfS_M" diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 7a7e9846860a..00a28d90fea6 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -4,11 +4,12 @@ #include "hash.h" #include "transaction.h" -struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *cpu_key, - u32 data_size) +static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle + *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, + u32 data_size) { int ret; char *ptr; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 354524adf984..5828a104dfef 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -694,7 +694,7 @@ static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) return 0; } -int del_fs_roots(struct btrfs_fs_info *fs_info) +static int del_fs_roots(struct btrfs_fs_info *fs_info) { int ret; struct btrfs_root *gang[8]; @@ -781,3 +781,7 @@ void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) brelse(buf); } +void btrfs_btree_balance_dirty(struct btrfs_root *root) +{ + balance_dirty_pages_ratelimited(root->fs_info->btree_inode->i_mapping); +} diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 1ee7d2a55b5b..822ccb8aa4ae 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -55,4 +55,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, u64 num_blocks); int btrfs_map_bh_to_logical(struct btrfs_root *root, 
struct buffer_head *bh, u64 logical); +int btrfs_releasepage(struct page *page, gfp_t flags); +void btrfs_btree_balance_dirty(struct btrfs_root *root); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2837fddf7b1c..0e20d1c42fca 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -322,18 +322,10 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } -static int try_remove_page(struct address_space *mapping, unsigned long index) -{ - int ret; - ret = invalidate_mapping_pages(mapping, index, index); - return ret; -} - int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { unsigned long gang[8]; - struct inode *btree_inode = root->fs_info->btree_inode; u64 first = 0; int ret; int i; @@ -348,9 +340,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); - try_remove_page(btree_inode->i_mapping, - gang[i] << (PAGE_CACHE_SHIFT - - btree_inode->i_blkbits)); } } return 0; @@ -983,6 +972,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; + btrfs_btree_balance_dirty(root); } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 22519b8e0cf2..32de1ea1b64e 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -11,6 +11,7 @@ */ #include +#include "hash.h" #define DELTA 0x9E3779B9 static void TEA_transform(__u32 buf[2], __u32 const in[]) diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 2f95fc67a036..28813411de66 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,6 +1,7 @@ #include #include "ctree.h" #include "disk-io.h" +#include "print-tree.h" void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a29a781b86c2..130a1d3d9f5f 100644 --- a/fs/btrfs/super.c 
+++ b/fs/btrfs/super.c @@ -18,14 +18,14 @@ #include "btrfs_inode.h" #include "ioctl.h" -void btrfs_fsinfo_release(struct kobject *obj) +static void btrfs_fsinfo_release(struct kobject *obj) { struct btrfs_fs_info *fsinfo = container_of(obj, struct btrfs_fs_info, kobj); kfree(fsinfo); } -struct kobj_type btrfs_fsinfo_ktype = { +static struct kobj_type btrfs_fsinfo_ktype = { .release = btrfs_fsinfo_release, }; @@ -148,7 +148,6 @@ static void fill_inode_item(struct btrfs_inode_item *item, BTRFS_I(inode)->block_group->key.objectid); } - static int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -251,6 +250,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_trans(trans, root, dir, dentry); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return ret; } @@ -324,6 +324,7 @@ out: btrfs_free_path(path); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root); if (ret && !err) err = ret; return err; @@ -449,6 +450,7 @@ static void btrfs_delete_inode(struct inode *inode) btrfs_free_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return; no_delete: clear_inode(inode); @@ -481,7 +483,7 @@ out: return ret; } -int fixup_tree_root_location(struct btrfs_root *root, +static int fixup_tree_root_location(struct btrfs_root *root, struct btrfs_key *location, struct btrfs_root **sub_root) { @@ -512,7 +514,7 @@ int fixup_tree_root_location(struct btrfs_root *root, return 0; } -int btrfs_init_locked_inode(struct inode *inode, void *p) +static int btrfs_init_locked_inode(struct inode *inode, void *p) { struct btrfs_iget_args *args = p; inode->i_ino = args->ino; @@ -520,15 +522,15 @@ int btrfs_init_locked_inode(struct inode *inode, void *p) return 0; } -int btrfs_find_actor(struct inode 
*inode, void *opaque) +static int btrfs_find_actor(struct inode *inode, void *opaque) { struct btrfs_iget_args *args = opaque; return (args->ino == inode->i_ino && args->root == BTRFS_I(inode)->root); } -struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, - struct btrfs_root *root) +static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, + struct btrfs_root *root) { struct inode *inode; struct btrfs_iget_args args; @@ -790,6 +792,7 @@ static void btrfs_dirty_inode(struct inode *inode) btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -913,6 +916,7 @@ out_unlock: inode_dec_link_count(inode); iput(inode); } + btrfs_btree_balance_dirty(root); return err; } @@ -1002,6 +1006,7 @@ out_unlock: mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) iput(inode); + btrfs_btree_balance_dirty(root); return err; } @@ -1099,7 +1104,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, found_type != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; - btrfs_release_path(root, path); goto out; } found_type = btrfs_file_extent_type(item); @@ -1135,7 +1139,6 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, btrfs_map_bh_to_logical(root, result, 0); } out: - btrfs_release_path(root, path); btrfs_free_path(path); return err; } @@ -1231,13 +1234,13 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { WARN_ON(bh->b_size != blocksize); err = btrfs_get_block(inode, block, bh, 0); - if (err) + if (err) { +printk("writepage going to recovery err %d\n", err); goto recover; + } if (buffer_new(bh)) { /* blockdev mappings never come here */ clear_buffer_new(bh); - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); } } bh = bh->b_this_page; @@ 
-1303,11 +1306,6 @@ done: if (uptodate) SetPageUptodate(page); end_page_writeback(page); - /* - * The page and buffer_heads can be released at any time from - * here on. - */ - wbc->pages_skipped++; /* We didn't write this page */ } return err; @@ -1409,10 +1407,11 @@ static void btrfs_truncate(struct inode *inode) btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); + btrfs_update_inode(trans, root, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - mark_inode_dirty(inode); + btrfs_btree_balance_dirty(root); } /* @@ -1777,10 +1776,15 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; goto failed_release; } + cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); + wait_on_page_writeback(pages[i]); offset = pos & (PAGE_CACHE_SIZE -1); this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); - create_empty_buffers(pages[i], root->fs_info->sb->s_blocksize, - (1 << BH_Uptodate)); + if (!page_has_buffers(pages[i])) { + create_empty_buffers(pages[i], + root->fs_info->sb->s_blocksize, + (1 << BH_Uptodate)); + } head = page_buffers(pages[i]); bh = head; do { @@ -1820,7 +1824,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct inode *inode = file->f_path.dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page *pages[8]; - struct page *pinned[2] = { NULL, NULL }; + struct page *pinned[2]; unsigned long first_index; unsigned long last_index; u64 start_pos; @@ -1829,6 +1833,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, struct btrfs_trans_handle *trans; struct btrfs_key ins; + pinned[0] = NULL; + pinned[1] = NULL; if (file->f_flags & O_DIRECT) return -EINVAL; pos = *ppos; @@ -1858,6 +1864,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (!PageUptodate(pinned[0])) { ret = mpage_readpage(pinned[0], btrfs_get_block); BUG_ON(ret); + 
wait_on_page_locked(pinned[0]); } else { unlock_page(pinned[0]); } @@ -1869,6 +1876,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (!PageUptodate(pinned[1])) { ret = mpage_readpage(pinned[1], btrfs_get_block); BUG_ON(ret); + wait_on_page_locked(pinned[1]); } else { unlock_page(pinned[1]); } @@ -1940,6 +1948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, num_written += write_bytes; balance_dirty_pages_ratelimited(inode->i_mapping); + btrfs_btree_balance_dirty(root); cond_resched(); } out_unlock: @@ -2165,6 +2174,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) iput(inode); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return 0; } @@ -2220,6 +2230,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); return 0; } @@ -2295,6 +2306,7 @@ out: mutex_unlock(&root->fs_info->fs_mutex); out_nolock: btrfs_free_path(path); + btrfs_btree_balance_dirty(root); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bdbf514c111f..8bbe9107ff7e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -199,8 +199,9 @@ struct dirty_root { struct btrfs_root *root; }; -int add_dirty_roots(struct btrfs_trans_handle *trans, - struct radix_tree_root *radix, struct list_head *list) +static int add_dirty_roots(struct btrfs_trans_handle *trans, + struct radix_tree_root *radix, + struct list_head *list) { struct dirty_root *dirty; struct btrfs_root *gang[8]; @@ -246,7 +247,8 @@ int add_dirty_roots(struct btrfs_trans_handle *trans, return 0; } -int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) +static int drop_dirty_roots(struct btrfs_root *tree_root, + struct list_head *list) { struct dirty_root *dirty; struct btrfs_trans_handle *trans; -- cgit 
v1.2.3 From be08c1b9f8e679d45e086728445ac36cf250e92e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 May 2007 09:06:49 -0400 Subject: Btrfs: early metadata/data split Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 95 +++++++++++++++++++++++++------------------------- fs/btrfs/super.c | 2 +- 3 files changed, 50 insertions(+), 49 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b5855a5365ef..179a046ce5a4 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1080,7 +1080,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, u64 num_blocks, u64 search_start, - u64 search_end, struct btrfs_key *ins); + u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0e20d1c42fca..c5ae51893f78 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6,7 +6,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins); + search_end, struct btrfs_key *ins, int data); static int finish_current_insert(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); static int del_pending_extents(struct btrfs_trans_handle *trans, struct @@ -25,7 +25,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int i; int ret; int full_search = 0; - if (hint) { + if (!data && hint) { used = btrfs_block_group_used(&hint->item); if (used < (hint->key.offset * 2) / 3) { return hint; @@ -47,6 +47,14 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (!ret) break; 
for (i = 0; i < ret; i++) { + last = cache[i]->key.objectid + + cache[i]->key.offset; + if (!full_search && !data && + (cache[i]->key.objectid & cache[i]->key.offset)) + continue; + if (!full_search && data && + (cache[i]->key.objectid & cache[i]->key.offset) == 0) + continue; used = btrfs_block_group_used(&cache[i]->item); if (used < (cache[i]->key.offset * 2) / 3) { info->block_group_cache = cache[i]; @@ -57,8 +65,6 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, cache[i]->key.objectid + cache[i]->key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); - last = cache[i]->key.objectid + - cache[i]->key.offset; } } last = hint_last; @@ -70,6 +76,14 @@ again: if (!ret) break; for (i = 0; i < ret; i++) { + last = cache[i]->key.objectid + + cache[i]->key.offset; + if (!full_search && !data && + (cache[i]->key.objectid & cache[i]->key.offset)) + continue; + if (!full_search && data && + (cache[i]->key.objectid & cache[i]->key.offset) == 0) + continue; used = btrfs_block_group_used(&cache[i]->item); if (used < cache[i]->key.offset) { info->block_group_cache = cache[i]; @@ -80,8 +94,6 @@ again: cache[i]->key.objectid + cache[i]->key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); - last = cache[i]->key.objectid + - cache[i]->key.offset; } } info->block_group_cache = NULL; @@ -112,7 +124,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u32 refs; find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, - &ins); + &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); @@ -225,7 +237,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins, 0); ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); BUG_ON(ret); bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], @@ -322,10 +334,18 @@ static int 
update_block_group(struct btrfs_trans_handle *trans, return 0; } +static int try_remove_page(struct address_space *mapping, unsigned long index) +{ + int ret; + ret = invalidate_mapping_pages(mapping, index, index); + return ret; +} + int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { unsigned long gang[8]; + struct inode *btree_inode = root->fs_info->btree_inode; u64 first = 0; int ret; int i; @@ -340,6 +360,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + try_remove_page(btree_inode->i_mapping, + gang[i] << (PAGE_CACHE_SHIFT - + btree_inode->i_blkbits)); } } return 0; @@ -424,7 +447,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - find_free_extent(trans, root, 0, 0, (u64)-1, &ins); + find_free_extent(trans, root, 0, 0, (u64)-1, &ins, 0); path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); @@ -531,7 +554,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *orig_root, u64 num_blocks, u64 search_start, u64 - search_end, struct btrfs_key *ins) + search_end, struct btrfs_key *ins, int data) { struct btrfs_path *path; struct btrfs_key key; @@ -548,43 +571,21 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int total_found = 0; int fill_prealloc = 0; int level; - int update_block_group = 0; - struct btrfs_block_group_cache *hint_block_group; + struct btrfs_block_group_cache *block_group; path = btrfs_alloc_path(); ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(btrfs_buffer_header(root->node)); - /* find search start here */ - if (0 && search_start && num_blocks) { - u64 used; - ret = 
radix_tree_gang_lookup(&info->block_group_radix, - (void **)&hint_block_group, - search_start, 1); - if (ret) { - used = btrfs_block_group_used(&hint_block_group->item); - if (used > (hint_block_group->key.offset * 9) / 10) - search_start = 0; - else if (search_start < hint_block_group->last_alloc) - search_start = hint_block_group->last_alloc; - } else { - search_start = 0; - } - } if (num_blocks == 0) { fill_prealloc = 1; num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } - if (1 || !search_start) { - trans->block_group = btrfs_find_block_group(root, - trans->block_group, - 0); - if (trans->block_group->last_alloc > search_start) - search_start = trans->block_group->last_alloc; - update_block_group = 1; - } + block_group = btrfs_find_block_group(root, trans->block_group, data); + if (block_group->last_alloc > search_start) + search_start = block_group->last_alloc; check_failed: btrfs_init_path(path); ins->objectid = search_start; @@ -699,13 +700,13 @@ check_pending: } info->extent_tree_prealloc_nr = total_found; } - if (update_block_group) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&trans->block_group, - ins->objectid, 1); - if (ret) { - trans->block_group->last_alloc = ins->objectid; - } + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&block_group, + ins->objectid, 1); + if (ret) { + block_group->last_alloc = ins->objectid; + if (!data) + trans->block_group = block_group; } ins->offset = num_blocks; btrfs_free_path(path); @@ -725,7 +726,7 @@ error: int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, u64 num_blocks, u64 search_start, - u64 search_end, struct btrfs_key *ins) + u64 search_end, struct btrfs_key *ins, int data) { int ret; int pending_ret; @@ -755,13 +756,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, - search_end, ins); + 
search_end, ins, data); if (ret) return ret; /* then do prealloc for the extent tree */ ret = find_free_extent(trans, root, 0, ins->objectid + ins->offset, - search_end, &prealloc_key); + search_end, &prealloc_key, 0); if (ret) return ret; @@ -793,7 +794,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins); + 1, 0, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 130a1d3d9f5f..23ba4e979fc9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1903,7 +1903,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 1, (u64)-1, &ins); + num_blocks, 1, (u64)-1, &ins, 1); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); -- cgit v1.2.3 From be74417553f4b2ee46be2088007a674ef2f02330 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 6 May 2007 10:15:01 -0400 Subject: Btrfs: more allocator enhancements Signed-off-by: Chris Mason --- fs/btrfs/bit-radix.c | 6 +- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 317 +++++++++++++++++++++++++++++++++++++------------ fs/btrfs/super.c | 2 +- 5 files changed, 249 insertions(+), 85 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index e9ace32d7bae..1a0271445dfb 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -26,8 +26,10 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) if (ret) return ret; } - set_bit(bit_slot, bits + 1); - return 0; + ret = test_and_set_bit(bit_slot, bits + 1); + if (ret < 0) + ret = 1; + return ret; } 
int test_radix_bit(struct radix_tree_root *radix, unsigned long bit) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 179a046ce5a4..086e7dea3c92 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -257,6 +257,8 @@ struct btrfs_block_group_cache { struct btrfs_block_group_item item; u64 first_free; u64 last_alloc; + u64 pinned; + int data; }; struct crypto_hash; @@ -264,12 +266,12 @@ struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct btrfs_root *dev_root; - struct btrfs_block_group_cache *block_group_cache; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; struct radix_tree_root dev_radix; struct radix_tree_root block_group_radix; + struct radix_tree_root block_group_data_radix; u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; int extent_tree_insert_nr; @@ -1072,7 +1074,8 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) /* extent-tree.c */ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache - *hint, int data); + *hint, u64 search_start, + int data); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5828a104dfef..7930458c227e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -554,6 +554,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; @@ -582,7 +583,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); 
mutex_init(&fs_info->fs_mutex); - fs_info->block_group_cache = NULL; __setup_root(sb->s_blocksize, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c5ae51893f78..2937fd9aba74 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,36 +12,88 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static struct btrfs_block_group_cache *lookup_block_group(struct + btrfs_fs_info *info, + u64 blocknr) +{ + struct btrfs_block_group_cache *block_group; + int ret; + + ret = radix_tree_gang_lookup(&info->block_group_radix, + (void **)&block_group, + blocknr, 1); + if (ret) { + if (block_group->key.objectid <= blocknr && blocknr < + block_group->key.objectid + block_group->key.offset) + return block_group; + } + ret = radix_tree_gang_lookup(&info->block_group_data_radix, + (void **)&block_group, + blocknr, 1); + if (ret) { + if (block_group->key.objectid <= blocknr && blocknr < + block_group->key.objectid + block_group->key.offset) + return block_group; + } +printk("lookup_block_group fails for blocknr %Lu\n", blocknr); + return NULL; +} + struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache - *hint, int data) + *hint, u64 search_start, + int data) { struct btrfs_block_group_cache *cache[8]; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; + struct radix_tree_root *radix; u64 used; u64 last = 0; u64 hint_last; int i; int ret; int full_search = 0; - if (!data && hint) { + + if (data) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; + + if (search_start) { + struct btrfs_block_group_cache *shint; + shint = lookup_block_group(info, search_start); + if (shint->data == data) { + used = btrfs_block_group_used(&shint->item); + if (used + 
shint->pinned < + (shint->key.offset * 8) / 10) { + return shint; + } + } + } + if (hint && hint->data == data) { used = btrfs_block_group_used(&hint->item); - if (used < (hint->key.offset * 2) / 3) { + if (used + hint->pinned < (hint->key.offset * 8) / 10) { return hint; } - radix_tree_tag_clear(&info->block_group_radix, - hint->key.objectid + hint->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); - last = hint->key.objectid + hint->key.offset; + if (used >= (hint->key.offset * 8) / 10) { + radix_tree_tag_clear(radix, + hint->key.objectid + + hint->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } + last = hint->key.offset * 2; + if (hint->key.objectid >= last) + last = max(search_start, hint->key.objectid - last); + else + last = hint->key.objectid + hint->key.offset; hint_last = last; } else { - hint_last = 0; - last = 0; + hint_last = search_start; + last = search_start; } while(1) { - ret = radix_tree_gang_lookup_tag(&info->block_group_radix, - (void **)cache, + ret = radix_tree_gang_lookup_tag(radix, (void **)cache, last, ARRAY_SIZE(cache), BTRFS_BLOCK_GROUP_AVAIL); if (!ret) @@ -49,65 +101,54 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, for (i = 0; i < ret; i++) { last = cache[i]->key.objectid + cache[i]->key.offset; - if (!full_search && !data && - (cache[i]->key.objectid & cache[i]->key.offset)) - continue; - if (!full_search && data && - (cache[i]->key.objectid & cache[i]->key.offset) == 0) - continue; used = btrfs_block_group_used(&cache[i]->item); - if (used < (cache[i]->key.offset * 2) / 3) { - info->block_group_cache = cache[i]; + if (used + cache[i]->pinned < + (cache[i]->key.offset * 8) / 10) { found_group = cache[i]; goto found; } - radix_tree_tag_clear(&info->block_group_radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); + if (used >= (cache[i]->key.offset * 8) / 10) { + radix_tree_tag_clear(radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + 
BTRFS_BLOCK_GROUP_AVAIL); + } } } last = hint_last; again: while(1) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)cache, - last, ARRAY_SIZE(cache)); + ret = radix_tree_gang_lookup(radix, (void **)cache, + last, ARRAY_SIZE(cache)); if (!ret) break; for (i = 0; i < ret; i++) { last = cache[i]->key.objectid + cache[i]->key.offset; - if (!full_search && !data && - (cache[i]->key.objectid & cache[i]->key.offset)) - continue; - if (!full_search && data && - (cache[i]->key.objectid & cache[i]->key.offset) == 0) - continue; used = btrfs_block_group_used(&cache[i]->item); - if (used < cache[i]->key.offset) { - info->block_group_cache = cache[i]; + if (used + cache[i]->pinned < cache[i]->key.offset) { found_group = cache[i]; goto found; } - radix_tree_tag_clear(&info->block_group_radix, - cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_AVAIL); + if (used >= cache[i]->key.offset) { + radix_tree_tag_clear(radix, + cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } } } - info->block_group_cache = NULL; if (!full_search) { - last = 0; + last = search_start; full_search = 1; goto again; } -found: if (!found_group) { - ret = radix_tree_gang_lookup(&info->block_group_radix, + ret = radix_tree_gang_lookup(radix, (void **)&found_group, 0, 1); BUG_ON(ret != 1); } +found: return found_group; } @@ -252,18 +293,20 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, return ret; if (pending_ret) return pending_ret; + if (cache->data) + cache->last_alloc = cache->first_free; return 0; } -int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static int write_dirty_block_radix(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct radix_tree_root *radix) { struct btrfs_block_group_cache *cache[8]; int ret; int err = 0; int werr = 0; - struct radix_tree_root *radix = &root->fs_info->block_group_radix; int i; struct btrfs_path *path; @@ 
-285,35 +328,74 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, path, cache[i]); if (err) werr = err; - cache[i]->last_alloc = cache[i]->first_free; } } btrfs_free_path(path); return werr; } +int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + int ret2; + ret = write_dirty_block_radix(trans, root, + &root->fs_info->block_group_radix); + ret2 = write_dirty_block_radix(trans, root, + &root->fs_info->block_group_data_radix); + if (ret) + return ret; + if (ret2) + return ret2; + return 0; +} + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num, int alloc) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; + struct radix_tree_root *radix; u64 total = num; u64 old_val; u64 block_in_group; int ret; + if (num != 1) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; while(total) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&cache, blocknr, 1); + ret = radix_tree_gang_lookup(radix, (void **)&cache, + blocknr, 1); if (!ret) { printk(KERN_CRIT "blocknr %Lu lookup failed\n", blocknr); return -1; } block_in_group = blocknr - cache->key.objectid; + if (block_in_group > cache->key.offset || cache->key.objectid > + blocknr) { + if (radix == &info->block_group_data_radix) + radix = &info->block_group_radix; + else + radix = &info->block_group_data_radix; + ret = radix_tree_gang_lookup(radix, (void **)&cache, + blocknr, 1); + if (!ret) { + printk(KERN_CRIT "blocknr %Lu lookup failed\n", + blocknr); + return -1; + } + block_in_group = blocknr - cache->key.objectid; + if (block_in_group > cache->key.offset || + cache->key.objectid > blocknr) { + BUG(); + } + } WARN_ON(block_in_group > cache->key.offset); - radix_tree_tag_set(&info->block_group_radix, - cache->key.objectid + cache->key.offset - 1, + radix_tree_tag_set(radix, cache->key.objectid + + 
cache->key.offset - 1, BTRFS_BLOCK_GROUP_DIRTY); old_val = btrfs_block_group_used(&cache->item); @@ -346,6 +428,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct { unsigned long gang[8]; struct inode *btree_inode = root->fs_info->btree_inode; + struct btrfs_block_group_cache *block_group; u64 first = 0; int ret; int i; @@ -360,6 +443,14 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); + block_group = lookup_block_group(root->fs_info, + gang[i]); + if (block_group) { + WARN_ON(block_group->pinned == 0); + block_group->pinned--; + if (gang[i] < block_group->last_alloc) + block_group->last_alloc = gang[i]; + } try_remove_page(btree_inode->i_mapping, gang[i] << (PAGE_CACHE_SHIFT - btree_inode->i_blkbits)); @@ -420,10 +511,16 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) btrfs_block_release(root, bh); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); + if (!err) { + struct btrfs_block_group_cache *cache; + cache = lookup_block_group(root->fs_info, blocknr); + if (cache) + cache->pinned++; + } } else { err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr); } - BUG_ON(err); + BUG_ON(err < 0); return 0; } @@ -502,6 +599,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int i; struct radix_tree_root *pending_radix; struct radix_tree_root *pinned_radix; + struct btrfs_block_group_cache *cache; pending_radix = &extent_root->fs_info->pending_del_radix; pinned_radix = &extent_root->fs_info->pinned_radix; @@ -513,7 +611,17 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct break; for (i = 0; i < ret; i++) { wret = set_radix_bit(pinned_radix, gang[i]); - BUG_ON(wret); + if (wret == 0) { + cache = lookup_block_group(extent_root->fs_info, + gang[i]); + if (cache) + cache->pinned++; + } + if (wret < 0) { + printk(KERN_CRIT "set_radix_bit, err 
%d\n", + wret); + BUG_ON(wret < 0); + } wret = clear_radix_bit(pending_radix, gang[i]); BUG_ON(wret); wret = __free_extent(trans, extent_root, @@ -563,6 +671,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int slot = 0; u64 last_block = 0; u64 test_block; + u64 orig_search_start = search_start; int start_found; struct btrfs_leaf *l; struct btrfs_root * root = orig_root->fs_info->extent_root; @@ -572,6 +681,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int fill_prealloc = 0; int level; struct btrfs_block_group_cache *block_group; + int full_scan = 0; path = btrfs_alloc_path(); ins->flags = 0; @@ -583,10 +693,21 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root num_blocks = 1; total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3; } - block_group = btrfs_find_block_group(root, trans->block_group, data); + if (search_start) { + block_group = lookup_block_group(info, search_start); + block_group = btrfs_find_block_group(root, block_group, + search_start, data); + } else { + block_group = btrfs_find_block_group(root, + trans->block_group, 0, + data); + } + +check_failed: + if (block_group->data != data) + WARN_ON(1); if (block_group->last_alloc > search_start) search_start = block_group->last_alloc; -check_failed: btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; @@ -639,6 +760,13 @@ check_failed: } start_found = 1; last_block = key.objectid + key.offset; + if (last_block >= block_group->key.objectid + + block_group->key.offset) { + btrfs_release_path(root, path); + search_start = block_group->key.objectid + + block_group->key.offset * 2; + goto new_group; + } next: path->slots[0]++; } @@ -650,16 +778,17 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) { - if (search_start == 0) + if (full_scan) return -ENOSPC; - search_start = 0; - goto 
check_failed; + search_start = orig_search_start; + full_scan = 1; + goto new_group; } for (test_block = ins->objectid; test_block < ins->objectid + num_blocks; test_block++) { if (test_radix_bit(&info->pinned_radix, test_block)) { search_start = test_block + 1; - goto check_failed; + goto new_group; } } if (!fill_prealloc && info->extent_tree_insert_nr) { @@ -670,7 +799,7 @@ check_pending: ins->objectid <= last) { search_start = last + 1; WARN_ON(1); - goto check_failed; + goto new_group; } } if (!fill_prealloc && info->extent_tree_prealloc_nr) { @@ -680,7 +809,7 @@ check_pending: ins->objectid <= info->extent_tree_prealloc[0]) { search_start = info->extent_tree_prealloc[0] + 1; WARN_ON(1); - goto check_failed; + goto new_group; } } if (fill_prealloc) { @@ -696,14 +825,12 @@ check_pending: } if (total_found < total_needed) { search_start = test_block; - goto check_failed; + goto new_group; } info->extent_tree_prealloc_nr = total_found; } - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)&block_group, - ins->objectid, 1); - if (ret) { + block_group = lookup_block_group(info, ins->objectid); + if (block_group) { block_group->last_alloc = ins->objectid; if (!data) trans->block_group = block_group; @@ -711,6 +838,18 @@ check_pending: ins->offset = num_blocks; btrfs_free_path(path); return 0; + +new_group: + if (search_start >= btrfs_super_total_blocks(info->disk_super)) { + search_start = orig_search_start; + full_scan = 1; + } + block_group = lookup_block_group(info, search_start); + if (!full_scan) + block_group = btrfs_find_block_group(root, block_group, + search_start, data); + goto check_failed; + error: btrfs_release_path(root, path); btrfs_free_path(path); @@ -794,7 +933,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, 0, (unsigned long)-1, &ins, 0); + 1, hint, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; 
@@ -984,21 +1123,19 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } -int btrfs_free_block_groups(struct btrfs_fs_info *info) +static int free_block_group_radix(struct radix_tree_root *radix) { int ret; struct btrfs_block_group_cache *cache[8]; int i; while(1) { - ret = radix_tree_gang_lookup(&info->block_group_radix, - (void **)cache, 0, + ret = radix_tree_gang_lookup(radix, (void **)cache, 0, ARRAY_SIZE(cache)); if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_delete(&info->block_group_radix, - cache[i]->key.objectid + + radix_tree_delete(radix, cache[i]->key.objectid + cache[i]->key.offset - 1); kfree(cache[i]); } @@ -1006,6 +1143,20 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +int btrfs_free_block_groups(struct btrfs_fs_info *info) +{ + int ret; + int ret2; + + ret = free_block_group_radix(&info->block_group_radix); + ret2 = free_block_group_radix(&info->block_group_data_radix); + if (ret) + return ret; + if (ret2) + return ret2; + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; @@ -1013,13 +1164,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) int err = 0; struct btrfs_block_group_item *bi; struct btrfs_block_group_cache *cache; + struct btrfs_fs_info *info = root->fs_info; + struct radix_tree_root *radix; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_leaf *leaf; u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize; u64 used; + u64 nr = 0; - root = root->fs_info->extent_root; + root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; key.flags = 0; @@ -1030,7 +1184,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) return -ENOMEM; while(1) { - ret = btrfs_search_slot(NULL, root->fs_info->extent_root, + ret = btrfs_search_slot(NULL, info->extent_root, &key, path, 0, 0); if (ret != 0) { err = ret; @@ -1050,23 +1204,28 @@ int btrfs_read_block_groups(struct btrfs_root *root) 
memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; + cache->pinned = 0; + cache->data = (nr & 1); key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); - ret = radix_tree_insert(&root->fs_info->block_group_radix, - found_key.objectid + + if (nr & 1) + radix = &info->block_group_data_radix; + else + radix = &info->block_group_radix; + ret = radix_tree_insert(radix, found_key.objectid + found_key.offset - 1, (void *)cache); BUG_ON(ret); used = btrfs_block_group_used(bi); - if (used < (key.offset * 2) / 3) { - radix_tree_tag_set(&root->fs_info->block_group_radix, - found_key.objectid + + if (used < (key.offset * 8) / 10) { + radix_tree_tag_set(radix, found_key.objectid + found_key.offset - 1, BTRFS_BLOCK_GROUP_AVAIL); } if (key.objectid >= - btrfs_super_total_blocks(root->fs_info->disk_super)) + btrfs_super_total_blocks(info->disk_super)) break; + nr++; } btrfs_free_path(path); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 23ba4e979fc9..d5ac0d8ee3c9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -811,7 +811,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = root; - group = btrfs_find_block_group(root, group, 0); + group = btrfs_find_block_group(root, group, 0, 0); BTRFS_I(inode)->block_group = group; inode->i_uid = current->fsuid; -- cgit v1.2.3 From e37c9e6921207cf503634b06bee37ecb7904408d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 May 2007 20:13:14 -0400 Subject: Btrfs: many allocator fixes, pretty solid Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/bit-radix.c | 10 +- fs/btrfs/bit-radix.h | 2 +- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 375 ++++++++++++++++++++++++++++++++++++++++++------- fs/btrfs/super.c | 4 +- fs/btrfs/transaction.c | 3 +- 8 files changed, 340 insertions(+), 59 deletions(-) (limited to 
'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 6a8c8cd03ca4..f6df246f26c3 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -7,6 +7,7 @@ * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes * Add generation number to inode +* Add ability to switch a block group from data to metadata or vice versa * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 1a0271445dfb..8f9cd4277231 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -77,7 +77,7 @@ int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) } int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - int nr) + unsigned long start, int nr) { unsigned long *bits; unsigned long *gang[4]; @@ -85,10 +85,13 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, int ret; int i; int total_found = 0; + unsigned long slot; - ret = radix_tree_gang_lookup(radix, (void **)gang, 0, ARRAY_SIZE(gang)); + slot = start / BIT_RADIX_BITS_PER_ARRAY; + ret = radix_tree_gang_lookup(radix, (void **)gang, slot, + ARRAY_SIZE(gang)); + found = start % BIT_RADIX_BITS_PER_ARRAY; for (i = 0; i < ret && nr > 0; i++) { - found = 0; bits = gang[i]; while(nr > 0) { found = find_next_bit(bits + 1, @@ -104,6 +107,7 @@ int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, } else break; } + found = 0; } return total_found; } diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h index 56aad4c7d7f7..4e717e30db4f 100644 --- a/fs/btrfs/bit-radix.h +++ b/fs/btrfs/bit-radix.h @@ -6,7 +6,7 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); int find_first_radix_bit(struct radix_tree_root *radix, unsigned long 
*retbits, - int nr); + unsigned long start, int nr); static inline void init_bit_radix(struct radix_tree_root *radix) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cdb7c23c41f9..92a6078de827 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -259,7 +259,9 @@ struct btrfs_block_group_cache { u64 first_free; u64 last_alloc; u64 pinned; + u64 last_prealloc; int data; + int cached; }; struct crypto_hash; @@ -273,6 +275,7 @@ struct btrfs_fs_info { struct radix_tree_root dev_radix; struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; + struct radix_tree_root extent_map_radix; u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3]; int extent_tree_insert_nr; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7930458c227e..2dbf422a2b9a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -551,6 +551,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + init_bit_radix(&fs_info->extent_map_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3edfc300289f..3ac9da453472 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,6 +12,97 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static int cache_block_group(struct btrfs_root *root, + struct btrfs_block_group_cache *block_group) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + struct radix_tree_root *extent_radix; + int slot; + u64 i; + u64 last = 0; + u64 hole_size; + int found = 0; + + root = root->fs_info->extent_root; + extent_radix = &root->fs_info->extent_map_radix; + + if (block_group->cached) + return 0; + if 
(block_group->data) + return 0; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; +printk("cache block group %Lu\n", block_group->key.objectid); + key.objectid = block_group->key.objectid; + key.flags = 0; + key.offset = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (ret && path->slots[0] > 0) + path->slots[0]--; + while(1) { + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(&leaf->header)) { + ret = btrfs_next_leaf(root, path); + if (ret == 0) + continue; + else { + if (found) { + hole_size = block_group->key.objectid + + block_group->key.offset - last; + } else { + last = block_group->key.objectid; + hole_size = block_group->key.offset; + } + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, + last + i); + } + break; + } + } + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (key.objectid >= block_group->key.objectid + + block_group->key.offset) { + if (found) { + hole_size = block_group->key.objectid + + block_group->key.offset - last; + } else { + last = block_group->key.objectid; + hole_size = block_group->key.offset; + } + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, last + i); + } + break; + } + if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { + if (!found) { + last = key.objectid + key.offset; + found = 1; + } else { + hole_size = key.objectid - last; + for (i = 0; i < hole_size; i++) { + set_radix_bit(extent_radix, last + i); + } + last = key.objectid + key.offset; + } + } + path->slots[0]++; + } + + block_group->cached = 1; + btrfs_free_path(path); + return 0; +} + static struct btrfs_block_group_cache *lookup_block_group(struct btrfs_fs_info *info, u64 blocknr) @@ -44,6 +135,63 @@ static struct btrfs_block_group_cache *lookup_block_group(struct return NULL; } +static u64 leaf_range(struct btrfs_root *root) +{ + u64 size = BTRFS_LEAF_DATA_SIZE(root); 
+ size = size / (sizeof(struct btrfs_extent_item) + + sizeof(struct btrfs_item)); + return size; +} + +static u64 find_search_start(struct btrfs_root *root, + struct btrfs_block_group_cache **cache_ret, + u64 search_start, int num) +{ + unsigned long gang[8]; + int ret; + struct btrfs_block_group_cache *cache = *cache_ret; + u64 last = max(search_start, cache->key.objectid); + + if (cache->data) + goto out; + if (num > 1) { + last = max(last, cache->last_prealloc); + } +again: + cache_block_group(root, cache); + while(1) { + ret = find_first_radix_bit(&root->fs_info->extent_map_radix, + gang, last, ARRAY_SIZE(gang)); + if (!ret) + goto out; + last = gang[ret-1] + 1; + if (num > 1) { + if (ret != ARRAY_SIZE(gang)) { + goto new_group; + } + if (gang[ret-1] - gang[0] > leaf_range(root)) { + continue; + } + } + if (gang[0] >= cache->key.objectid + cache->key.offset) { + goto new_group; + } + return gang[0]; + } +out: + return max(cache->last_alloc, search_start); + +new_group: + cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1); + if (!cache) { + return max((*cache_ret)->last_alloc, search_start); + } + cache = btrfs_find_block_group(root, cache, + last + cache->key.offset - 1, 0); + *cache_ret = cache; + goto again; +} + struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -89,13 +237,18 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, } last = hint->key.offset * 2; if (hint->key.objectid >= last) - last = max(search_start, hint->key.objectid - last); + last = max(search_start + hint->key.offset - 1, + hint->key.objectid - last); else last = hint->key.objectid + hint->key.offset; hint_last = last; } else { - hint_last = search_start; - last = search_start; + if (hint) + hint_last = max(hint->key.objectid, search_start); + else + hint_last = search_start; + + last = hint_last; } while(1) { ret = radix_tree_gang_lookup_tag(radix, 
(void **)cache, @@ -357,13 +510,14 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num, int alloc) + u64 blocknr, u64 num, int alloc, int mark_free) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; u64 total = num; u64 old_val; u64 block_in_group; + u64 i; while(total) { cache = lookup_block_group(info, blocknr); @@ -380,18 +534,38 @@ static int update_block_group(struct btrfs_trans_handle *trans, old_val = btrfs_block_group_used(&cache->item); num = min(total, cache->key.offset - block_in_group); - total -= num; - blocknr += num; if (alloc) { old_val += num; if (blocknr > cache->last_alloc) cache->last_alloc = blocknr; + if (!cache->data) { + for (i = 0; i < num; i++) { + clear_radix_bit(&info->extent_map_radix, + blocknr + i); + } + } } else { old_val -= num; if (blocknr < cache->first_free) cache->first_free = blocknr; + if (!cache->data && mark_free) { + for (i = 0; i < num; i++) { + set_radix_bit(&info->extent_map_radix, + blocknr + i); + } + } + if (old_val < (cache->key.offset * 8) / 10 && + old_val + num >= (cache->key.offset * 8) / 10) { +printk("group %Lu now available\n", cache->key.objectid); + radix_tree_tag_set(cache->radix, + cache->key.objectid + + cache->key.offset - 1, + BTRFS_BLOCK_GROUP_AVAIL); + } } btrfs_set_block_group_used(&cache->item, old_val); + total -= num; + blocknr += num; } return 0; } @@ -413,9 +587,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct int ret; int i; struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix; + struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix; while(1) { - ret = find_first_radix_bit(pinned_radix, gang, + ret = find_first_radix_bit(pinned_radix, gang, 0, ARRAY_SIZE(gang)); if (!ret) break; @@ -430,6 +605,10 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 
struct block_group->pinned--; if (gang[i] < block_group->last_alloc) block_group->last_alloc = gang[i]; + if (gang[i] < block_group->last_prealloc) + block_group->last_prealloc = gang[i]; + if (!block_group->data) + set_radix_bit(extent_radix, gang[i]); } try_remove_page(btree_inode->i_mapping, gang[i] << (PAGE_CACHE_SHIFT - @@ -508,7 +687,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 blocknr, u64 num_blocks, int pin, + int mark_free) { struct btrfs_path *path; struct btrfs_key key; @@ -556,10 +736,10 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ret = btrfs_del_item(trans, extent_root, path); if (ret) BUG(); - ret = update_block_group(trans, root, blocknr, num_blocks, 0); + ret = update_block_group(trans, root, blocknr, num_blocks, 0, + mark_free); BUG_ON(ret); } - btrfs_release_path(extent_root, path); btrfs_free_path(path); finish_current_insert(trans, extent_root); return ret; @@ -585,7 +765,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct pinned_radix = &extent_root->fs_info->pinned_radix; while(1) { - ret = find_first_radix_bit(pending_radix, gang, + ret = find_first_radix_bit(pending_radix, gang, 0, ARRAY_SIZE(gang)); if (!ret) break; @@ -605,7 +785,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct wret = clear_radix_bit(pending_radix, gang[i]); BUG_ON(wret); wret = __free_extent(trans, extent_root, - gang[i], 1, 0); + gang[i], 1, 0, 0); if (wret) err = wret; } @@ -627,7 +807,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root pin_down_block(root, blocknr, 1); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin); + ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0); pending_ret = 
del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -688,18 +868,45 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: if (!full_scan && block_group->data != data) WARN_ON(1); - if (block_group->last_alloc > search_start) - search_start = block_group->last_alloc; + + if (!data) + search_start = find_search_start(root, &block_group, + search_start, total_needed); + else + search_start = max(block_group->last_alloc, search_start); + btrfs_init_path(path); ins->objectid = search_start; ins->offset = 0; start_found = 0; + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); if (ret < 0) goto error; - if (path->slots[0] > 0) + if (path->slots[0] > 0) { path->slots[0]--; + } + + l = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key); + /* + * a rare case, go back one key if we hit a block group item + * instead of an extent item + */ + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY && + key.objectid + key.offset >= search_start) { + ins->objectid = key.objectid; + ins->offset = key.offset - 1; + btrfs_release_path(root, path); + ret = btrfs_search_slot(trans, root, ins, path, 0, 0); + if (ret < 0) + goto error; + + if (path->slots[0] > 0) { + path->slots[0]--; + } + } while (1) { l = btrfs_buffer_leaf(path->nodes[0]); @@ -725,21 +932,23 @@ check_failed: ins->offset = search_end - ins->objectid; goto check_pending; } + btrfs_disk_key_to_cpu(&key, &l->items[slot].key); - if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) - goto next; - if (key.objectid >= search_start) { - if (start_found) { - if (last_block < search_start) - last_block = search_start; - hole_size = key.objectid - last_block; - if (hole_size >= num_blocks) { - ins->objectid = last_block; - ins->offset = hole_size; - goto check_pending; - } + if (key.objectid >= search_start && key.objectid > last_block && + start_found) { + if (last_block < search_start) + last_block = 
search_start; + hole_size = key.objectid - last_block; + if (hole_size >= num_blocks) { + ins->objectid = last_block; + ins->offset = hole_size; + goto check_pending; } } + + if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) + goto next; + start_found = 1; last_block = key.objectid + key.offset; if (last_block >= block_group->key.objectid + @@ -759,6 +968,7 @@ check_pending: */ btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); + if (ins->objectid + num_blocks >= search_end) { if (full_scan) return -ENOSPC; @@ -780,7 +990,7 @@ check_pending: info->extent_tree_insert[0] && ins->objectid <= last) { search_start = last + 1; - WARN_ON(1); + WARN_ON(!full_scan); goto new_group; } } @@ -790,13 +1000,18 @@ check_pending: if (ins->objectid + num_blocks > first && ins->objectid <= info->extent_tree_prealloc[0]) { search_start = info->extent_tree_prealloc[0] + 1; - WARN_ON(1); + WARN_ON(!full_scan); goto new_group; } } if (fill_prealloc) { int nr; test_block = ins->objectid; + if (test_block - info->extent_tree_prealloc[total_needed - 1] >= + leaf_range(root)) { + total_found = 0; + info->extent_tree_prealloc_nr = total_found; + } while(test_block < ins->objectid + ins->offset && total_found < total_needed) { nr = total_needed - total_found - 1; @@ -811,11 +1026,15 @@ check_pending: } info->extent_tree_prealloc_nr = total_found; } - block_group = lookup_block_group(info, ins->objectid); - if (block_group) { - block_group->last_alloc = ins->objectid; - if (!data) - trans->block_group = block_group; + if (!data) { + block_group = lookup_block_group(info, ins->objectid); + if (block_group) { + if (fill_prealloc) + block_group->last_prealloc = + info->extent_tree_prealloc[total_needed-1]; + else + trans->block_group = block_group; + } } ins->offset = num_blocks; btrfs_free_path(path); @@ -824,6 +1043,7 @@ check_pending: new_group: if (search_start + num_blocks >= search_end) { search_start = orig_search_start; +printk("doing full scan!\n"); full_scan = 1; } 
block_group = lookup_block_group(info, search_start); @@ -871,26 +1091,57 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, info->extent_tree_insert[info->extent_tree_insert_nr++] = ins->objectid; ret = update_block_group(trans, root, - ins->objectid, ins->offset, 1); + ins->objectid, ins->offset, 1, 0); BUG_ON(ret); return 0; } + + /* + * if we're doing a data allocation, preallocate room in the + * extent tree first. This way the extent tree blocks end up + * in the correct block group. + */ + if (data) { + ret = find_free_extent(trans, root, 0, search_start, + search_end, &prealloc_key, 0); + if (ret) { + return ret; + } + if (prealloc_key.objectid + prealloc_key.offset >= search_end) { + int nr = info->extent_tree_prealloc_nr; + search_end = info->extent_tree_prealloc[nr - 1] - 1; + } else { + search_start = info->extent_tree_prealloc[0] + 1; + } + } /* do the real allocation */ ret = find_free_extent(trans, root, num_blocks, search_start, search_end, ins, data); - if (ret) + if (ret) { return ret; + } - /* then do prealloc for the extent tree */ - if (ins->objectid + ins->offset >= search_end) - search_end = ins->objectid - 1; - else - search_start = ins->objectid + ins->offset; + /* + * if we're doing a metadata allocation, preallocate space in the + * extent tree second. This way, we don't create a tiny hole + * in the allocation map between any unused preallocation blocks + * and the metadata block we're actually allocating. On disk, + * it'll go: + * [block we've allocated], [used prealloc 1], [ unused prealloc ] + * The unused prealloc will get reused the next time around. 
+ */ + if (!data) { + if (ins->objectid + ins->offset >= search_end) + search_end = ins->objectid - 1; + else + search_start = ins->objectid + ins->offset; - ret = find_free_extent(trans, root, 0, search_start, - search_end, &prealloc_key, 0); - if (ret) - return ret; + ret = find_free_extent(trans, root, 0, search_start, + search_end, &prealloc_key, 0); + if (ret) { + return ret; + } + } super_blocks_used = btrfs_super_blocks_used(info->disk_super); btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + @@ -900,11 +1151,13 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); - if (ret) + if (ret) { return ret; - if (pending_ret) + } + if (pending_ret) { return pending_ret; - ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); + } + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0); return 0; } @@ -920,7 +1173,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, hint, (unsigned long)-1, &ins, 0); + 1, 0, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; @@ -1134,6 +1387,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) { int ret; int ret2; + unsigned long gang[16]; + int i; ret = free_block_group_radix(&info->block_group_radix); ret2 = free_block_group_radix(&info->block_group_data_radix); @@ -1141,6 +1396,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return ret; if (ret2) return ret2; + + while(1) { + ret = find_first_radix_bit(&info->extent_map_radix, + gang, 0, ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + clear_radix_bit(&info->extent_map_radix, gang[i]); + } + } return 0; } @@ -1186,7 +1451,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) break; } - if (nr & 1) + if (nr % 3) radix = &info->block_group_data_radix; 
else radix = &info->block_group_radix; @@ -1197,8 +1462,14 @@ int btrfs_read_block_groups(struct btrfs_root *root) memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; cache->first_free = cache->key.objectid; + cache->last_prealloc = cache->key.objectid; cache->pinned = 0; - cache->data = (nr & 1); + cache->cached = 0; + + if (nr % 3) + cache->data = 1; + else + cache->data = 0; cache->radix = radix; key.objectid = found_key.objectid + found_key.offset; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d5ac0d8ee3c9..4c2870e2d12f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1539,7 +1539,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, kunmap(pages[i]); } SetPageChecked(pages[i]); - btrfs_update_inode_block_group(trans, inode); + // btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1914,7 +1914,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } BUG_ON(ret); alloc_extent_start = ins.objectid; - btrfs_update_inode_block_group(trans, inode); + // btrfs_update_inode_block_group(trans, inode); ret = btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8bbe9107ff7e..f0f03121b7b2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -122,7 +122,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, } dirty_pages = &trans->transaction->dirty_pages; while(1) { - ret = find_first_radix_bit(dirty_pages, gang, ARRAY_SIZE(gang)); + ret = find_first_radix_bit(dirty_pages, gang, + 0, ARRAY_SIZE(gang)); if (!ret) break; for (i = 0; i < ret; i++) { -- cgit v1.2.3 From 098f59c2512426926722a96f82af127bd91fb5e4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 11 May 2007 11:33:21 -0400 Subject: Btrfs: patch queue: fix corruption when splitting large items 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 10 ++++++++-- fs/btrfs/super.c | 8 ++++---- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 990d297a7a13..20e01f95ce6e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -871,6 +871,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_node_blockptr(lower, slot, blocknr); btrfs_set_header_nritems(&lower->header, nritems + 1); btrfs_mark_buffer_dirty(path->nodes[level]); + check_node(root, path, level); return 0; } @@ -1101,6 +1102,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } else { btrfs_block_release(root, right_buf); } + if (path->nodes[1]) + check_node(root, path, 1); return 0; } /* @@ -1216,6 +1219,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(t); btrfs_mark_buffer_dirty(right_buf); + wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); if (wret) ret = wret; @@ -1231,6 +1235,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); + if (path->nodes[1]) + check_node(root, path, 1); return ret; } @@ -1326,13 +1332,12 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root wret = insert_ptr(trans, root, path, &disk_key, bh_blocknr(right_buffer), - path->slots[1] - 1, 1); + path->slots[1], 1); if (wret) ret = wret; btrfs_block_release(root, path->nodes[0]); path->nodes[0] = right_buffer; path->slots[0] = 0; - path->slots[1] -= 1; if (path->slots[1] == 0) { wret = fixup_low_keys(trans, root, path, &disk_key, 1); @@ -1379,6 +1384,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root } else btrfs_block_release(root, right_buffer); BUG_ON(path->slots[0] < 0); + check_node(root, path, 1); if (!double_split) return ret; diff --git a/fs/btrfs/super.c 
b/fs/btrfs/super.c index 4c2870e2d12f..eaa48f09d1c4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1529,7 +1529,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - memcpy(ptr, bh->b_data, offset + write_bytes); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, bh->b_data, offset + write_bytes); mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); } else { @@ -1686,9 +1687,9 @@ static int drop_extents(struct btrfs_trans_handle *trans, ret = btrfs_del_item(trans, root, path); BUG_ON(ret); btrfs_release_path(root, path); + extent = NULL; if (found_extent) { - inode->i_blocks -= - btrfs_file_extent_num_blocks(extent) << 3; + inode->i_blocks -= extent_num_blocks << 3; ret = btrfs_free_extent(trans, root, disk_blocknr, disk_num_blocks, 0); @@ -1832,7 +1833,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, u64 alloc_extent_start; struct btrfs_trans_handle *trans; struct btrfs_key ins; - pinned[0] = NULL; pinned[1] = NULL; if (file->f_flags & O_DIRECT) -- cgit v1.2.3 From de428b63b16f9c74a24228a517713e8c65f79475 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 May 2007 13:28:27 -0400 Subject: Btrfs: allocator optimizations, truncate readahead Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 12 ++++++-- fs/btrfs/extent-tree.c | 76 ++++++++++++++++++++++++++++++++++++++++---------- fs/btrfs/super.c | 76 +++++++++++++++++++++++++++++++++++++------------- 4 files changed, 129 insertions(+), 37 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d8caeba5d758..5830b4333d83 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1081,7 +1081,7 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, 
u64 search_start, - int data); + int data, int owner); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 678456d1e2f9..4c0262b57664 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -257,24 +257,30 @@ static struct address_space_operations btree_aops = { int readahead_tree_block(struct btrfs_root *root, u64 blocknr) { struct buffer_head *bh = NULL; + int ret = 0; bh = btrfs_find_create_tree_block(root, blocknr); if (!bh) return 0; - if (buffer_uptodate(bh)) + if (buffer_uptodate(bh)) { + ret = 1; goto done; - if (test_set_buffer_locked(bh)) + } + if (test_set_buffer_locked(bh)) { + ret = 1; goto done; + } if (!buffer_uptodate(bh)) { get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); } else { unlock_buffer(bh); + ret = 1; } done: brelse(bh); - return 0; + return ret; } struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1497ff98f0d3..e3c6bfea3751 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -12,6 +12,33 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int del_pending_extents(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root); +static void reada_extent_leaves(struct btrfs_root *root, + struct btrfs_path *path, u64 limit) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 item_objectid; + u64 blocknr; + int slot; + int ret; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1] + 1; + nritems = btrfs_header_nritems(&node->header); + for (i = slot; i < nritems && i < slot + 8; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid > limit) + break; + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, 
blocknr); + if (ret) + break; + } +} + static int cache_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *block_group) { @@ -24,6 +51,7 @@ static int cache_block_group(struct btrfs_root *root, u64 i; u64 last = 0; u64 hole_size; + u64 limit; int found = 0; root = root->fs_info->extent_root; @@ -46,14 +74,17 @@ printk("cache block group %Lu\n", block_group->key.objectid); return ret; if (ret && path->slots[0] > 0) path->slots[0]--; + limit = block_group->key.objectid + block_group->key.offset; + reada_extent_leaves(root, path, limit); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&leaf->header)) { + reada_extent_leaves(root, path, limit); ret = btrfs_next_leaf(root, path); - if (ret == 0) + if (ret == 0) { continue; - else { + } else { if (found) { hole_size = block_group->key.objectid + block_group->key.offset - last; @@ -187,7 +218,7 @@ new_group: return max((*cache_ret)->last_alloc, search_start); } cache = btrfs_find_block_group(root, cache, - last + cache->key.offset - 1, 0); + last + cache->key.offset - 1, 0, 0); *cache_ret = cache; goto again; } @@ -195,7 +226,7 @@ new_group: struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, - int data) + int data, int owner) { struct btrfs_block_group_cache *cache[8]; struct btrfs_block_group_cache *found_group = NULL; @@ -207,6 +238,10 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int i; int ret; int full_search = 0; + int factor = 8; + + if (!owner) + factor = 5; if (data) radix = &info->block_group_data_radix; @@ -219,14 +254,14 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (shint->data == data) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < - (shint->key.offset * 8) / 10) { + (shint->key.offset * factor) / 10) { return shint; } } } if (hint && hint->data 
== data) { used = btrfs_block_group_used(&hint->item); - if (used + hint->pinned < (hint->key.offset * 8) / 10) { + if (used + hint->pinned < (hint->key.offset * factor) / 10) { return hint; } if (used >= (hint->key.offset * 8) / 10) { @@ -261,7 +296,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, cache[i]->key.offset; used = btrfs_block_group_used(&cache[i]->item); if (used + cache[i]->pinned < - (cache[i]->key.offset * 8) / 10) { + (cache[i]->key.offset * factor) / 10) { found_group = cache[i]; goto found; } @@ -272,6 +307,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, BTRFS_BLOCK_GROUP_AVAIL); } } + cond_resched(); } last = hint_last; again: @@ -295,13 +331,16 @@ again: BTRFS_BLOCK_GROUP_AVAIL); } } + cond_resched(); } if (!full_search) { +printk("find block group doing full search data %d start %Lu\n", data, search_start); last = search_start; full_search = 1; goto again; } if (!found_group) { +printk("find block group bailing to zero data %d\n", data); ret = radix_tree_gang_lookup(radix, (void **)&found_group, 0, 1); BUG_ON(ret != 1); @@ -554,8 +593,8 @@ static int update_block_group(struct btrfs_trans_handle *trans, blocknr + i); } } - if (old_val < (cache->key.offset * 6) / 10 && - old_val + num >= (cache->key.offset * 6) / 10) { + if (old_val < (cache->key.offset * 5) / 10 && + old_val + num >= (cache->key.offset * 5) / 10) { printk("group %Lu now available\n", cache->key.objectid); radix_tree_tag_set(cache->radix, cache->key.objectid + @@ -842,6 +881,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; + u64 limit; path = btrfs_alloc_path(); ins->flags = 0; @@ -858,11 +898,11 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (search_start) { block_group = lookup_block_group(info, search_start); block_group = btrfs_find_block_group(root, 
block_group, - search_start, data); + search_start, data, 1); } else { block_group = btrfs_find_block_group(root, trans->block_group, 0, - data); + data, 1); } check_failed: @@ -916,6 +956,12 @@ check_failed: info->extent_tree_prealloc_nr = 0; total_found = 0; } + if (start_found) + limit = last_block + + block_group->key.offset / 2; + else + limit = search_start + + block_group->key.offset / 2; ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -960,6 +1006,7 @@ check_failed: } next: path->slots[0]++; + cond_resched(); } // FIXME -ENOSPC check_pending: @@ -1049,7 +1096,8 @@ printk("doing full scan!\n"); block_group = lookup_block_group(info, search_start); if (!full_scan) block_group = btrfs_find_block_group(root, block_group, - search_start, data); + search_start, data, 0); + cond_resched(); goto check_failed; error: @@ -1102,7 +1150,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, * in the correct block group. */ if (data) { - ret = find_free_extent(trans, root, 0, search_start, + ret = find_free_extent(trans, root, 0, 0, search_end, &prealloc_key, 0); if (ret) { return ret; @@ -1173,7 +1221,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, 0, (unsigned long)-1, &ins, 0); + 1, hint, (unsigned long)-1, &ins, 0); if (ret) { BUG(); return NULL; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index eaa48f09d1c4..0f79490123cf 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -351,6 +351,35 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, return ret; } +static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 item_objectid; + u64 blocknr; + int slot; + int ret; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + if (slot == 0) + return; + nritems = 
btrfs_header_nritems(&node->header); + for (i = slot - 1; i >= 0; i--) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -386,6 +415,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } + reada_truncate(root, path, inode->i_ino); leaf = btrfs_buffer_leaf(path->nodes[0]); found_key = &leaf->items[path->slots[0]].key; if (btrfs_disk_key_objectid(found_key) != inode->i_ino) @@ -587,28 +617,30 @@ printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_r return d_splice_alias(inode, dentry); } -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path) +static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid) { struct btrfs_node *node; int i; - int nritems; - u64 objectid; + u32 nritems; u64 item_objectid; u64 blocknr; int slot; + int ret; if (!path->nodes[1]) return; node = btrfs_buffer_node(path->nodes[1]); slot = path->slots[1]; - objectid = btrfs_disk_key_objectid(&node->ptrs[slot].key); nritems = btrfs_header_nritems(&node->header); - for (i = slot; i < nritems; i++) { + for (i = slot + 1; i < nritems; i++) { item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); if (item_objectid != objectid) break; blocknr = btrfs_node_blockptr(node, i); - readahead_tree_block(root, blocknr); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; } } @@ -646,21 +678,20 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (ret < 0) goto err; advance = 0; - reada_leaves(root, path); + reada_leaves(root, path, inode->i_ino); while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = 
btrfs_header_nritems(&leaf->header); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { + reada_leaves(root, path, inode->i_ino); ret = btrfs_next_leaf(root, path); if (ret) break; leaf = btrfs_buffer_leaf(path->nodes[0]); nritems = btrfs_header_nritems(&leaf->header); slot = path->slots[0]; - if (path->slots[1] == 0) - reada_leaves(root, path); } else { slot++; path->slots[0]++; @@ -805,13 +836,18 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_inode_item inode_item; struct btrfs_key *location; int ret; + int owner; inode = new_inode(root->fs_info->sb); if (!inode) return ERR_PTR(-ENOMEM); BTRFS_I(inode)->root = root; - group = btrfs_find_block_group(root, group, 0, 0); + if (mode & S_IFDIR) + owner = 0; + else + owner = 1; + group = btrfs_find_block_group(root, group, 0, 0, owner); BTRFS_I(inode)->block_group = group; inode->i_uid = current->fsuid; @@ -1562,7 +1598,7 @@ failed: static int drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end) + u64 start, u64 end, u64 *hint_block) { int ret; struct btrfs_key key; @@ -1659,17 +1695,14 @@ static int drop_extents(struct btrfs_trans_handle *trans, new_num = (start - key.offset) >> inode->i_blkbits; old_num = btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); inode->i_blocks -= (old_num - new_num) << 3; btrfs_set_file_extent_num_blocks(extent, new_num); mark_buffer_dirty(path->nodes[0]); } else { WARN_ON(1); - /* - ret = btrfs_truncate_item(trans, root, path, - start - key.offset); - BUG_ON(ret); - */ } } if (!keep) { @@ -1683,6 +1716,8 @@ static int drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_disk_num_blocks(extent); extent_num_blocks = btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); } ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -1831,6 +1866,7 @@ static ssize_t 
btrfs_file_write(struct file *file, const char __user *buf, u64 start_pos; u64 num_blocks; u64 alloc_extent_start; + u64 hint_block; struct btrfs_trans_handle *trans; struct btrfs_key ins; pinned[0] = NULL; @@ -1871,6 +1907,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, } if (first_index != last_index && (last_index << PAGE_CACHE_SHIFT) < inode->i_size && + pos + count < inode->i_size && (count & (PAGE_CACHE_SIZE - 1))) { pinned[1] = grab_cache_page(inode->i_mapping, last_index); if (!PageUptodate(pinned[1])) { @@ -1892,18 +1929,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, btrfs_set_trans_block_group(trans, inode); /* FIXME blocksize != 4096 */ inode->i_blocks += num_blocks << 3; + hint_block = 0; if (start_pos < inode->i_size) { /* FIXME blocksize != pagesize */ ret = drop_extents(trans, root, inode, start_pos, (pos + count + root->blocksize -1) & - ~((u64)root->blocksize - 1)); + ~((u64)root->blocksize - 1), &hint_block); BUG_ON(ret); } if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 1, (u64)-1, &ins, 1); + num_blocks, hint_block, (u64)-1, + &ins, 1); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset); @@ -2455,7 +2494,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, btrfs_fill_super, mnt); } - static int btrfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { -- cgit v1.2.3 From f9f3c6b666f717510b67036c314ec915b9059eaa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 May 2007 14:05:12 -0400 Subject: Btrfs: 2.6.21-git fixes Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 -- fs/btrfs/disk-io.c | 3 --- fs/btrfs/super.c | 21 +-------------------- 3 files changed, 1 insertion(+), 25 deletions(-) (limited to 'fs/btrfs/super.c') diff --git 
a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5830b4333d83..da12d8275817 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1197,6 +1197,4 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, u64 objectid, u64 offset, int cow); /* super.c */ -extern struct subsystem btrfs_subsys; - #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c0262b57664..3da057f78bc8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -649,9 +649,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->generation = btrfs_super_generation(disk_super) + 1; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); - kobj_set_kset_s(fs_info, btrfs_subsys); - kobject_set_name(&fs_info->kobj, "%s", sb->s_id); - kobject_register(&fs_info->kobj); mutex_unlock(&fs_info->fs_mutex); return tree_root; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0f79490123cf..0220c82841ad 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -18,24 +18,11 @@ #include "btrfs_inode.h" #include "ioctl.h" -static void btrfs_fsinfo_release(struct kobject *obj) -{ - struct btrfs_fs_info *fsinfo = container_of(obj, - struct btrfs_fs_info, kobj); - kfree(fsinfo); -} - -static struct kobj_type btrfs_fsinfo_ktype = { - .release = btrfs_fsinfo_release, -}; - struct btrfs_iget_args { u64 ino; struct btrfs_root *root; }; -decl_subsys(btrfs, &btrfs_fsinfo_ktype, NULL); - #define BTRFS_SUPER_MAGIC 0x9123682E static struct inode_operations btrfs_dir_inode_operations; @@ -2438,7 +2425,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, { struct btrfs_inode *ei = (struct btrfs_inode *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + if ((flags & (SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&ei->vfs_inode); } @@ -2588,12 +2575,7 @@ static int __init init_btrfs_fs(void) err = init_inodecache(); if (err) return err; - kset_set_kset_s(&btrfs_subsys, fs_subsys); - err = subsystem_register(&btrfs_subsys); - if (err) 
- goto out; return register_filesystem(&btrfs_fs_type); -out: destroy_inodecache(); return err; } @@ -2602,7 +2584,6 @@ static void __exit exit_btrfs_fs(void) { destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); - subsystem_unregister(&btrfs_subsys); printk("btrfs unloaded\n"); } -- cgit v1.2.3 From e06afa839e726959be2166bec4cb85c117e213f1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 23 May 2007 15:44:28 -0400 Subject: Btrfs: rename Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 1 + fs/btrfs/dir-item.c | 27 ++++++++--- fs/btrfs/super.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 143 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index f6df246f26c3..861e34ae0979 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -8,6 +8,7 @@ * Add generation number to key pointer in nodes * Add generation number to inode * Add ability to switch a block group from data to metadata or vice versa +* forbid cross subvolume renames and hardlinks * Release * Do real tree locking * Add extent mirroring (backup copies of blocks) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index da12d8275817..3330004af1a6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -32,6 +32,7 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_CSUM_SIZE 32 /* four bytes for CRC32 */ #define BTRFS_CRC32_SIZE 4 +#define BTRFS_EMPTY_DIR_SIZE 6 /* * the key defines the order in the tree, and so it also defines (optimal) diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 00a28d90fea6..b408a3d20c7b 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -9,7 +9,9 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *cpu_key, - u32 data_size) + u32 data_size, + const char *name, + int name_len) { int ret; char *ptr; @@ -18,6 +20,10 @@ static struct btrfs_dir_item 
*insert_with_overflow(struct btrfs_trans_handle ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (ret == -EEXIST) { + struct btrfs_dir_item *di; + di = btrfs_match_dir_item_name(root, path, name, name_len); + if (di) + return ERR_PTR(-EEXIST); ret = btrfs_extend_item(trans, root, path, data_size); WARN_ON(ret > 0); if (ret) @@ -37,6 +43,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key *location, u8 type) { int ret = 0; + int ret2 = 0; struct btrfs_path *path; struct btrfs_dir_item *dir_item; char *name_ptr; @@ -51,9 +58,12 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); btrfs_init_path(path); data_size = sizeof(*dir_item) + name_len; - dir_item = insert_with_overflow(trans, root, path, &key, data_size); + dir_item = insert_with_overflow(trans, root, path, &key, data_size, + name, name_len); if (IS_ERR(dir_item)) { ret = PTR_ERR(dir_item); + if (ret == -EEXIST) + goto second_insert; goto out; } @@ -66,19 +76,20 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); btrfs_mark_buffer_dirty(path->nodes[0]); +second_insert: /* FIXME, use some real flag for selecting the extra index */ if (root == root->fs_info->tree_root) { ret = 0; goto out; } - btrfs_release_path(root, path); btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = location->objectid; - dir_item = insert_with_overflow(trans, root, path, &key, data_size); + dir_item = insert_with_overflow(trans, root, path, &key, data_size, + name, name_len); if (IS_ERR(dir_item)) { - ret = PTR_ERR(dir_item); + ret2 = PTR_ERR(dir_item); goto out; } btrfs_cpu_key_to_disk(&dir_item->location, location); @@ -90,7 +101,11 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_free_path(path); - return ret; + if 
(ret) + return ret; + if (ret2) + return ret2; + return 0; } struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0220c82841ad..f49cad603ee8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -375,6 +375,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key key; struct btrfs_disk_key *found_key; + u32 found_type; struct btrfs_leaf *leaf; struct btrfs_file_extent_item *fi = NULL; u64 extent_start = 0; @@ -386,12 +387,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = 0; - /* - * use BTRFS_CSUM_ITEM_KEY because it is larger than inline keys - * or extent data - */ - btrfs_set_key_type(&key, BTRFS_CSUM_ITEM_KEY); + key.flags = (u32)-1; while(1) { btrfs_init_path(path); ret = btrfs_search_slot(trans, root, &key, path, -1, 1); @@ -405,10 +401,13 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, reada_truncate(root, path, inode->i_ino); leaf = btrfs_buffer_leaf(path->nodes[0]); found_key = &leaf->items[path->slots[0]].key; + found_type = btrfs_disk_key_type(found_key); if (btrfs_disk_key_objectid(found_key) != inode->i_ino) break; - if (btrfs_disk_key_type(found_key) != BTRFS_CSUM_ITEM_KEY && - btrfs_disk_key_type(found_key) != BTRFS_EXTENT_DATA_KEY) + if (found_type != BTRFS_CSUM_ITEM_KEY && + found_type != BTRFS_DIR_ITEM_KEY && + found_type != BTRFS_DIR_INDEX_KEY && + found_type != BTRFS_EXTENT_DATA_KEY) break; if (btrfs_disk_key_offset(found_key) < inode->i_size) break; @@ -460,10 +459,8 @@ static void btrfs_delete_inode(struct inode *inode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); - if (S_ISREG(inode->i_mode)) { - ret = btrfs_truncate_in_trans(trans, root, inode); - 
BUG_ON(ret); - } + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); btrfs_free_inode(trans, root, inode); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -2504,6 +2501,116 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir,struct dentry *new_dentry) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(old_dir)->root; + struct inode *new_inode = new_dentry->d_inode; + struct inode *old_inode = old_dentry->d_inode; + struct timespec ctime = CURRENT_TIME; + struct btrfs_path *path; + struct btrfs_dir_item *di; + int ret; + + if (S_ISDIR(old_inode->i_mode) && new_inode && + new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { + return -ENOTEMPTY; + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out_fail; + } + + old_dentry->d_inode->i_nlink++; + old_dir->i_ctime = old_dir->i_mtime = ctime; + new_dir->i_ctime = new_dir->i_mtime = ctime; + old_inode->i_ctime = ctime; + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { + struct btrfs_key *location = &BTRFS_I(new_dir)->location; + u64 old_parent_oid; + di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino, + "..", 2, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out_fail; + } + if (!di) { + ret = -ENOENT; + goto out_fail; + } + old_parent_oid = btrfs_disk_key_objectid(&di->location); + ret = btrfs_del_item(trans, root, path); + if (ret) { + ret = -EIO; + goto out_fail; + } + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_index_item(trans, root, path, + old_inode->i_ino, + old_parent_oid, + "..", 2, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out_fail; + } + if (!di) { + ret = -ENOENT; + goto out_fail; + } + ret = btrfs_del_item(trans, root, 
path); + if (ret) { + ret = -EIO; + goto out_fail; + } + btrfs_release_path(root, path); + + ret = btrfs_insert_dir_item(trans, root, "..", 2, + old_inode->i_ino, location, 0); + if (ret) + goto out_fail; + } + + + ret = btrfs_add_link(trans, new_dentry, old_inode); + if (ret == -EEXIST && new_inode) + ret = 0; + else if (ret) + goto out_fail; + + ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); + if (ret) + goto out_fail; + + if (new_inode) { + new_inode->i_ctime = CURRENT_TIME; + di = btrfs_lookup_dir_index_item(trans, root, path, + new_dir->i_ino, + new_inode->i_ino, + new_dentry->d_name.name, + new_dentry->d_name.len, -1); + if (di && !IS_ERR(di)) { + btrfs_del_item(trans, root, path); + btrfs_release_path(root, path); + } + if (S_ISDIR(new_inode->i_mode)) + clear_nlink(new_inode); + else + drop_nlink(new_inode); + btrfs_update_inode(trans, root, new_inode); + } +out_fail: + btrfs_free_path(path); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2531,6 +2638,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .unlink = btrfs_unlink, .mkdir = btrfs_mkdir, .rmdir = btrfs_rmdir, + .rename = btrfs_rename, }; static struct inode_operations btrfs_dir_ro_inode_operations = { -- cgit v1.2.3 From 2b8d99a723a3186a69b5bd941a272dbecae9205b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 May 2007 08:34:10 -0400 Subject: Btrfs: symlinks and hard links Signed-off-by: Chris Mason --- fs/btrfs/super.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 147 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f49cad603ee8..c1a0adde9b1c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -26,11 +26,13 @@ struct btrfs_iget_args { #define BTRFS_SUPER_MAGIC 0x9123682E static struct inode_operations 
btrfs_dir_inode_operations; +static struct inode_operations btrfs_symlink_inode_operations; static struct inode_operations btrfs_dir_ro_inode_operations; static struct super_operations btrfs_super_ops; static struct file_operations btrfs_dir_file_operations; static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; +static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_file_operations; static void btrfs_read_locked_inode(struct inode *inode) @@ -103,7 +105,8 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_op = &btrfs_dir_inode_operations; break; case S_IFLNK: - // inode->i_op = &page_symlink_inode_operations; + inode->i_op = &btrfs_symlink_inode_operations; + inode->i_mapping->a_ops = &btrfs_symlink_aops; break; } return; @@ -940,6 +943,41 @@ out_unlock: return err; } +static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct inode *inode = old_dentry->d_inode; + int err; + int drop_inode = 0; + + if (inode->i_nlink == 0) + return -ENOENT; + + inc_nlink(inode); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + atomic_inc(&inode->i_count); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, dir); + btrfs_update_inode(trans, root, inode); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 dirid) @@ -2577,33 +2615,25 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, } - 
ret = btrfs_add_link(trans, new_dentry, old_inode); - if (ret == -EEXIST && new_inode) - ret = 0; - else if (ret) - goto out_fail; - ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); if (ret) goto out_fail; if (new_inode) { new_inode->i_ctime = CURRENT_TIME; - di = btrfs_lookup_dir_index_item(trans, root, path, - new_dir->i_ino, - new_inode->i_ino, - new_dentry->d_name.name, - new_dentry->d_name.len, -1); - if (di && !IS_ERR(di)) { - btrfs_del_item(trans, root, path); - btrfs_release_path(root, path); - } + ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); + if (ret) + goto out_fail; if (S_ISDIR(new_inode->i_mode)) clear_nlink(new_inode); else drop_nlink(new_inode); btrfs_update_inode(trans, root, new_inode); } + ret = btrfs_add_link(trans, new_dentry, old_inode); + if (ret) + goto out_fail; + out_fail: btrfs_free_path(path); btrfs_end_transaction(trans, root); @@ -2611,6 +2641,94 @@ out_fail: return ret; } +static int btrfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_path *path; + struct btrfs_key key; + struct inode *inode; + int err; + int drop_inode = 0; + u64 objectid; + int name_len; + int datasize; + char *ptr; + struct btrfs_file_extent_item *ei; + + name_len = strlen(symname) + 1; + if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) + return -ENAMETOOLONG; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_unlock; + + btrfs_set_trans_block_group(trans, inode); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + else { + 
inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + } + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); + if (drop_inode) + goto out_unlock; + + path = btrfs_alloc_path(); + BUG_ON(!path); + key.objectid = inode->i_ino; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + datasize = btrfs_file_extent_calc_inline_size(name_len); + err = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(err); + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, symname, name_len); + mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + inode->i_op = &btrfs_symlink_inode_operations; + inode->i_mapping->a_ops = &btrfs_symlink_aops; + inode->i_size = name_len - 1; + btrfs_update_inode(trans, root, inode); + err = 0; + +out_unlock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2636,9 +2754,11 @@ static struct inode_operations btrfs_dir_inode_operations = { .lookup = btrfs_lookup, .create = btrfs_create, .unlink = btrfs_unlink, + .link = btrfs_link, .mkdir = btrfs_mkdir, .rmdir = btrfs_rmdir, .rename = btrfs_rename, + .symlink = btrfs_symlink, }; static struct inode_operations btrfs_dir_ro_inode_operations = { @@ -2660,6 +2780,11 @@ static struct address_space_operations btrfs_aops = { .commit_write = btrfs_commit_write, }; +static struct address_space_operations 
btrfs_symlink_aops = { + .readpage = btrfs_readpage, + .writepage = btrfs_writepage, +}; + static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, .getattr = btrfs_getattr, @@ -2676,6 +2801,12 @@ static struct file_operations btrfs_file_operations = { .fsync = btrfs_sync_file, }; +static struct inode_operations btrfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, +}; + static int __init init_btrfs_fs(void) { int err; -- cgit v1.2.3 From 3a686375629da5d2e2ad019265b66ef113c87455 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 May 2007 13:35:57 -0400 Subject: Btrfs: sparse files! Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 14 ++++-- fs/btrfs/file-item.c | 11 +++-- fs/btrfs/super.c | 121 ++++++++++++++++++++++++++++++++++++++++--------- 4 files changed, 116 insertions(+), 31 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3330004af1a6..e8f741167c6b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1180,6 +1180,7 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, u64 offset, + u64 disk_num_blocks, u64 num_blocks); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e3c6bfea3751..a366415e03a6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -443,6 +443,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, buf_leaf = btrfs_buffer_leaf(buf); for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { if (leaf) { + u64 disk_blocknr; key = &buf_leaf->items[i].key; if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -451,8 +452,10 @@ int 
btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) continue; - ret = btrfs_inc_extent_ref(trans, root, - btrfs_file_extent_disk_blocknr(fi), + disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + if (disk_blocknr == 0) + continue; + ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, btrfs_file_extent_disk_num_blocks(fi)); BUG_ON(ret); } else { @@ -1248,6 +1251,7 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, leaf = btrfs_buffer_leaf(cur); nritems = btrfs_header_nritems(&leaf->header); for (i = 0; i < nritems; i++) { + u64 disk_blocknr; key = &leaf->items[i].key; if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -1258,8 +1262,10 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - ret = btrfs_free_extent(trans, root, - btrfs_file_extent_disk_blocknr(fi), + disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + if (disk_blocknr == 0) + continue; + ret = btrfs_free_extent(trans, root, disk_blocknr, btrfs_file_extent_disk_num_blocks(fi), 0); BUG_ON(ret); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index a66709e6d036..7990b5742114 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -9,7 +9,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, - u64 offset, u64 num_blocks) + u64 offset, u64 disk_num_blocks, + u64 num_blocks) { int ret = 0; struct btrfs_file_extent_item *item; @@ -30,7 +31,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_disk_blocknr(item, offset); - btrfs_set_file_extent_disk_num_blocks(item, num_blocks); + btrfs_set_file_extent_disk_num_blocks(item, disk_num_blocks); btrfs_set_file_extent_offset(item, 0); 
btrfs_set_file_extent_num_blocks(item, num_blocks); btrfs_set_file_extent_generation(item, trans->transid); @@ -176,14 +177,14 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid || csum_offset >= MAX_CSUM_ITEMS(root)) { - WARN_ON(1); goto insert; } if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / BTRFS_CRC32_SIZE) { u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE; diff = diff - btrfs_item_size(leaf->items + path->slots[0]); - WARN_ON(diff != BTRFS_CRC32_SIZE); + if (diff != BTRFS_CRC32_SIZE) + goto insert; ret = btrfs_extend_item(trans, root, path, diff); BUG_ON(ret); goto csum; @@ -241,7 +242,7 @@ int btrfs_csum_verify_file_block(struct btrfs_root *root, ret = PTR_ERR(item); /* a csum that isn't present is a preallocated region. */ if (ret == -ENOENT || ret == -EFBIG) - ret = 1; + ret = -ENOENT; goto fail; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c1a0adde9b1c..5b87c4e9d491 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -17,6 +17,7 @@ #include "transaction.h" #include "btrfs_inode.h" #include "ioctl.h" +#include "print-tree.h" struct btrfs_iget_args { u64 ino; @@ -421,14 +422,17 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item); if (btrfs_file_extent_type(fi) != BTRFS_FILE_EXTENT_INLINE) { + u64 num_dec; extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = btrfs_file_extent_disk_num_blocks(fi); /* FIXME blocksize != 4096 */ - inode->i_blocks -= - btrfs_file_extent_num_blocks(fi) << 3; - found_extent = 1; + num_dec = btrfs_file_extent_num_blocks(fi) << 3; + if (extent_start != 0) { + found_extent = 1; + inode->i_blocks -= num_dec; + } } } ret = btrfs_del_item(trans, root, path); @@ -448,6 +452,43 @@ error: return ret; } +static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int err; + + err = 
inode_change_ok(inode, attr); + if (err) + return err; + + if (S_ISREG(inode->i_mode) && + attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 mask = root->blocksize - 1; + u64 pos = (inode->i_size + mask) & ~mask; + u64 hole_size; + + if (attr->ia_size < pos) + goto out; + hole_size = (attr->ia_size - pos + mask) & ~mask; + hole_size >>= inode->i_blkbits; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + pos, 0, 0, hole_size); + BUG_ON(err); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + } + + err = inode_setattr(inode, attr); + +out: + return err; +} static void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; @@ -1169,8 +1210,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (found_type == BTRFS_FILE_EXTENT_REG) { extent_start = extent_start >> inode->i_blkbits; extent_end = extent_start + btrfs_file_extent_num_blocks(item); + err = 0; + if (blocknr == 0) + goto out; if (iblock >= extent_start && iblock < extent_end) { - err = 0; btrfs_map_bh_to_logical(root, result, blocknr + iblock - extent_start); goto out; @@ -1591,7 +1634,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, ptr, bh->b_data, offset + write_bytes); mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - } else { + } else if (buffer_mapped(bh)) { btrfs_csum_file_block(trans, root, inode->i_ino, pages[i]->index << PAGE_CACHE_SHIFT, kmap(pages[i]), PAGE_CACHE_SIZE); @@ -1693,15 +1736,24 @@ static int drop_extents(struct btrfs_trans_handle *trans, goto out; } - search_start = extent_end; + if (found_inline) { + u64 mask = root->blocksize - 1; + search_start = (extent_end + mask) & ~mask; + } else + search_start = extent_end; if (end < 
extent_end && end >= key.offset) { if (found_extent) { + u64 disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + u64 disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); memcpy(&old, extent, sizeof(old)); - ret = btrfs_inc_extent_ref(trans, root, - btrfs_file_extent_disk_blocknr(&old), - btrfs_file_extent_disk_num_blocks(&old)); - BUG_ON(ret); + if (disk_blocknr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_blocknr, disk_num_blocks); + BUG_ON(ret); + } } WARN_ON(found_inline); bookend = 1; @@ -1719,7 +1771,10 @@ static int drop_extents(struct btrfs_trans_handle *trans, old_num = btrfs_file_extent_num_blocks(extent); *hint_block = btrfs_file_extent_disk_blocknr(extent); - inode->i_blocks -= (old_num - new_num) << 3; + if (btrfs_file_extent_disk_blocknr(extent)) { + inode->i_blocks -= + (old_num - new_num) << 3; + } btrfs_set_file_extent_num_blocks(extent, new_num); mark_buffer_dirty(path->nodes[0]); @@ -1745,7 +1800,7 @@ static int drop_extents(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; - if (found_extent) { + if (found_extent && disk_blocknr != 0) { inode->i_blocks -= extent_num_blocks << 3; ret = btrfs_free_extent(trans, root, disk_blocknr, @@ -1785,18 +1840,19 @@ static int drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_offset(&old) + ((end - key.offset) >> inode->i_blkbits)); WARN_ON(btrfs_file_extent_num_blocks(&old) < - (end - key.offset) >> inode->i_blkbits); + (extent_end - end) >> inode->i_blkbits); btrfs_set_file_extent_num_blocks(extent, - btrfs_file_extent_num_blocks(&old) - - ((end - key.offset) >> inode->i_blkbits)); + (extent_end - end) >> inode->i_blkbits); btrfs_set_file_extent_type(extent, BTRFS_FILE_EXTENT_REG); btrfs_set_file_extent_generation(extent, btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); - inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; + if (btrfs_file_extent_disk_blocknr(&old) != 0) { + 
inode->i_blocks += + btrfs_file_extent_num_blocks(extent) << 3; + } ret = 0; goto out; } @@ -1960,6 +2016,21 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ~((u64)root->blocksize - 1), &hint_block); BUG_ON(ret); } + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (inode->i_size + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + ret = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + BUG_ON(ret); + } if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { ret = btrfs_alloc_extent(trans, root, inode->i_ino, @@ -1967,7 +2038,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, &ins, 1); BUG_ON(ret); ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset); + start_pos, ins.objectid, ins.offset, + ins.offset); BUG_ON(ret); } else { ins.offset = 0; @@ -2044,10 +2116,13 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, page->index << PAGE_CACHE_SHIFT, kmap(page), PAGE_CACHE_SIZE); if (ret) { - printk("failed to verify ino %lu page %lu\n", - page->mapping->host->i_ino, - page->index); - memset(page_address(page), 0, PAGE_CACHE_SIZE); + if (ret != -ENOENT) { + printk("failed to verify ino %lu page %lu\n", + page->mapping->host->i_ino, + page->index); + memset(page_address(page), 0, PAGE_CACHE_SIZE); + flush_dcache_page(page); + } } SetPageChecked(page); kunmap(page); @@ -2759,6 +2834,7 @@ static struct inode_operations btrfs_dir_inode_operations = { .rmdir = btrfs_rmdir, .rename = btrfs_rename, .symlink = btrfs_symlink, + .setattr = btrfs_setattr, }; static struct inode_operations btrfs_dir_ro_inode_operations = { @@ -2788,6 +2864,7 @@ static struct 
address_space_operations btrfs_symlink_aops = { static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, .getattr = btrfs_getattr, + .setattr = btrfs_setattr, }; static struct file_operations btrfs_file_operations = { -- cgit v1.2.3 From 1de037a43edf67f3a9f66dd197195b3c08febb16 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 May 2007 15:17:08 -0400 Subject: Btrfs: fixup various fsx failures Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 + fs/btrfs/file-item.c | 30 ++++++++ fs/btrfs/super.c | 213 +++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 214 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e8f741167c6b..756ea9d0123b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1198,5 +1198,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_path *path, u64 objectid, u64 offset, int cow); +int btrfs_csum_truncate(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u64 isize); /* super.c */ #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 7990b5742114..0782e924ddeb 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -2,6 +2,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "print-tree.h" #define MAX_CSUM_ITEMS(r) ((((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ @@ -27,6 +28,10 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); + if (ret) { +printk("failed to insert %Lu %Lu ret %d\n", objectid, pos, ret); +btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); + } BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); @@ -218,6 +223,31 @@ fail: return ret; } +int btrfs_csum_truncate(struct btrfs_trans_handle *trans, + 
struct btrfs_root *root, struct btrfs_path *path, + u64 isize) +{ + struct btrfs_key key; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[0]); + int slot = path->slots[0]; + int ret; + u32 new_item_size; + u64 new_item_span; + u64 blocks; + + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (isize <= key.offset) + return 0; + new_item_span = isize - key.offset; + blocks = (new_item_span + root->blocksize - 1) / root->blocksize; + new_item_size = blocks * BTRFS_CRC32_SIZE; + if (new_item_size >= btrfs_item_size(leaf->items + slot)) + return 0; + ret = btrfs_truncate_item(trans, root, path, new_item_size); + BUG_ON(ret); + return ret; +} + int btrfs_csum_verify_file_block(struct btrfs_root *root, u64 objectid, u64 offset, char *data, size_t len) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5b87c4e9d491..0eb64d6eaf9d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -36,6 +36,13 @@ static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_file_operations; +static int drop_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 start, u64 end, u64 *hint_block); +static int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create); + static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -381,10 +388,12 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct btrfs_disk_key *found_key; u32 found_type; struct btrfs_leaf *leaf; - struct btrfs_file_extent_item *fi = NULL; + struct btrfs_file_extent_item *fi; u64 extent_start = 0; u64 extent_num_blocks = 0; + u64 item_end = 0; int found_extent; + int del_item; path = btrfs_alloc_path(); BUG_ON(!path); @@ -394,6 +403,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, key.flags = (u32)-1; while(1) { btrfs_init_path(path); + fi = NULL; ret = 
btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { goto error; @@ -413,16 +423,52 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, found_type != BTRFS_DIR_INDEX_KEY && found_type != BTRFS_EXTENT_DATA_KEY) break; - if (btrfs_disk_key_offset(found_key) < inode->i_size) - break; - found_extent = 0; - if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) { + item_end = btrfs_disk_key_offset(found_key); + if (found_type == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); if (btrfs_file_extent_type(fi) != BTRFS_FILE_EXTENT_INLINE) { - u64 num_dec; + item_end += btrfs_file_extent_num_blocks(fi) << + inode->i_blkbits; + } + } + if (found_type == BTRFS_CSUM_ITEM_KEY) { + ret = btrfs_csum_truncate(trans, root, path, + inode->i_size); + BUG_ON(ret); + } + if (item_end < inode->i_size) { + if (found_type) { + btrfs_set_key_type(&key, found_type - 1); + continue; + } + break; + } + if (btrfs_disk_key_offset(found_key) >= inode->i_size) + del_item = 1; + else + del_item = 0; + found_extent = 0; + + if (found_type == BTRFS_EXTENT_DATA_KEY && + btrfs_file_extent_type(fi) != + BTRFS_FILE_EXTENT_INLINE) { + u64 num_dec; + if (!del_item) { + u64 orig_num_blocks = + btrfs_file_extent_num_blocks(fi); + extent_num_blocks = inode->i_size - + btrfs_disk_key_offset(found_key) + + root->blocksize - 1; + extent_num_blocks >>= inode->i_blkbits; + btrfs_set_file_extent_num_blocks(fi, + extent_num_blocks); + inode->i_blocks -= (orig_num_blocks - + extent_num_blocks) << 3; + mark_buffer_dirty(path->nodes[0]); + } else { extent_start = btrfs_file_extent_disk_blocknr(fi); extent_num_blocks = @@ -435,8 +481,12 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } } } - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (del_item) { + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + } else { + break; + } btrfs_release_path(root, path); if 
(found_extent) { ret = btrfs_free_extent(trans, root, extent_start, @@ -452,6 +502,68 @@ error: return ret; } +static int btrfs_truncate_page(struct address_space *mapping, loff_t from) +{ + struct inode *inode = mapping->host; + unsigned blocksize = 1 << inode->i_blkbits; + pgoff_t index = from >> PAGE_CACHE_SHIFT; + unsigned offset = from & (PAGE_CACHE_SIZE-1); + struct page *page; + char *kaddr; + int ret = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 alloc_hint; + struct btrfs_key ins; + struct btrfs_trans_handle *trans; + + if ((offset & (blocksize - 1)) == 0) + goto out; + + ret = -ENOMEM; + page = grab_cache_page(mapping, index); + if (!page) + goto out; + + if (!PageUptodate(page)) { + ret = mpage_readpage(page, btrfs_get_block); + lock_page(page); + if (!PageUptodate(page)) { + ret = -EIO; + goto out; + } + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = drop_extents(trans, root, inode, page->index << PAGE_CACHE_SHIFT, + (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint); + BUG_ON(ret); + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, + alloc_hint, (u64)-1, &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + page->index << PAGE_CACHE_SHIFT, + ins.objectid, 1, 1); + BUG_ON(ret); + SetPageChecked(page); + kaddr = kmap(page); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + btrfs_csum_file_block(trans, root, inode->i_ino, + page->index << PAGE_CACHE_SHIFT, + kaddr, PAGE_CACHE_SIZE); + kunmap(page); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); +out: + return ret; +} + static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -469,8 +581,11 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) u64 pos 
= (inode->i_size + mask) & ~mask; u64 hole_size; - if (attr->ia_size < pos) + if (attr->ia_size <= pos) goto out; + + btrfs_truncate_page(inode->i_mapping, inode->i_size); + hole_size = (attr->ia_size - pos + mask) & ~mask; hole_size >>= inode->i_blkbits; @@ -483,10 +598,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); } - +out: err = inode_setattr(inode, attr); -out: return err; } static void btrfs_delete_inode(struct inode *inode) @@ -1161,17 +1275,30 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, u64 extent_end = 0; u64 objectid = inode->i_ino; u32 found_type; + u64 alloc_hint = 0; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; struct btrfs_leaf *leaf; struct btrfs_disk_key *found_key; + struct btrfs_trans_handle *trans = NULL; path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); if (create) { WARN_ON(1); + /* this almost but not quite works */ + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + goto out; + } + ret = drop_extents(trans, root, inode, + iblock << inode->i_blkbits, + (iblock + 1) << inode->i_blkbits, + &alloc_hint); + BUG_ON(ret); } ret = btrfs_lookup_file_extent(NULL, root, path, @@ -1185,7 +1312,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, if (ret != 0) { if (path->slots[0] == 0) { btrfs_release_path(root, path); - goto out; + goto not_found; } path->slots[0]--; } @@ -1203,7 +1330,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, found_type != BTRFS_EXTENT_DATA_KEY) { extent_end = 0; extent_start = 0; - goto out; + goto not_found; } found_type = btrfs_file_extent_type(item); extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); @@ -1211,7 +1338,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, extent_start = extent_start >> 
inode->i_blkbits; extent_end = extent_start + btrfs_file_extent_num_blocks(item); err = 0; - if (blocknr == 0) + if (btrfs_file_extent_disk_blocknr(item) == 0) goto out; if (iblock >= extent_start && iblock < extent_end) { btrfs_map_bh_to_logical(root, result, blocknr + @@ -1227,7 +1354,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, extent_end = (extent_start + size) >> inode->i_blkbits; extent_start >>= inode->i_blkbits; if (iblock < extent_start || iblock > extent_end) { - goto out; + goto not_found; } ptr = btrfs_file_extent_inline_start(item); map = kmap(result->b_page); @@ -1239,7 +1366,24 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, SetPageChecked(result->b_page); btrfs_map_bh_to_logical(root, result, 0); } +not_found: + if (create) { + struct btrfs_key ins; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, + 1, alloc_hint, (u64)-1, + &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + iblock << inode->i_blkbits, + ins.objectid, ins.offset, + ins.offset); + BUG_ON(ret); + SetPageChecked(result->b_page); + btrfs_map_bh_to_logical(root, result, ins.objectid); + } out: + if (trans) + err = btrfs_end_transaction(trans, root); btrfs_free_path(path); return err; } @@ -1258,7 +1402,7 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - return nobh_prepare_write(page, from, to, btrfs_get_block); + return block_prepare_write(page, from, to, btrfs_get_block); } static void btrfs_write_super(struct super_block *sb) @@ -1500,12 +1644,13 @@ static void btrfs_truncate(struct inode *inode) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; - nobh_truncate_page(inode->i_mapping, inode->i_size); + btrfs_truncate_page(inode->i_mapping, inode->i_size); - /* FIXME, add redo link to tree so we don't leak on crash */ mutex_lock(&root->fs_info->fs_mutex); trans = 
btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); + + /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode); BUG_ON(ret); btrfs_update_inode(trans, root, inode); @@ -1515,10 +1660,6 @@ static void btrfs_truncate(struct inode *inode) btrfs_btree_balance_dirty(root); } -/* - * Make sure any changes to nobh_commit_write() are reflected in - * nobh_truncate_page(), since it doesn't call commit_write(). - */ static int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -1528,6 +1669,7 @@ static int btrfs_commit_write(struct file *file, struct page *page, SetPageUptodate(page); bh = page_buffers(page); + set_buffer_uptodate(bh); if (buffer_mapped(bh) && bh->b_blocknr != 0) { set_page_dirty(page); } @@ -1972,8 +2114,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, first_index = pos >> PAGE_CACHE_SHIFT; last_index = (pos + count) >> PAGE_CACHE_SHIFT; - if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size && - (pos & (PAGE_CACHE_SIZE - 1))) { + if ((pos & (PAGE_CACHE_SIZE - 1))) { pinned[0] = grab_cache_page(inode->i_mapping, first_index); if (!PageUptodate(pinned[0])) { ret = mpage_readpage(pinned[0], btrfs_get_block); @@ -1983,10 +2124,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, unlock_page(pinned[0]); } } - if (first_index != last_index && - (last_index << PAGE_CACHE_SHIFT) < inode->i_size && - pos + count < inode->i_size && - (count & (PAGE_CACHE_SIZE - 1))) { + if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { pinned[1] = grab_cache_page(inode->i_mapping, last_index); if (!PageUptodate(pinned[1])) { ret = mpage_readpage(pinned[1], btrfs_get_block); @@ -2110,23 +2248,34 @@ static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, if (!PageChecked(page)) { /* FIXME, do it per block */ struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct buffer_head *bh; + + if 
(page_has_buffers(page)) { + bh = page_buffers(page); + if (!buffer_mapped(bh)) { + SetPageChecked(page); + goto checked; + } + } - int ret = btrfs_csum_verify_file_block(root, + ret = btrfs_csum_verify_file_block(root, page->mapping->host->i_ino, page->index << PAGE_CACHE_SHIFT, kmap(page), PAGE_CACHE_SIZE); if (ret) { if (ret != -ENOENT) { - printk("failed to verify ino %lu page %lu\n", + printk("failed to verify ino %lu page %lu ret %d\n", page->mapping->host->i_ino, - page->index); - memset(page_address(page), 0, PAGE_CACHE_SIZE); + page->index, ret); + memset(page_address(page), 1, PAGE_CACHE_SIZE); flush_dcache_page(page); } } SetPageChecked(page); kunmap(page); } +checked: /* * Faults on the destination of a read are common, so do it before * taking the kmap. -- cgit v1.2.3 From fabb568183de7996257080260d3537fa75b3667e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 7 Jun 2007 22:13:21 -0400 Subject: Btrfs: d_type optimization Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 ++++++++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/super.c | 66 +++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 68 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1bb62867deda..a1c95c980fdc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -34,6 +34,16 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_CRC32_SIZE 4 #define BTRFS_EMPTY_DIR_SIZE 6 +#define BTRFS_FT_UNKNOWN 0 +#define BTRFS_FT_REG_FILE 1 +#define BTRFS_FT_DIR 2 +#define BTRFS_FT_CHRDEV 3 +#define BTRFS_FT_BLKDEV 4 +#define BTRFS_FT_FIFO 5 +#define BTRFS_FT_SOCK 6 +#define BTRFS_FT_SYMLINK 7 +#define BTRFS_FT_MAX 8 + /* * the key defines the order in the tree, and so it also defines (optimal) * block layout. objectid corresonds to the inode number. 
The flags diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 85616b458e18..645a4228bfe2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1264,6 +1264,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0, data); + BUG_ON(ret); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0eb64d6eaf9d..b58b4cf66766 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -43,6 +43,18 @@ static int drop_extents(struct btrfs_trans_handle *trans, static int btrfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *result, int create); + +#define S_SHIFT 12 +static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, + [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, +}; + static void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -785,6 +797,9 @@ static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, break; } } +static unsigned char btrfs_filetype_table[] = { + DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK +}; static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { @@ -799,7 +814,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_leaf *leaf; int slot; int advance; - unsigned char d_type = DT_UNKNOWN; + unsigned char d_type; int over = 0; u32 di_cur; u32 di_total; @@ -853,6 +868,7 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) di_cur = 0; di_total = btrfs_item_size(leaf->items + slot); while(di_cur < di_total) { + d_type = btrfs_filetype_table[btrfs_dir_type(di)]; over = filldir(dirent, (const char *)(di + 1), 
btrfs_dir_name_len(di), btrfs_disk_key_offset(&item->key), @@ -1012,6 +1028,11 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return inode; } +static inline u8 btrfs_inode_type(struct inode *inode) +{ + return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; +} + static int btrfs_add_link(struct btrfs_trans_handle *trans, struct dentry *dentry, struct inode *inode) { @@ -1026,7 +1047,7 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_insert_dir_item(trans, root, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, - &key, 0); + &key, btrfs_inode_type(inode)); if (ret == 0) { dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; ret = btrfs_update_inode(trans, root, @@ -1150,12 +1171,12 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, - &key, 1); + &key, BTRFS_FT_DIR); if (ret) goto error; key.objectid = dirid; ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, - &key, 1); + &key, BTRFS_FT_DIR); if (ret) goto error; error: @@ -1265,6 +1286,10 @@ printk("btrfs sync_fs\n"); return 0; } +#define BTRFS_GET_BLOCK_NO_CREATE 0 +#define BTRFS_GET_BLOCK_CREATE 1 +#define BTRFS_GET_BLOCK_NO_DIRECT 2 + static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { @@ -1286,7 +1311,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, path = btrfs_alloc_path(); BUG_ON(!path); btrfs_init_path(path); - if (create) { + if (create & BTRFS_GET_BLOCK_CREATE) { WARN_ON(1); /* this almost but not quite works */ trans = btrfs_start_transaction(root, 1); @@ -1349,6 +1374,11 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, char *ptr; char *map; u32 size; + + if (create & BTRFS_GET_BLOCK_NO_DIRECT) { + err = -EINVAL; + goto out; + } size = 
btrfs_file_extent_inline_len(leaf->items + path->slots[0]); extent_end = (extent_start + size) >> inode->i_blkbits; @@ -1367,7 +1397,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, btrfs_map_bh_to_logical(root, result, 0); } not_found: - if (create) { + if (create & BTRFS_GET_BLOCK_CREATE) { struct btrfs_key ins; ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, alloc_hint, (u64)-1, @@ -1399,6 +1429,21 @@ static int btrfs_get_block(struct inode *inode, sector_t iblock, return err; } +static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + mutex_lock(&root->fs_info->fs_mutex); + btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + +static sector_t btrfs_bmap(struct address_space *as, sector_t block) +{ + return generic_block_bmap(as, block, btrfs_get_block_bmap); +} + static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -2428,7 +2473,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) key.offset = (u64)-1; dir = root->fs_info->sb->s_root->d_inode; ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, dir->i_ino, &key, 0); + name, namelen, dir->i_ino, &key, + BTRFS_FT_DIR); BUG_ON(ret); ret = btrfs_commit_transaction(trans, root); @@ -2505,7 +2551,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, name, namelen, root->fs_info->sb->s_root->d_inode->i_ino, - &key, 0); + &key, BTRFS_FT_DIR); BUG_ON(ret); @@ -2833,7 +2879,8 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, btrfs_release_path(root, path); ret = btrfs_insert_dir_item(trans, root, "..", 2, - old_inode->i_ino, location, 0); + old_inode->i_ino, location, + BTRFS_FT_DIR); if 
(ret) goto out_fail; } @@ -3003,6 +3050,7 @@ static struct address_space_operations btrfs_aops = { .sync_page = block_sync_page, .prepare_write = btrfs_prepare_write, .commit_write = btrfs_commit_write, + .bmap = btrfs_bmap, }; static struct address_space_operations btrfs_symlink_aops = { -- cgit v1.2.3 From 08607c1b182b3b8b54d7141a0c012cda17d201e6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 8 Jun 2007 15:33:54 -0400 Subject: Btrfs: add compat ioctl Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 - fs/btrfs/ctree.h | 5 +++-- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/super.c | 27 +++++++++++++++++++++-- fs/btrfs/transaction.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.h | 8 +++++++ 6 files changed, 98 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 861e34ae0979..61d25b5de287 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -7,7 +7,6 @@ * Get rid of struct ctree_path, limiting tree levels held at one time * Add generation number to key pointer in nodes * Add generation number to inode -* Add ability to switch a block group from data to metadata or vice versa * forbid cross subvolume renames and hardlinks * Release * Do real tree locking diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a1c95c980fdc..74e6aadf6bb7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -281,6 +281,7 @@ struct btrfs_block_group_cache { }; struct crypto_hash; + struct btrfs_fs_info { struct btrfs_root *extent_root; struct btrfs_root *tree_root; @@ -308,9 +309,9 @@ struct btrfs_fs_info { struct mutex fs_mutex; struct list_head trans_list; struct crypto_hash *hash_tfm; + struct delayed_work trans_work; spinlock_t hash_lock; int do_barriers; - struct kobject kobj; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3da057f78bc8..048282f06afb 100644 --- 
a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -577,6 +577,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->extent_tree_insert_nr = 0; fs_info->extent_tree_prealloc_nr = 0; + INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -648,7 +649,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; - memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); mutex_unlock(&fs_info->fs_mutex); return tree_root; } @@ -746,6 +746,7 @@ int close_ctree(struct btrfs_root *root) struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = root->fs_info; + btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); @@ -776,7 +777,6 @@ int close_ctree(struct btrfs_root *root) del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); - kobject_unregister(&fs_info->kobj); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b58b4cf66766..5864917953a4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -950,7 +951,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return -ENOMEM; } sb->s_root = root_dentry; - + btrfs_transaction_queue_work(tree_root, HZ * 30); return 0; } @@ -1452,7 +1453,7 @@ static int btrfs_prepare_write(struct file *file, struct page *page, static void btrfs_write_super(struct super_block *sb) { - btrfs_sync_fs(sb, 1); + sb->s_dirt = 0; } static int btrfs_readpage(struct file *file, struct page *page) @@ -2698,6 +2699,20 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int 
return ret; } +#ifdef CONFIG_COMPAT +static long btrfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file->f_path.dentry->d_inode; + int ret; + lock_kernel(); + ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; + +} +#endif + static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; struct kmem_cache *btrfs_transaction_cachep; @@ -3042,6 +3057,9 @@ static struct file_operations btrfs_dir_file_operations = { .read = generic_read_dir, .readdir = btrfs_readdir, .ioctl = btrfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif }; static struct address_space_operations btrfs_aops = { @@ -3073,6 +3091,9 @@ static struct file_operations btrfs_file_operations = { .open = generic_file_open, .ioctl = btrfs_ioctl, .fsync = btrfs_sync_file, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif }; static struct inode_operations btrfs_symlink_inode_operations = { @@ -3085,6 +3106,7 @@ static int __init init_btrfs_fs(void) { int err; printk("btrfs loaded!\n"); + btrfs_init_transaction_sys(); err = init_inodecache(); if (err) return err; @@ -3095,6 +3117,7 @@ static int __init init_btrfs_fs(void) static void __exit exit_btrfs_fs(void) { + btrfs_exit_transaction_sys(); destroy_inodecache(); unregister_filesystem(&btrfs_fs_type); printk("btrfs unloaded\n"); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f0f03121b7b2..bf7eef67ba0b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -8,6 +8,8 @@ static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; +static struct workqueue_struct *trans_wq; + #define BTRFS_ROOT_TRANS_TAG 0 #define TRANS_MAGIC 0xE1E10E @@ -44,6 +46,7 @@ static int join_transaction(struct btrfs_root *root) cur_trans->in_commit = 0; cur_trans->use_count = 1; cur_trans->commit_done = 
0; + cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); init_bit_radix(&cur_trans->dirty_pages); } @@ -350,3 +353,60 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } +void btrfs_transaction_cleaner(struct work_struct *work) +{ + struct btrfs_fs_info *fs_info = container_of(work, + struct btrfs_fs_info, + trans_work.work); + + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_transaction *cur; + struct btrfs_trans_handle *trans; + unsigned long now; + unsigned long delay = HZ * 30; + int ret; + +printk("btrfs transaction cleaner\n"); + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->trans_mutex); + cur = root->fs_info->running_transaction; + if (!cur) { + mutex_unlock(&root->fs_info->trans_mutex); + goto out; + } + now = get_seconds(); + if (now < cur->start_time || now - cur->start_time < 30) { + mutex_unlock(&root->fs_info->trans_mutex); + delay = HZ * 5; + goto out; + } + mutex_unlock(&root->fs_info->trans_mutex); +printk("forcing commit\n"); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); +out: + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_transaction_queue_work(root, delay); +} + +void btrfs_transaction_queue_work(struct btrfs_root *root, int delay) +{ + queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay); +} + +void btrfs_transaction_flush_work(struct btrfs_root *root) +{ + cancel_rearming_delayed_workqueue(trans_wq, &root->fs_info->trans_work); + flush_workqueue(trans_wq); +} + +void __init btrfs_init_transaction_sys(void) +{ + trans_wq = create_workqueue("btrfs"); +} + +void __exit btrfs_exit_transaction_sys(void) +{ + destroy_workqueue(trans_wq); +} + diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b5378119e76c..0b08208be853 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -11,6 +11,7 @@ struct btrfs_transaction { int magic; struct list_head list; struct 
radix_tree_root dirty_pages; + unsigned long start_time; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; }; @@ -47,4 +48,11 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); + +void btrfs_transaction_cleaner(struct work_struct *work); +void btrfs_transaction_flush_work(struct btrfs_root *root); +void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); +void btrfs_init_transaction_sys(void); +void btrfs_exit_transaction_sys(void); + #endif -- cgit v1.2.3 From 0cf6c620176a294bdf1bedddc492f6ae857e0019 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 9 Jun 2007 09:22:25 -0400 Subject: Btrfs: remove device tree Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 105 +---------------------- fs/btrfs/disk-io.c | 224 ++----------------------------------------------- fs/btrfs/super.c | 88 ------------------- fs/btrfs/transaction.c | 6 -- 4 files changed, 10 insertions(+), 413 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e93ba1a5c812..5ab25a0cb16a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -16,11 +16,10 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_MAGIC "_BtRfS_M" #define BTRFS_ROOT_TREE_OBJECTID 1ULL -#define BTRFS_DEV_TREE_OBJECTID 2ULL -#define BTRFS_EXTENT_TREE_OBJECTID 3ULL -#define BTRFS_FS_TREE_OBJECTID 4ULL -#define BTRFS_ROOT_TREE_DIR_OBJECTID 5ULL -#define BTRFS_FIRST_FREE_OBJECTID 6ULL +#define BTRFS_EXTENT_TREE_OBJECTID 2ULL +#define BTRFS_FS_TREE_OBJECTID 3ULL +#define BTRFS_ROOT_TREE_DIR_OBJECTID 4ULL +#define BTRFS_FIRST_FREE_OBJECTID 5ULL /* * we can actually store much bigger names, but lets not confuse the rest @@ -111,12 +110,6 @@ struct btrfs_super_block { __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; - __le64 last_device_id; - /* fields below here vary with the underlying disk */ - 
__le64 device_block_start; - __le64 device_num_blocks; - __le64 device_root; - __le64 device_id; } __attribute__ ((__packed__)); /* @@ -251,11 +244,6 @@ struct btrfs_csum_item { u8 csum; } __attribute__ ((__packed__)); -struct btrfs_device_item { - __le16 pathlen; - __le64 device_id; -} __attribute__ ((__packed__)); - /* tag for the radix tree of block groups in ram */ #define BTRFS_BLOCK_GROUP_DIRTY 0 #define BTRFS_BLOCK_GROUP_AVAIL 1 @@ -286,11 +274,9 @@ struct btrfs_fs_info { spinlock_t hash_lock; struct btrfs_root *extent_root; struct btrfs_root *tree_root; - struct btrfs_root *dev_root; struct radix_tree_root fs_roots_radix; struct radix_tree_root pending_del_radix; struct radix_tree_root pinned_radix; - struct radix_tree_root dev_radix; struct radix_tree_root block_group_radix; struct radix_tree_root block_group_data_radix; struct radix_tree_root extent_map_radix; @@ -385,11 +371,6 @@ struct btrfs_root { */ #define BTRFS_BLOCK_GROUP_ITEM_KEY 34 -/* - * dev items list the devices that make up the FS - */ -#define BTRFS_DEV_ITEM_KEY 35 - /* * string items are for debugging. 
They just store a short string of * data in the FS @@ -880,62 +861,6 @@ static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 s->root_dir_objectid = cpu_to_le64(val); } -static inline u64 btrfs_super_last_device_id(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->last_device_id); -} - -static inline void btrfs_set_super_last_device_id(struct btrfs_super_block *s, - u64 val) -{ - s->last_device_id = cpu_to_le64(val); -} - -static inline u64 btrfs_super_device_id(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->device_id); -} - -static inline void btrfs_set_super_device_id(struct btrfs_super_block *s, - u64 val) -{ - s->device_id = cpu_to_le64(val); -} - -static inline u64 btrfs_super_device_block_start(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->device_block_start); -} - -static inline void btrfs_set_super_device_block_start(struct btrfs_super_block - *s, u64 val) -{ - s->device_block_start = cpu_to_le64(val); -} - -static inline u64 btrfs_super_device_num_blocks(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->device_num_blocks); -} - -static inline void btrfs_set_super_device_num_blocks(struct btrfs_super_block - *s, u64 val) -{ - s->device_num_blocks = cpu_to_le64(val); -} - -static inline u64 btrfs_super_device_root(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->device_root); -} - -static inline void btrfs_set_super_device_root(struct btrfs_super_block - *s, u64 val) -{ - s->device_root = cpu_to_le64(val); -} - - static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) { return (u8 *)l->items; @@ -1031,28 +956,6 @@ static inline void btrfs_set_file_extent_num_blocks(struct e->num_blocks = cpu_to_le64(val); } -static inline u16 btrfs_device_pathlen(struct btrfs_device_item *d) -{ - return le16_to_cpu(d->pathlen); -} - -static inline void btrfs_set_device_pathlen(struct btrfs_device_item *d, - u16 val) -{ - d->pathlen = cpu_to_le16(val); -} - -static inline u64 btrfs_device_id(struct btrfs_device_item 
*d) -{ - return le64_to_cpu(d->device_id); -} - -static inline void btrfs_set_device_id(struct btrfs_device_item *d, - u64 val) -{ - d->device_id = cpu_to_le64(val); -} - static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 751069c0e9f5..8a88404525e8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -11,47 +11,9 @@ #include "transaction.h" #include "btrfs_inode.h" -struct dev_lookup { - u64 block_start; - u64 num_blocks; - u64 device_id; - struct block_device *bdev; -}; - -int btrfs_insert_dev_radix(struct btrfs_root *root, - struct block_device *bdev, - u64 device_id, - u64 block_start, - u64 num_blocks) -{ - struct dev_lookup *lookup; - int ret; - - lookup = kmalloc(sizeof(*lookup), GFP_NOFS); - if (!lookup) - return -ENOMEM; - lookup->block_start = block_start; - lookup->num_blocks = num_blocks; - lookup->bdev = bdev; - lookup->device_id = device_id; - - ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + - num_blocks - 1, lookup); - return ret; -} - u64 bh_blocknr(struct buffer_head *bh) { - int blkbits = bh->b_page->mapping->host->i_blkbits; - u64 blocknr = bh->b_page->index << (PAGE_CACHE_SHIFT - blkbits); - unsigned long offset; - - if (PageHighMem(bh->b_page)) - offset = (unsigned long)bh->b_data; - else - offset = bh->b_data - (char *)page_address(bh->b_page); - blocknr += offset >> (PAGE_CACHE_SHIFT - blkbits); - return blocknr; + return bh->b_blocknr; } static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) @@ -102,32 +64,14 @@ out_unlock: int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical) { - struct dev_lookup *lookup[2]; - - int ret; - if (logical == 0) { bh->b_bdev = NULL; bh->b_blocknr = 0; set_buffer_mapped(bh); - return 0; - } - root = root->fs_info->dev_root; - ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, - (void **)lookup, - (unsigned long)logical, - 
ARRAY_SIZE(lookup)); - if (ret == 0 || lookup[0]->block_start > logical || - lookup[0]->block_start + lookup[0]->num_blocks <= logical) { - ret = -ENOENT; - goto out; + } else { + map_bh(bh, root->fs_info->sb, logical); } - bh->b_bdev = lookup[0]->bdev; - bh->b_blocknr = logical - lookup[0]->block_start; - set_buffer_mapped(bh); - ret = 0; -out: - return ret; + return 0; } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, @@ -382,24 +326,18 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, u64 highest_inode; int ret = 0; -printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); - if (root) { -printk("found %p in cache\n", root); + if (root) return root; - } root = kmalloc(sizeof(*root), GFP_NOFS); - if (!root) { -printk("failed1\n"); + if (!root) return ERR_PTR(-ENOMEM); - } if (location->offset == (u64)-1) { ret = find_and_setup_root(fs_info->sb->s_blocksize, fs_info->tree_root, fs_info, location->objectid, root); if (ret) { -printk("failed2\n"); kfree(root); return ERR_PTR(ret); } @@ -413,7 +351,6 @@ printk("failed2\n"); BUG_ON(!path); ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); if (ret != 0) { -printk("internal search_slot gives us %d\n", ret); if (ret > 0) ret = -ENOENT; goto out; @@ -435,13 +372,11 @@ out: btrfs_root_blocknr(&root->root_item)); BUG_ON(!root->node); insert: -printk("inserting %p\n", root); root->ref_cows = 1; ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); if (ret) { -printk("radix_tree_insert gives us %d\n", ret); brelse(root->node); kfree(root); return ERR_PTR(ret); @@ -450,116 +385,25 @@ printk("radix_tree_insert gives us %d\n", ret); if (ret == 0) { root->highest_inode = highest_inode; root->last_inode_alloc = highest_inode; -printk("highest inode is %Lu\n", highest_inode); } -printk("all 
worked\n"); return root; } -static int btrfs_open_disk(struct btrfs_root *root, u64 device_id, - u64 block_start, u64 num_blocks, - char *filename, int name_len) -{ - char *null_filename; - struct block_device *bdev; - int ret; - - null_filename = kmalloc(name_len + 1, GFP_NOFS); - if (!null_filename) - return -ENOMEM; - memcpy(null_filename, filename, name_len); - null_filename[name_len] = '\0'; - - bdev = open_bdev_excl(null_filename, O_RDWR, root->fs_info->sb); - if (IS_ERR(bdev)) { - ret = PTR_ERR(bdev); - goto out; - } - set_blocksize(bdev, root->fs_info->sb->s_blocksize); - ret = btrfs_insert_dev_radix(root, bdev, device_id, - block_start, num_blocks); - BUG_ON(ret); - ret = 0; -out: - kfree(null_filename); - return ret; -} - -static int read_device_info(struct btrfs_root *root) -{ - struct btrfs_path *path; - int ret; - struct btrfs_key key; - struct btrfs_leaf *leaf; - struct btrfs_device_item *dev_item; - int nritems; - int slot; - - root = root->fs_info->dev_root; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - key.objectid = 0; - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); - - mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); - while(1) { - slot = path->slots[0]; - if (slot >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret) - break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); - slot = path->slots[0]; - } - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - if (btrfs_key_type(&key) != BTRFS_DEV_ITEM_KEY) { - path->slots[0]++; - continue; - } - dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item); -printk("found key %Lu %Lu\n", key.objectid, key.offset); - if (btrfs_device_id(dev_item) != - btrfs_super_device_id(root->fs_info->disk_super)) { - ret = btrfs_open_disk(root, 
btrfs_device_id(dev_item), - key.objectid, key.offset, - (char *)(dev_item + 1), - btrfs_device_pathlen(dev_item)); - BUG_ON(ret); - } - path->slots[0]++; - } - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return 0; -} - struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), - GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; struct btrfs_super_block *disk_super; - struct dev_lookup *dev_lookup; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_map_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); - INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); @@ -568,7 +412,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; - fs_info->dev_root = dev_root; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -595,19 +438,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - __setup_root(sb->s_blocksize, dev_root, - fs_info, BTRFS_DEV_TREE_OBJECTID); - __setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - dev_lookup = kmalloc(sizeof(*dev_lookup), GFP_NOFS); - dev_lookup->block_start = 0; - dev_lookup->num_blocks = (u32)-2; - dev_lookup->bdev = sb->s_bdev; - dev_lookup->device_id = 0; - ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup); - BUG_ON(ret); fs_info->sb_buffer = read_tree_block(tree_root, 
BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); @@ -622,24 +455,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_super_total_blocks(disk_super) << fs_info->btree_inode->i_blkbits); - radix_tree_delete(&fs_info->dev_radix, (u32)-2); - dev_lookup->block_start = btrfs_super_device_block_start(disk_super); - dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); - dev_lookup->device_id = btrfs_super_device_id(disk_super); - - ret = radix_tree_insert(&fs_info->dev_radix, - dev_lookup->block_start + - dev_lookup->num_blocks - 1, dev_lookup); - BUG_ON(ret); - fs_info->disk_super = disk_super; - - dev_root->node = read_tree_block(tree_root, - btrfs_super_device_root(disk_super)); - - ret = read_device_info(dev_root); - BUG_ON(ret); - tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -719,30 +535,6 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) return 0; } -static int free_dev_radix(struct btrfs_fs_info *fs_info) -{ - struct dev_lookup *lookup[8]; - struct block_device *super_bdev = fs_info->sb->s_bdev; - int ret; - int i; - while(1) { - ret = radix_tree_gang_lookup(&fs_info->dev_radix, - (void **)lookup, 0, - ARRAY_SIZE(lookup)); - if (!ret) - break; - for (i = 0; i < ret; i++) { - if (lookup[i]->bdev != super_bdev) - close_bdev_excl(lookup[i]->bdev); - radix_tree_delete(&fs_info->dev_radix, - lookup[i]->block_start + - lookup[i]->num_blocks - 1); - kfree(lookup[i]); - } - } - return 0; -} - int close_ctree(struct btrfs_root *root) { int ret; @@ -765,9 +557,6 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) btrfs_block_release(fs_info->extent_root, fs_info->extent_root->node); - if (fs_info->dev_root->node) - btrfs_block_release(fs_info->dev_root, - fs_info->dev_root->node); if (fs_info->tree_root->node) btrfs_block_release(fs_info->tree_root, fs_info->tree_root->node); @@ -776,7 +565,6 @@ int close_ctree(struct btrfs_root *root) 
truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); - free_dev_radix(fs_info); btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5864917953a4..2b24a5a2be63 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2566,83 +2566,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) return 0; } -static int add_disk(struct btrfs_root *root, char *name, int namelen) -{ - struct block_device *bdev; - struct btrfs_path *path; - struct super_block *sb = root->fs_info->sb; - struct btrfs_root *dev_root = root->fs_info->dev_root; - struct btrfs_trans_handle *trans; - struct btrfs_device_item *dev_item; - struct btrfs_key key; - u16 item_size; - u64 num_blocks; - u64 new_blocks; - u64 device_id; - int ret; - -printk("adding disk %s\n", name); - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super); - bdev = open_bdev_excl(name, O_RDWR, sb); - if (IS_ERR(bdev)) { - ret = PTR_ERR(bdev); -printk("open bdev excl failed ret %d\n", ret); - goto out_nolock; - } - set_blocksize(bdev, sb->s_blocksize); - new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits; - key.objectid = num_blocks; - key.offset = new_blocks; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); - - mutex_lock(&dev_root->fs_info->fs_mutex); - trans = btrfs_start_transaction(dev_root, 1); - item_size = sizeof(*dev_item) + namelen; -printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size); - ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size); - if (ret) { -printk("insert failed %d\n", ret); - close_bdev_excl(bdev); - if (ret > 0) - ret = -EEXIST; - goto out; - } - dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_device_item); - btrfs_set_device_pathlen(dev_item, namelen); - 
memcpy(dev_item + 1, name, namelen); - - device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1; - btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id); - btrfs_set_device_id(dev_item, device_id); - mark_buffer_dirty(path->nodes[0]); - - ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks, - new_blocks); - - if (!ret) { - btrfs_set_super_total_blocks(root->fs_info->disk_super, - num_blocks + new_blocks); - i_size_write(root->fs_info->btree_inode, - (num_blocks + new_blocks) << - root->fs_info->btree_inode->i_blkbits); - } - -out: - ret = btrfs_commit_transaction(trans, dev_root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); -out_nolock: - btrfs_free_path(path); - btrfs_btree_balance_dirty(root); - - return ret; -} - static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) { @@ -2682,17 +2605,6 @@ static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int ret = create_snapshot(root, vol_args.name, namelen); WARN_ON(ret); break; - case BTRFS_IOC_ADD_DISK: - if (copy_from_user(&vol_args, - (struct btrfs_ioctl_vol_args __user *)arg, - sizeof(vol_args))) - return -EFAULT; - namelen = strlen(vol_args.name); - if (namelen > BTRFS_VOL_NAME_MAX) - return -EINVAL; - vol_args.name[namelen] = '\0'; - ret = add_disk(root, vol_args.name, namelen); - break; default: return -ENOTTY; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a2c5820f1d3d..4f3c8ddcb401 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -150,13 +150,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *dev_root = fs_info->dev_root; - if (btrfs_super_device_root(fs_info->disk_super) != - bh_blocknr(dev_root->node)) { - btrfs_set_super_device_root(fs_info->disk_super, - 
bh_blocknr(dev_root->node)); - } btrfs_write_dirty_block_groups(trans, extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); -- cgit v1.2.3 From 5276aedab0baacfb3c5483208b8be85a8416bd5f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 11 Jun 2007 21:33:38 -0400 Subject: Btrfs: fix oops after block group lookup Signed-off-by: Chris Mason --- fs/btrfs/TODO | 1 - fs/btrfs/ctree.h | 3 +++ fs/btrfs/extent-tree.c | 29 ++++++++++++++++------------- fs/btrfs/super.c | 8 ++------ 4 files changed, 21 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 61d25b5de287..d9b6d38c603a 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -16,6 +16,5 @@ * Use relocation to try and fix write errors * Make allocator much smarter * xattrs (directory streams for regular files) -* fsck * Scrub & defrag diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5ab25a0cb16a..4e136b7b03ae 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -998,6 +998,9 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh) btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +struct btrfs_block_group_cache *btrfs_lookup_block_group(struct + btrfs_fs_info *info, + u64 blocknr); struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 645a4228bfe2..f509ffa38d03 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -135,9 +135,9 @@ printk("cache block group %Lu\n", block_group->key.objectid); return 0; } -static struct btrfs_block_group_cache *lookup_block_group(struct - btrfs_fs_info *info, - u64 blocknr) +struct btrfs_block_group_cache *btrfs_lookup_block_group(struct + btrfs_fs_info *info, + u64 blocknr) { struct btrfs_block_group_cache *block_group; int ret; @@ -208,7 +208,8 @@ out: return max(cache->last_alloc, search_start); 
new_group: - cache = lookup_block_group(root->fs_info, last + cache->key.offset - 1); + cache = btrfs_lookup_block_group(root->fs_info, + last + cache->key.offset - 1); if (!cache) { return max((*cache_ret)->last_alloc, search_start); } @@ -250,7 +251,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, if (search_start) { struct btrfs_block_group_cache *shint; - shint = lookup_block_group(info, search_start); + shint = btrfs_lookup_block_group(info, search_start); if (shint->data == data) { used = btrfs_block_group_used(&shint->item); if (used + shint->pinned < @@ -576,7 +577,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, int ret; while(total) { - cache = lookup_block_group(info, blocknr); + cache = btrfs_lookup_block_group(info, blocknr); if (!cache) { printk(KERN_CRIT "blocknr %Lu lookup failed\n", blocknr); @@ -677,8 +678,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct first = gang[0]; for (i = 0; i < ret; i++) { clear_radix_bit(pinned_radix, gang[i]); - block_group = lookup_block_group(root->fs_info, - gang[i]); + block_group = btrfs_lookup_block_group(root->fs_info, + gang[i]); if (block_group) { WARN_ON(block_group->pinned == 0); block_group->pinned--; @@ -751,7 +752,8 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); if (!err) { struct btrfs_block_group_cache *cache; - cache = lookup_block_group(root->fs_info, blocknr); + cache = btrfs_lookup_block_group(root->fs_info, + blocknr); if (cache) cache->pinned++; } @@ -851,7 +853,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct for (i = 0; i < ret; i++) { wret = set_radix_bit(pinned_radix, gang[i]); if (wret == 0) { - cache = lookup_block_group(extent_root->fs_info, + cache = + btrfs_lookup_block_group(extent_root->fs_info, gang[i]); if (cache) cache->pinned++; @@ -938,7 +941,7 @@ static int find_free_extent(struct 
btrfs_trans_handle *trans, struct btrfs_root if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(info->disk_super); if (hint_block) { - block_group = lookup_block_group(info, hint_block); + block_group = btrfs_lookup_block_group(info, hint_block); block_group = btrfs_find_block_group(root, block_group, hint_block, data, 1); } else { @@ -1118,7 +1121,7 @@ check_pending: info->extent_tree_prealloc_nr = total_found; } if (!data) { - block_group = lookup_block_group(info, ins->objectid); + block_group = btrfs_lookup_block_group(info, ins->objectid); if (block_group) { if (fill_prealloc) block_group->last_prealloc = @@ -1143,7 +1146,7 @@ new_group: else wrapped = 1; } - block_group = lookup_block_group(info, search_start); + block_group = btrfs_lookup_block_group(info, search_start); cond_resched(); if (!full_scan) block_group = btrfs_find_block_group(root, block_group, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2b24a5a2be63..9749090cfbb8 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -62,7 +62,6 @@ static void btrfs_read_locked_inode(struct inode *inode) struct btrfs_inode_item *inode_item; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key location; - struct btrfs_block_group_cache *alloc_group; u64 alloc_group_block; int ret; @@ -95,11 +94,8 @@ static void btrfs_read_locked_inode(struct inode *inode) inode->i_blocks = btrfs_inode_nblocks(inode_item); inode->i_generation = btrfs_inode_generation(inode_item); alloc_group_block = btrfs_inode_block_group(inode_item); - ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix, - (void **)&alloc_group, - alloc_group_block, 1); - BUG_ON(!ret); - BTRFS_I(inode)->block_group = alloc_group; + BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, + alloc_group_block); btrfs_free_path(path); inode_item = NULL; -- cgit v1.2.3 From 39279cc3d2704cfbf9c35dcb5bdd392159ae4625 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 06:35:45 -0400 Subject: 
Btrfs: split up super.c Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 4 +- fs/btrfs/TODO | 1 + fs/btrfs/ctree.h | 25 +- fs/btrfs/disk-io.c | 46 +- fs/btrfs/file.c | 783 ++++++++++++++ fs/btrfs/inode.c | 2210 ++++++++++++++++++++++++++++++++++++++ fs/btrfs/super.c | 2979 ++-------------------------------------------------- 7 files changed, 3120 insertions(+), 2928 deletions(-) create mode 100644 fs/btrfs/file.c create mode 100644 fs/btrfs/inode.c (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 5346f706b2c5..5af2426116d3 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o + transaction.o bit-radix.o inode.o file.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ @@ -16,7 +16,7 @@ else KERNELDIR := /lib/modules/`uname -r`/build all:: - $(MAKE) C=1 -C $(KERNELDIR) M=`pwd` modules + $(MAKE) -C $(KERNELDIR) M=`pwd` modules clean:: rm *.o btrfs.ko endif diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index d9b6d38c603a..4a48a2365d57 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,3 +1,4 @@ +* fix printk warnings * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling * Make allocator smarter diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4e136b7b03ae..24410d42e3ce 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1123,5 +1123,28 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); -/* super.c */ +/* inode.c */ +void btrfs_delete_inode(struct inode *inode); +void btrfs_read_locked_inode(struct inode *inode); +int btrfs_write_inode(struct inode 
*inode, int wait); +void btrfs_dirty_inode(struct inode *inode); +struct inode *btrfs_alloc_inode(struct super_block *sb); +void btrfs_destroy_inode(struct inode *inode); +int btrfs_init_cachep(void); +void btrfs_destroy_cachep(void); +int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, + unsigned long arg); +long btrfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); +struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, + struct btrfs_root *root); +int btrfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to); +int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create); +/* file.c */ +extern struct file_operations btrfs_file_operations; +int btrfs_drop_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + u64 start, u64 end, u64 *hint_block); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8a88404525e8..96bf3ef3a798 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -22,7 +22,7 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n", bh_blocknr(buf), btrfs_header_blocknr(&node->header)); - BUG(); + return 1; } return 0; } @@ -253,7 +253,7 @@ uptodate: set_buffer_checked(bh); } if (check_tree_block(root, bh)) - BUG(); + goto fail; return bh; fail: brelse(bh); @@ -398,8 +398,13 @@ struct btrfs_root *open_ctree(struct super_block *sb) struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; + int err = -EIO; struct btrfs_super_block *disk_super; + if (!extent_root || !tree_root || !fs_info) { + err = -ENOMEM; + goto fail; + } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_map_radix); @@ -431,9 +436,11 @@ struct btrfs_root *open_ctree(struct 
super_block *sb) mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); + if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { - printk("failed to allocate digest hash\n"); - return NULL; + printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); + err = -ENOMEM; + goto fail_iput; } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); @@ -446,30 +453,53 @@ struct btrfs_root *open_ctree(struct super_block *sb) sb->s_blocksize); if (!fs_info->sb_buffer) - return NULL; + goto fail_iput; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + if (!btrfs_super_root(disk_super)) - return NULL; + goto fail_sb_buffer; i_size_write(fs_info->btree_inode, btrfs_super_total_blocks(disk_super) << fs_info->btree_inode->i_blkbits); fs_info->disk_super = disk_super; + + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + printk("btrfs: valid FS not found on %s\n", sb->s_id); + goto fail_sb_buffer; + } tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); - BUG_ON(!tree_root->node); + if (!tree_root->node) + goto fail_sb_buffer; mutex_lock(&fs_info->fs_mutex); ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); - BUG_ON(ret); + if (ret) { + mutex_unlock(&fs_info->fs_mutex); + goto fail_tree_root; + } btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; mutex_unlock(&fs_info->fs_mutex); return tree_root; + +fail_tree_root: + btrfs_block_release(tree_root, tree_root->node); +fail_sb_buffer: + btrfs_block_release(tree_root, fs_info->sb_buffer); +fail_iput: + iput(fs_info->btree_inode); +fail: + kfree(extent_root); + kfree(tree_root); + kfree(fs_info); + return ERR_PTR(err); } int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root diff --git a/fs/btrfs/file.c 
b/fs/btrfs/file.c new file mode 100644 index 000000000000..7f8e3035d99e --- /dev/null +++ b/fs/btrfs/file.c @@ -0,0 +1,783 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "btrfs_inode.h" +#include "ioctl.h" +#include "print-tree.h" + + +static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, + struct page **prepared_pages, + const char __user * buf) +{ + long page_fault = 0; + int i; + int offset = pos & (PAGE_CACHE_SIZE - 1); + + for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { + size_t count = min_t(size_t, + PAGE_CACHE_SIZE - offset, write_bytes); + struct page *page = prepared_pages[i]; + fault_in_pages_readable(buf, count); + + /* Copy data from userspace to the current page */ + kmap(page); + page_fault = __copy_from_user(page_address(page) + offset, + buf, count); + /* Flush processor's dcache for this page */ + flush_dcache_page(page); + kunmap(page); + buf += count; + write_bytes -= count; + + if (page_fault) + break; + } + return page_fault ? 
-EFAULT : 0; +} + +static void btrfs_drop_pages(struct page **pages, size_t num_pages) +{ + size_t i; + for (i = 0; i < num_pages; i++) { + if (!pages[i]) + break; + unlock_page(pages[i]); + mark_page_accessed(pages[i]); + page_cache_release(pages[i]); + } +} + +static int dirty_and_release_pages(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct file *file, + struct page **pages, + size_t num_pages, + loff_t pos, + size_t write_bytes) +{ + int i; + int offset; + int err = 0; + int ret; + int this_write; + struct inode *inode = file->f_path.dentry->d_inode; + struct buffer_head *bh; + struct btrfs_file_extent_item *ei; + + for (i = 0; i < num_pages; i++) { + offset = pos & (PAGE_CACHE_SIZE -1); + this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); + /* FIXME, one block at a time */ + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + bh = page_buffers(pages[i]); + + if (buffer_mapped(bh) && bh->b_blocknr == 0) { + struct btrfs_key key; + struct btrfs_path *path; + char *ptr; + u32 datasize; + + /* create an inline extent, and copy the data in */ + path = btrfs_alloc_path(); + BUG_ON(!path); + key.objectid = inode->i_ino; + key.offset = pages[i]->index << PAGE_CACHE_SHIFT; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + BUG_ON(write_bytes >= PAGE_CACHE_SIZE); + datasize = offset + + btrfs_file_extent_calc_inline_size(write_bytes); + + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(ret); + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, bh->b_data, offset + write_bytes); + mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + } else if 
(buffer_mapped(bh)) { + /* csum the file data */ + btrfs_csum_file_block(trans, root, inode->i_ino, + pages[i]->index << PAGE_CACHE_SHIFT, + kmap(pages[i]), PAGE_CACHE_SIZE); + kunmap(pages[i]); + } + SetPageChecked(pages[i]); + ret = btrfs_end_transaction(trans, root); + BUG_ON(ret); + mutex_unlock(&root->fs_info->fs_mutex); + + ret = btrfs_commit_write(file, pages[i], offset, + offset + this_write); + pos += this_write; + if (ret) { + err = ret; + goto failed; + } + WARN_ON(this_write > write_bytes); + write_bytes -= this_write; + } +failed: + return err; +} + +/* + * this is very complex, but the basic idea is to drop all extents + * in the range start - end. hint_block is filled in with a block number + * that would be a good hint to the block allocator for this file. + * + * If an extent intersects the range but is not entirely inside the range + * it is either truncated or split. Anything entirely inside the range + * is deleted from the tree. + */ +int btrfs_drop_extents(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, + u64 start, u64 end, u64 *hint_block) +{ + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + int slot; + struct btrfs_file_extent_item *extent; + u64 extent_end = 0; + int keep; + struct btrfs_file_extent_item old; + struct btrfs_path *path; + u64 search_start = start; + int bookend; + int found_type; + int found_extent; + int found_inline; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + while(1) { + btrfs_release_path(root, path); + ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + search_start, -1); + if (ret < 0) + goto out; + if (ret > 0) { + if (path->slots[0] == 0) { + ret = 0; + goto out; + } + path->slots[0]--; + } + keep = 0; + bookend = 0; + found_extent = 0; + found_inline = 0; + extent = NULL; + leaf = btrfs_buffer_leaf(path->nodes[0]); + slot = path->slots[0]; + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (key.offset >= end || key.objectid 
!= inode->i_ino) { + ret = 0; + goto out; + } + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { + ret = 0; + goto out; + } + extent = btrfs_item_ptr(leaf, slot, + struct btrfs_file_extent_item); + found_type = btrfs_file_extent_type(extent); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = key.offset + + (btrfs_file_extent_num_blocks(extent) << + inode->i_blkbits); + found_extent = 1; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_inline = 1; + extent_end = key.offset + + btrfs_file_extent_inline_len(leaf->items + slot); + } + + /* we found nothing we can drop */ + if (!found_extent && !found_inline) { + ret = 0; + goto out; + } + + /* we found nothing inside the range */ + if (search_start >= extent_end) { + ret = 0; + goto out; + } + + /* FIXME, there's only one inline extent allowed right now */ + if (found_inline) { + u64 mask = root->blocksize - 1; + search_start = (extent_end + mask) & ~mask; + } else + search_start = extent_end; + + if (end < extent_end && end >= key.offset) { + if (found_extent) { + u64 disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + u64 disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + memcpy(&old, extent, sizeof(old)); + if (disk_blocknr != 0) { + ret = btrfs_inc_extent_ref(trans, root, + disk_blocknr, disk_num_blocks); + BUG_ON(ret); + } + } + WARN_ON(found_inline); + bookend = 1; + } + + /* truncate existing extent */ + if (start > key.offset) { + u64 new_num; + u64 old_num; + keep = 1; + WARN_ON(start & (root->blocksize - 1)); + if (found_extent) { + new_num = (start - key.offset) >> + inode->i_blkbits; + old_num = btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); + if (btrfs_file_extent_disk_blocknr(extent)) { + inode->i_blocks -= + (old_num - new_num) << 3; + } + btrfs_set_file_extent_num_blocks(extent, + new_num); + mark_buffer_dirty(path->nodes[0]); + } else { + WARN_ON(1); + } + } + /* delete the entire extent */ + if (!keep) { 
+ u64 disk_blocknr = 0; + u64 disk_num_blocks = 0; + u64 extent_num_blocks = 0; + if (found_extent) { + disk_blocknr = + btrfs_file_extent_disk_blocknr(extent); + disk_num_blocks = + btrfs_file_extent_disk_num_blocks(extent); + extent_num_blocks = + btrfs_file_extent_num_blocks(extent); + *hint_block = + btrfs_file_extent_disk_blocknr(extent); + } + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_release_path(root, path); + extent = NULL; + if (found_extent && disk_blocknr != 0) { + inode->i_blocks -= extent_num_blocks << 3; + ret = btrfs_free_extent(trans, root, + disk_blocknr, + disk_num_blocks, 0); + } + + BUG_ON(ret); + if (!bookend && search_start >= end) { + ret = 0; + goto out; + } + if (!bookend) + continue; + } + /* create bookend, splitting the extent in two */ + if (bookend && found_extent) { + struct btrfs_key ins; + ins.objectid = inode->i_ino; + ins.offset = end; + ins.flags = 0; + btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); + + btrfs_release_path(root, path); + ret = btrfs_insert_empty_item(trans, root, path, &ins, + sizeof(*extent)); + BUG_ON(ret); + extent = btrfs_item_ptr( + btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_disk_blocknr(extent, + btrfs_file_extent_disk_blocknr(&old)); + btrfs_set_file_extent_disk_num_blocks(extent, + btrfs_file_extent_disk_num_blocks(&old)); + + btrfs_set_file_extent_offset(extent, + btrfs_file_extent_offset(&old) + + ((end - key.offset) >> inode->i_blkbits)); + WARN_ON(btrfs_file_extent_num_blocks(&old) < + (extent_end - end) >> inode->i_blkbits); + btrfs_set_file_extent_num_blocks(extent, + (extent_end - end) >> inode->i_blkbits); + + btrfs_set_file_extent_type(extent, + BTRFS_FILE_EXTENT_REG); + btrfs_set_file_extent_generation(extent, + btrfs_file_extent_generation(&old)); + btrfs_mark_buffer_dirty(path->nodes[0]); + if (btrfs_file_extent_disk_blocknr(&old) != 0) { + inode->i_blocks += + btrfs_file_extent_num_blocks(extent) << 3; 
+ } + ret = 0; + goto out; + } + } +out: + btrfs_free_path(path); + return ret; +}
+
+/*
+ * this gets pages into the page cache and locks them down
+ *
+ * Each page is taken with grab_cache_page(), has its dirty state cancelled,
+ * is waited on for writeback, gets buffer heads if it has none, and has
+ * every buffer head run through btrfs_map_bh_to_logical().  When
+ * alloc_extent_start is non-zero it is advanced by one per buffer head.
+ * first_index and last_index are accepted but not used here.
+ *
+ * Returns 0 on success.  On failure every page grabbed so far is handed to
+ * btrfs_drop_pages() and either -ENOMEM (a page could not be grabbed) or the
+ * error from btrfs_map_bh_to_logical() is returned.
+ */
+static int prepare_pages(struct btrfs_root *root,
+			 struct file *file,
+			 struct page **pages,
+			 size_t num_pages,
+			 loff_t pos,
+			 unsigned long first_index,
+			 unsigned long last_index,
+			 size_t write_bytes,
+			 u64 alloc_extent_start)
+{
+	int i;
+	unsigned long index = pos >> PAGE_CACHE_SHIFT;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	int offset;
+	int err = 0;
+	int this_write;
+	struct buffer_head *bh;
+	struct buffer_head *head;
+	loff_t isize = i_size_read(inode);
+
+	/* btrfs_drop_pages() stops at the first NULL slot, so start clean */
+	memset(pages, 0, num_pages * sizeof(struct page *));
+
+	for (i = 0; i < num_pages; i++) {
+		pages[i] = grab_cache_page(inode->i_mapping, index + i);
+		if (!pages[i]) {
+			err = -ENOMEM;
+			goto failed_release;
+		}
+		cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
+		wait_on_page_writeback(pages[i]);
+		offset = pos & (PAGE_CACHE_SIZE -1);
+		this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
+		if (!page_has_buffers(pages[i])) {
+			create_empty_buffers(pages[i],
+					root->fs_info->sb->s_blocksize,
+					(1 << BH_Uptodate));
+		}
+		head = page_buffers(pages[i]);
+		bh = head;
+		do {
+			err = btrfs_map_bh_to_logical(root, bh,
+						      alloc_extent_start);
+			/*
+			 * No BUG_ON() here: it made the cleanup below
+			 * unreachable.  Let the error unwind instead.
+			 */
+			if (err)
+				goto failed_truncate;
+			bh = bh->b_this_page;
+			if (alloc_extent_start)
+				alloc_extent_start++;
+		} while (bh != head);
+		pos += this_write;
+		WARN_ON(this_write > write_bytes);
+		write_bytes -= this_write;
+	}
+	return 0;
+
+failed_release:
+	btrfs_drop_pages(pages, num_pages);
+	return err;
+
+failed_truncate:
+	btrfs_drop_pages(pages, num_pages);
+	/* undo any extension past the size the file had on entry */
+	if (pos > isize)
+		vmtruncate(inode, isize);
+	return err;
+}
+
+static ssize_t btrfs_file_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + loff_t pos; + size_t num_written = 0; + int err = 0; + int ret = 0; + struct inode *inode = file->f_path.dentry->d_inode; + struct btrfs_root *root = 
BTRFS_I(inode)->root;
+	struct page *pages[8];
+	struct page *pinned[2];
+	unsigned long first_index;
+	unsigned long last_index;
+	u64 start_pos;
+	u64 num_blocks;
+	u64 alloc_extent_start;
+	u64 hint_block;
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key ins;
+	pinned[0] = NULL;
+	pinned[1] = NULL;
+	if (file->f_flags & O_DIRECT)
+		return -EINVAL;
+	pos = *ppos;
+	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+	current->backing_dev_info = inode->i_mapping->backing_dev_info;
+	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+	if (err)
+		goto out;
+	if (count == 0)
+		goto out;
+	err = remove_suid(file->f_path.dentry);
+	if (err)
+		goto out;
+	file_update_time(file);
+
+	/* round the start down to a page boundary and size the range in blocks */
+	start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
+	num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
+			inode->i_blkbits;
+
+	mutex_lock(&inode->i_mutex);
+	first_index = pos >> PAGE_CACHE_SHIFT;
+	last_index = (pos + count) >> PAGE_CACHE_SHIFT;
+
+	/*
+	 * there are lots of better ways to do this, but this code
+	 * makes sure the first and last page in the file range are
+	 * up to date and ready for cow
+	 */
+	if ((pos & (PAGE_CACHE_SIZE - 1))) {
+		pinned[0] = grab_cache_page(inode->i_mapping, first_index);
+		/* grab_cache_page() can fail; don't hand NULL to PageUptodate() */
+		if (!pinned[0]) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+		if (!PageUptodate(pinned[0])) {
+			ret = mpage_readpage(pinned[0], btrfs_get_block);
+			BUG_ON(ret);
+			wait_on_page_locked(pinned[0]);
+		} else {
+			unlock_page(pinned[0]);
+		}
+	}
+	if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
+		pinned[1] = grab_cache_page(inode->i_mapping, last_index);
+		/* pinned[0], if taken, is dropped by the common exit path */
+		if (!pinned[1]) {
+			err = -ENOMEM;
+			goto out_unlock;
+		}
+		if (!PageUptodate(pinned[1])) {
+			ret = mpage_readpage(pinned[1], btrfs_get_block);
+			BUG_ON(ret);
+			wait_on_page_locked(pinned[1]);
+		} else {
+			unlock_page(pinned[1]);
+		}
+	}
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	trans = btrfs_start_transaction(root, 1);
+	if (!trans) {
+		err = -ENOMEM;
+		mutex_unlock(&root->fs_info->fs_mutex);
+		goto out_unlock;
+	}
+	btrfs_set_trans_block_group(trans, inode);
+	/* FIXME blocksize != 4096 */
+	inode->i_blocks += num_blocks << 
3; + hint_block = 0; + + /* FIXME...EIEIO, ENOSPC and more */ + + /* step one, delete the existing extents in this range */ + if (start_pos < inode->i_size) { + /* FIXME blocksize != pagesize */ + ret = btrfs_drop_extents(trans, root, inode, + start_pos, + (pos + count + root->blocksize -1) & + ~((u64)root->blocksize - 1), + &hint_block); + BUG_ON(ret); + } + + /* insert any holes we need to create */ + if (inode->i_size < start_pos) { + u64 last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (inode->i_size + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + ret = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + BUG_ON(ret); + } + + /* + * either allocate an extent for the new bytes or setup the key + * to show we are doing inline data in the extent + */ + if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || + pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + ret = btrfs_alloc_extent(trans, root, inode->i_ino, + num_blocks, hint_block, (u64)-1, + &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset, + ins.offset); + BUG_ON(ret); + } else { + ins.offset = 0; + ins.objectid = 0; + } + BUG_ON(ret); + alloc_extent_start = ins.objectid; + ret = btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + while(count > 0) { + size_t offset = pos & (PAGE_CACHE_SIZE - 1); + size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); + size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + + memset(pages, 0, sizeof(pages)); + ret = prepare_pages(root, file, pages, num_pages, + pos, first_index, last_index, + write_bytes, alloc_extent_start); + BUG_ON(ret); + + /* FIXME blocks != pagesize */ + if (alloc_extent_start) + alloc_extent_start += 
num_pages; + ret = btrfs_copy_from_user(pos, num_pages, + write_bytes, pages, buf); + BUG_ON(ret); + + ret = dirty_and_release_pages(NULL, root, file, pages, + num_pages, pos, write_bytes); + BUG_ON(ret); + btrfs_drop_pages(pages, num_pages); + + buf += write_bytes; + count -= write_bytes; + pos += write_bytes; + num_written += write_bytes; + + balance_dirty_pages_ratelimited(inode->i_mapping); + btrfs_btree_balance_dirty(root); + cond_resched(); + } +out_unlock: + mutex_unlock(&inode->i_mutex); +out: + if (pinned[0]) + page_cache_release(pinned[0]); + if (pinned[1]) + page_cache_release(pinned[1]); + *ppos = pos; + current->backing_dev_info = NULL; + mark_inode_dirty(inode); + return num_written ? num_written : err; +} + +/* + * FIXME, do this by stuffing the csum we want in the info hanging off + * page->private. For now, verify file csums on read + */ +static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, + unsigned long offset, unsigned long size) +{ + char *kaddr; + unsigned long left, count = desc->count; + struct inode *inode = page->mapping->host; + + if (size > count) + size = count; + + if (!PageChecked(page)) { + /* FIXME, do it per block */ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct buffer_head *bh; + + if (page_has_buffers(page)) { + bh = page_buffers(page); + if (!buffer_mapped(bh)) { + SetPageChecked(page); + goto checked; + } + } + + ret = btrfs_csum_verify_file_block(root, + page->mapping->host->i_ino, + page->index << PAGE_CACHE_SHIFT, + kmap(page), PAGE_CACHE_SIZE); + if (ret) { + if (ret != -ENOENT) { + printk("failed to verify ino %lu page %lu ret %d\n", + page->mapping->host->i_ino, + page->index, ret); + memset(page_address(page), 1, PAGE_CACHE_SIZE); + flush_dcache_page(page); + } + } + SetPageChecked(page); + kunmap(page); + } +checked: + /* + * Faults on the destination of a read are common, so do it before + * taking the kmap. 
+ */ + if (!fault_in_pages_writeable(desc->arg.buf, size)) { + kaddr = kmap_atomic(page, KM_USER0); + left = __copy_to_user_inatomic(desc->arg.buf, + kaddr + offset, size); + kunmap_atomic(kaddr, KM_USER0); + if (left == 0) + goto success; + } + + /* Do it the slow way */ + kaddr = kmap(page); + left = __copy_to_user(desc->arg.buf, kaddr + offset, size); + kunmap(page); + + if (left) { + size -= left; + desc->error = -EFAULT; + } +success: + desc->count = count - size; + desc->written += size; + desc->arg.buf += size; + return size; +} + +/** + * btrfs_file_aio_read - filesystem read routine, with a mod to csum verify + * @iocb: kernel I/O control block + * @iov: io vector request + * @nr_segs: number of segments in the iovec + * @pos: current file position + */ +static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *filp = iocb->ki_filp; + ssize_t retval; + unsigned long seg; + size_t count; + loff_t *ppos = &iocb->ki_pos; + + count = 0; + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. 
+ */ + count += iv->iov_len; + if (unlikely((ssize_t)(count|iv->iov_len) < 0)) + return -EINVAL; + if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) + continue; + if (seg == 0) + return -EFAULT; + nr_segs = seg; + count -= iv->iov_len; /* This segment is no good */ + break; + } + retval = 0; + if (count) { + for (seg = 0; seg < nr_segs; seg++) { + read_descriptor_t desc; + + desc.written = 0; + desc.arg.buf = iov[seg].iov_base; + desc.count = iov[seg].iov_len; + if (desc.count == 0) + continue; + desc.error = 0; + do_generic_file_read(filp, ppos, &desc, + btrfs_read_actor); + retval += desc.written; + if (desc.error) { + retval = retval ?: desc.error; + break; + } + } + } + return retval; +} + +static int btrfs_sync_file(struct file *file, + struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + struct btrfs_trans_handle *trans; + + /* + * FIXME, use inode generation number to check if we can skip the + * commit + */ + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + ret = -ENOMEM; + goto out; + } + ret = btrfs_commit_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); +out: + return ret > 0 ? 
EIO : ret; +} + +struct file_operations btrfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .aio_read = btrfs_file_aio_read, + .write = btrfs_file_write, + .mmap = generic_file_mmap, + .open = generic_file_open, + .ioctl = btrfs_ioctl, + .fsync = btrfs_sync_file, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif +}; + diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c new file mode 100644 index 000000000000..62a3a778d37d --- /dev/null +++ b/fs/btrfs/inode.c @@ -0,0 +1,2210 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "transaction.h" +#include "btrfs_inode.h" +#include "ioctl.h" +#include "print-tree.h" + +struct btrfs_iget_args { + u64 ino; + struct btrfs_root *root; +}; + +static struct inode_operations btrfs_dir_inode_operations; +static struct inode_operations btrfs_symlink_inode_operations; +static struct inode_operations btrfs_dir_ro_inode_operations; +static struct inode_operations btrfs_file_inode_operations; +static struct address_space_operations btrfs_aops; +static struct address_space_operations btrfs_symlink_aops; +static struct file_operations btrfs_dir_file_operations; + +static struct kmem_cache *btrfs_inode_cachep; +struct kmem_cache *btrfs_trans_handle_cachep; +struct kmem_cache *btrfs_transaction_cachep; +struct kmem_cache *btrfs_bit_radix_cachep; +struct kmem_cache *btrfs_path_cachep; + +#define S_SHIFT 12 +static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, + [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, + [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, +}; + +void btrfs_read_locked_inode(struct inode *inode) +{ + struct btrfs_path *path; + 
struct btrfs_inode_item *inode_item; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_key location; + u64 alloc_group_block; + int ret; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + mutex_lock(&root->fs_info->fs_mutex); + + memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); + ret = btrfs_lookup_inode(NULL, root, path, &location, 0); + if (ret) { + btrfs_free_path(path); + goto make_bad; + } + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_inode_item); + + inode->i_mode = btrfs_inode_mode(inode_item); + inode->i_nlink = btrfs_inode_nlink(inode_item); + inode->i_uid = btrfs_inode_uid(inode_item); + inode->i_gid = btrfs_inode_gid(inode_item); + inode->i_size = btrfs_inode_size(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); + inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); + inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); + inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); + inode->i_blocks = btrfs_inode_nblocks(inode_item); + inode->i_generation = btrfs_inode_generation(inode_item); + alloc_group_block = btrfs_inode_block_group(inode_item); + BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, + alloc_group_block); + + btrfs_free_path(path); + inode_item = NULL; + + mutex_unlock(&root->fs_info->fs_mutex); + + switch (inode->i_mode & S_IFMT) { +#if 0 + default: + init_special_inode(inode, inode->i_mode, + btrfs_inode_rdev(inode_item)); + break; +#endif + case S_IFREG: + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + break; + case S_IFDIR: + inode->i_fop = &btrfs_dir_file_operations; + if (root == root->fs_info->tree_root) + inode->i_op = 
&btrfs_dir_ro_inode_operations; + else + inode->i_op = &btrfs_dir_inode_operations; + break; + case S_IFLNK: + inode->i_op = &btrfs_symlink_inode_operations; + inode->i_mapping->a_ops = &btrfs_symlink_aops; + break; + } + return; + +make_bad: + btrfs_release_path(root, path); + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + make_bad_inode(inode); +} + +static void fill_inode_item(struct btrfs_inode_item *item, + struct inode *inode) +{ + btrfs_set_inode_uid(item, inode->i_uid); + btrfs_set_inode_gid(item, inode->i_gid); + btrfs_set_inode_size(item, inode->i_size); + btrfs_set_inode_mode(item, inode->i_mode); + btrfs_set_inode_nlink(item, inode->i_nlink); + btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); + btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); + btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); + btrfs_set_inode_nblocks(item, inode->i_blocks); + btrfs_set_inode_generation(item, inode->i_generation); + btrfs_set_inode_block_group(item, + BTRFS_I(inode)->block_group->key.objectid); +} + +static int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_inode_item *inode_item; + struct btrfs_path *path; + int ret; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, 1); + if (ret) { + if (ret > 0) + ret = -ENOENT; + goto failed; + } + + inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], + struct btrfs_inode_item); + + fill_inode_item(inode_item, inode); + btrfs_mark_buffer_dirty(path->nodes[0]); + ret = 0; +failed: + btrfs_release_path(root, path); + btrfs_free_path(path); + return ret; +} + + +static int 
btrfs_unlink_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, + struct dentry *dentry) +{ + struct btrfs_path *path; + const char *name = dentry->d_name.name; + int name_len = dentry->d_name.len; + int ret = 0; + u64 objectid; + struct btrfs_dir_item *di; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + name, name_len, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto err; + } + if (!di) { + ret = -ENOENT; + goto err; + } + objectid = btrfs_disk_key_objectid(&di->location); + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + objectid, name, name_len, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto err; + } + if (!di) { + ret = -ENOENT; + goto err; + } + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + + dentry->d_inode->i_ctime = dir->i_ctime; +err: + btrfs_free_path(path); + if (!ret) { + dir->i_size -= name_len * 2; + btrfs_update_inode(trans, root, dir); + drop_nlink(dentry->d_inode); + btrfs_update_inode(trans, root, dentry->d_inode); + dir->i_sb->s_dirt = 1; + } + return ret; +} + +static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct btrfs_root *root; + struct btrfs_trans_handle *trans; + int ret; + + root = BTRFS_I(dir)->root; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + ret = btrfs_unlink_trans(trans, root, dir, dentry); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); + return ret; +} + +static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + int err; + int ret; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_path *path; + struct btrfs_key 
key; + struct btrfs_trans_handle *trans; + struct btrfs_key found_key; + int found_type; + struct btrfs_leaf *leaf; + char *goodnames = ".."; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + key.objectid = inode->i_ino; + key.offset = (u64)-1; + key.flags = (u32)-1; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) { + err = ret; + goto out; + } + BUG_ON(ret == 0); + if (path->slots[0] == 0) { + err = -ENOENT; + goto out; + } + path->slots[0]--; + leaf = btrfs_buffer_leaf(path->nodes[0]); + btrfs_disk_key_to_cpu(&found_key, + &leaf->items[path->slots[0]].key); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != inode->i_ino) { + err = -ENOENT; + goto out; + } + if ((found_type != BTRFS_DIR_ITEM_KEY && + found_type != BTRFS_DIR_INDEX_KEY) || + (!btrfs_match_dir_item_name(root, path, goodnames, 2) && + !btrfs_match_dir_item_name(root, path, goodnames, 1))) { + err = -ENOTEMPTY; + goto out; + } + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + + if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1) + break; + btrfs_release_path(root, path); + } + ret = 0; + btrfs_release_path(root, path); + + /* now the directory is empty */ + err = btrfs_unlink_trans(trans, root, dir, dentry); + if (!err) { + inode->i_size = 0; + } +out: + btrfs_release_path(root, path); + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + ret = btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root); + if (ret && !err) + err = ret; + return err; +} + +static int btrfs_free_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode) +{ + struct btrfs_path *path; + int ret; + + clear_inode(inode); + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + ret = btrfs_lookup_inode(trans, root, path, + 
&BTRFS_I(inode)->location, -1); + BUG_ON(ret); + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_free_path(path); + return ret; +} + +/* + * truncates go from a high offset to a low offset. So, walk + * from hi to lo in the node and issue readas. Stop when you find + * keys from a different objectid + */ +static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_node *node; + int i; + int nritems; + u64 item_objectid; + u64 blocknr; + int slot; + int ret; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + if (slot == 0) + return; + nritems = btrfs_header_nritems(&node->header); + for (i = slot - 1; i >= 0; i--) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} + +/* + * this can truncate away extent items, csum items and directory items. + * It starts at a high offset and removes keys until it can't find + * any higher than i_size. + * + * csum items that cross the new i_size are truncated to the new size + * as well. 
+ */
+static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   struct inode *inode)
+{
+	int ret;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_disk_key *found_key;
+	u32 found_type;
+	struct btrfs_leaf *leaf;
+	struct btrfs_file_extent_item *fi;
+	u64 extent_start = 0;
+	u64 extent_num_blocks = 0;
+	u64 item_end = 0;
+	int found_extent;
+	int del_item;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	/* FIXME, add redo link to tree so we don't leak on crash */
+
+	/*
+	 * Start at the largest possible key for this inode and walk
+	 * backwards, one search per item, dropping everything that lies
+	 * at or beyond the new i_size.
+	 */
+	key.objectid = inode->i_ino;
+	key.offset = (u64)-1;
+	key.flags = (u32)-1;
+	while(1) {
+		btrfs_init_path(path);
+		fi = NULL;
+		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+		if (ret < 0) {
+			goto error;
+		}
+		if (ret > 0) {
+			/* no exact match: step back to the previous item */
+			BUG_ON(path->slots[0] == 0);
+			path->slots[0]--;
+		}
+		reada_truncate(root, path, inode->i_ino);
+		leaf = btrfs_buffer_leaf(path->nodes[0]);
+		found_key = &leaf->items[path->slots[0]].key;
+		found_type = btrfs_disk_key_type(found_key);
+
+		/* stop once we walk off this inode or onto an item type
+		 * that truncate does not touch
+		 */
+		if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
+			break;
+		if (found_type != BTRFS_CSUM_ITEM_KEY &&
+		    found_type != BTRFS_DIR_ITEM_KEY &&
+		    found_type != BTRFS_DIR_INDEX_KEY &&
+		    found_type != BTRFS_EXTENT_DATA_KEY)
+			break;
+
+		/* item_end is the file offset just past this item */
+		item_end = btrfs_disk_key_offset(found_key);
+		if (found_type == BTRFS_EXTENT_DATA_KEY) {
+			fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
+					    path->slots[0],
+					    struct btrfs_file_extent_item);
+			if (btrfs_file_extent_type(fi) !=
+			    BTRFS_FILE_EXTENT_INLINE) {
+				item_end += btrfs_file_extent_num_blocks(fi) <<
+						inode->i_blkbits;
+			}
+		}
+		if (found_type == BTRFS_CSUM_ITEM_KEY) {
+			ret = btrfs_csum_truncate(trans, root, path,
+						  inode->i_size);
+			BUG_ON(ret);
+		}
+		if (item_end < inode->i_size) {
+			/*
+			 * Everything of this type is below the new size,
+			 * move on to the next lower key type.
+			 */
+			if (found_type) {
+				btrfs_set_key_type(&key, found_type - 1);
+				/*
+				 * The path is re-initialized at the top of
+				 * the loop; release what this search pinned
+				 * first, exactly as the delete path below
+				 * does before it loops.
+				 */
+				btrfs_release_path(root, path);
+				continue;
+			}
+			break;
+		}
+		if (btrfs_disk_key_offset(found_key) >= inode->i_size)
+			del_item = 1;
+		else
+			del_item = 0;
+		found_extent = 0;
+
+		/* FIXME, shrink the extent if the ref count is only 1 */
+		if (found_type == BTRFS_EXTENT_DATA_KEY &&
+			   btrfs_file_extent_type(fi) !=
+			   BTRFS_FILE_EXTENT_INLINE) {
+			u64 num_dec;
+			if (!del_item) {
+				/* the extent straddles i_size: shorten it */
+				u64 orig_num_blocks =
+					btrfs_file_extent_num_blocks(fi);
+				extent_num_blocks = inode->i_size -
+					btrfs_disk_key_offset(found_key) +
+					root->blocksize - 1;
+				extent_num_blocks >>= inode->i_blkbits;
+				btrfs_set_file_extent_num_blocks(fi,
+							 extent_num_blocks);
+				/*
+				 * Extents with a disk block number of 0 are
+				 * not charged to i_blocks when they are
+				 * deleted below, so do not charge them when
+				 * they are merely shortened either.
+				 */
+				if (btrfs_file_extent_disk_blocknr(fi) != 0)
+					inode->i_blocks -= (orig_num_blocks -
+						extent_num_blocks) << 3;
+				mark_buffer_dirty(path->nodes[0]);
+			} else {
+				extent_start =
+					btrfs_file_extent_disk_blocknr(fi);
+				extent_num_blocks =
+					btrfs_file_extent_disk_num_blocks(fi);
+				/* FIXME blocksize != 4096 */
+				num_dec = btrfs_file_extent_num_blocks(fi) << 3;
+				if (extent_start != 0) {
+					found_extent = 1;
+					inode->i_blocks -= num_dec;
+				}
+			}
+		}
+		if (del_item) {
+			ret = btrfs_del_item(trans, root, path);
+			BUG_ON(ret);
+		} else {
+			break;
+		}
+		btrfs_release_path(root, path);
+		if (found_extent) {
+			/* give the disk blocks of the deleted extent back */
+			ret = btrfs_free_extent(trans, root, extent_start,
+						extent_num_blocks, 0);
+			BUG_ON(ret);
+		}
+	}
+	ret = 0;
+error:
+	btrfs_release_path(root, path);
+	btrfs_free_path(path);
+	inode->i_sb->s_dirt = 1;
+	return ret;
+}
+
+/*
+ * taken from block_truncate_page, but does cow as it zeros out
+ * any bytes left in the last page in the file.
+ */ +static int btrfs_truncate_page(struct address_space *mapping, loff_t from) +{ + struct inode *inode = mapping->host; + unsigned blocksize = 1 << inode->i_blkbits; + pgoff_t index = from >> PAGE_CACHE_SHIFT; + unsigned offset = from & (PAGE_CACHE_SIZE-1); + struct page *page; + char *kaddr; + int ret = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 alloc_hint = 0; + struct btrfs_key ins; + struct btrfs_trans_handle *trans; + + if ((offset & (blocksize - 1)) == 0) + goto out; + + ret = -ENOMEM; + page = grab_cache_page(mapping, index); + if (!page) + goto out; + + if (!PageUptodate(page)) { + ret = mpage_readpage(page, btrfs_get_block); + lock_page(page); + if (!PageUptodate(page)) { + ret = -EIO; + goto out; + } + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_drop_extents(trans, root, inode, + page->index << PAGE_CACHE_SHIFT, + (page->index + 1) << PAGE_CACHE_SHIFT, + &alloc_hint); + BUG_ON(ret); + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, + alloc_hint, (u64)-1, &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + page->index << PAGE_CACHE_SHIFT, + ins.objectid, 1, 1); + BUG_ON(ret); + SetPageChecked(page); + kaddr = kmap(page); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + btrfs_csum_file_block(trans, root, inode->i_ino, + page->index << PAGE_CACHE_SHIFT, + kaddr, PAGE_CACHE_SIZE); + kunmap(page); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); +out: + return ret; +} + +static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int err; + + err = inode_change_ok(inode, attr); + if (err) + return err; + + if (S_ISREG(inode->i_mode) && + attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { + struct 
btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 mask = root->blocksize - 1; + u64 pos = (inode->i_size + mask) & ~mask; + u64 hole_size; + + if (attr->ia_size <= pos) + goto out; + + btrfs_truncate_page(inode->i_mapping, inode->i_size); + + hole_size = (attr->ia_size - pos + mask) & ~mask; + hole_size >>= inode->i_blkbits; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + pos, 0, 0, hole_size); + BUG_ON(err); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + } +out: + err = inode_setattr(inode, attr); + + return err; +} +void btrfs_delete_inode(struct inode *inode) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) { + goto no_delete; + } + inode->i_size = 0; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + ret = btrfs_truncate_in_trans(trans, root, inode); + BUG_ON(ret); + btrfs_free_inode(trans, root, inode); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); + return; +no_delete: + clear_inode(inode); +} + +/* + * this returns the key found in the dir entry in the location pointer. + * If no dir entries were found, location->objectid is 0. 
+ */ +static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, + struct btrfs_key *location) +{ + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct btrfs_dir_item *di; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(dir)->root; + int ret; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, + namelen, 0); + if (!di || IS_ERR(di)) { + location->objectid = 0; + ret = 0; + goto out; + } + btrfs_disk_key_to_cpu(location, &di->location); +out: + btrfs_release_path(root, path); + btrfs_free_path(path); + return ret; +} + +/* + * when we hit a tree root in a directory, the btrfs part of the inode + * needs to be changed to reflect the root directory of the tree root. This + * is kind of like crossing a mount point. + */ +static int fixup_tree_root_location(struct btrfs_root *root, + struct btrfs_key *location, + struct btrfs_root **sub_root) +{ + struct btrfs_path *path; + struct btrfs_root_item *ri; + + if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) + return 0; + if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) + return 0; + + path = btrfs_alloc_path(); + BUG_ON(!path); + mutex_lock(&root->fs_info->fs_mutex); + + *sub_root = btrfs_read_fs_root(root->fs_info, location); + if (IS_ERR(*sub_root)) + return PTR_ERR(*sub_root); + + ri = &(*sub_root)->root_item; + location->objectid = btrfs_root_dirid(ri); + location->flags = 0; + btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); + location->offset = 0; + + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + +static int btrfs_init_locked_inode(struct inode *inode, void *p) +{ + struct btrfs_iget_args *args = p; + inode->i_ino = args->ino; + BTRFS_I(inode)->root = args->root; + return 0; +} + +static int btrfs_find_actor(struct inode *inode, void *opaque) +{ + struct btrfs_iget_args *args = opaque; + return (args->ino == inode->i_ino && + 
args->root == BTRFS_I(inode)->root); +} + +struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, + struct btrfs_root *root) +{ + struct inode *inode; + struct btrfs_iget_args args; + args.ino = objectid; + args.root = root; + + inode = iget5_locked(s, objectid, btrfs_find_actor, + btrfs_init_locked_inode, + (void *)&args); + return inode; +} + +static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct inode * inode; + struct btrfs_inode *bi = BTRFS_I(dir); + struct btrfs_root *root = bi->root; + struct btrfs_root *sub_root = root; + struct btrfs_key location; + int ret; + + if (dentry->d_name.len > BTRFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_inode_by_name(dir, dentry, &location); + mutex_unlock(&root->fs_info->fs_mutex); + if (ret < 0) + return ERR_PTR(ret); + inode = NULL; + if (location.objectid) { + ret = fixup_tree_root_location(root, &location, &sub_root); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) + return ERR_PTR(-ENOENT); + inode = btrfs_iget_locked(dir->i_sb, location.objectid, + sub_root); + if (!inode) + return ERR_PTR(-EACCES); + if (inode->i_state & I_NEW) { + /* the inode and parent dir are two different roots */ + if (sub_root != root) { + igrab(inode); + sub_root->inode = inode; + } + BTRFS_I(inode)->root = sub_root; + memcpy(&BTRFS_I(inode)->location, &location, + sizeof(location)); + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + } + } + return d_splice_alias(inode, dentry); +} + +/* + * readahead one full node of leaves as long as their keys include + * the objectid supplied + */ +static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, + u64 objectid) +{ + struct btrfs_node *node; + int i; + u32 nritems; + u64 item_objectid; + u64 blocknr; + int slot; + int ret; + + if (!path->nodes[1]) + return; + node = btrfs_buffer_node(path->nodes[1]); + slot = path->slots[1]; + nritems = 
btrfs_header_nritems(&node->header); + for (i = slot + 1; i < nritems; i++) { + item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); + if (item_objectid != objectid) + break; + blocknr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, blocknr); + if (ret) + break; + } +} +static unsigned char btrfs_filetype_table[] = { + DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK +}; + +static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_item *item; + struct btrfs_dir_item *di; + struct btrfs_key key; + struct btrfs_path *path; + int ret; + u32 nritems; + struct btrfs_leaf *leaf; + int slot; + int advance; + unsigned char d_type; + int over = 0; + u32 di_cur; + u32 di_total; + u32 di_len; + int key_type = BTRFS_DIR_INDEX_KEY; + + /* FIXME, use a real flag for deciding about the key type */ + if (root->fs_info->tree_root == root) + key_type = BTRFS_DIR_ITEM_KEY; + mutex_lock(&root->fs_info->fs_mutex); + key.objectid = inode->i_ino; + key.flags = 0; + btrfs_set_key_type(&key, key_type); + key.offset = filp->f_pos; + path = btrfs_alloc_path(); + btrfs_init_path(path); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto err; + advance = 0; + reada_leaves(root, path, inode->i_ino); + while(1) { + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + if (advance || slot >= nritems) { + if (slot >= nritems -1) { + reada_leaves(root, path, inode->i_ino); + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + } else { + slot++; + path->slots[0]++; + } + } + advance = 1; + item = leaf->items + slot; + if (btrfs_disk_key_objectid(&item->key) != key.objectid) + break; + if 
(btrfs_disk_key_type(&item->key) != key_type) + break; + if (btrfs_disk_key_offset(&item->key) < filp->f_pos) + continue; + filp->f_pos = btrfs_disk_key_offset(&item->key); + advance = 1; + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); + di_cur = 0; + di_total = btrfs_item_size(leaf->items + slot); + while(di_cur < di_total) { + d_type = btrfs_filetype_table[btrfs_dir_type(di)]; + over = filldir(dirent, (const char *)(di + 1), + btrfs_dir_name_len(di), + btrfs_disk_key_offset(&item->key), + btrfs_disk_key_objectid(&di->location), + d_type); + if (over) + goto nopos; + di_len = btrfs_dir_name_len(di) + sizeof(*di); + di_cur += di_len; + di = (struct btrfs_dir_item *)((char *)di + di_len); + } + } + filp->f_pos++; +nopos: + ret = 0; +err: + btrfs_release_path(root, path); + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +int btrfs_write_inode(struct inode *inode, int wait) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + if (wait) { + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + ret = btrfs_commit_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + } + return ret; +} + +/* + * This is somewhat expense, updating the tree every time the + * inode changes. But, it is most likely to find the inode in cache. + * FIXME, needs more benchmarking...there are no reasons other than performance + * to keep or drop this code. 
+ */ +void btrfs_dirty_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); +} + +static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, + struct btrfs_block_group_cache *group, + int mode) +{ + struct inode *inode; + struct btrfs_inode_item inode_item; + struct btrfs_key *location; + int ret; + int owner; + + inode = new_inode(root->fs_info->sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + BTRFS_I(inode)->root = root; + if (mode & S_IFDIR) + owner = 0; + else + owner = 1; + group = btrfs_find_block_group(root, group, 0, 0, owner); + BTRFS_I(inode)->block_group = group; + + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_mode = mode; + inode->i_ino = objectid; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + fill_inode_item(&inode_item, inode); + location = &BTRFS_I(inode)->location; + location->objectid = objectid; + location->flags = 0; + location->offset = 0; + btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); + + ret = btrfs_insert_inode(trans, root, objectid, &inode_item); + BUG_ON(ret); + + insert_inode_hash(inode); + return inode; +} + +static inline u8 btrfs_inode_type(struct inode *inode) +{ + return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; +} + +static int btrfs_add_link(struct btrfs_trans_handle *trans, + struct dentry *dentry, struct inode *inode) +{ + int ret; + struct btrfs_key key; + struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; + key.objectid = inode->i_ino; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 
0; + + ret = btrfs_insert_dir_item(trans, root, + dentry->d_name.name, dentry->d_name.len, + dentry->d_parent->d_inode->i_ino, + &key, btrfs_inode_type(inode)); + if (ret == 0) { + dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; + ret = btrfs_update_inode(trans, root, + dentry->d_parent->d_inode); + } + return ret; +} + +static int btrfs_add_nondir(struct btrfs_trans_handle *trans, + struct dentry *dentry, struct inode *inode) +{ + int err = btrfs_add_link(trans, dentry, inode); + if (!err) { + d_instantiate(dentry, inode); + return 0; + } + if (err > 0) + err = -EEXIST; + return err; +} + +static int btrfs_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct inode *inode; + int err; + int drop_inode = 0; + u64 objectid; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_unlock; + + btrfs_set_trans_block_group(trans, inode); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + else { + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + } + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); +out_unlock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + +static int btrfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct 
btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct inode *inode = old_dentry->d_inode; + int err; + int drop_inode = 0; + + if (inode->i_nlink == 0) + return -ENOENT; + + inc_nlink(inode); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + atomic_inc(&inode->i_count); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, dir); + btrfs_update_inode(trans, root, inode); + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + +static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 objectid, u64 dirid) +{ + int ret; + char buf[2]; + struct btrfs_key key; + + buf[0] = '.'; + buf[1] = '.'; + + key.objectid = objectid; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + + ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, + &key, BTRFS_FT_DIR); + if (ret) + goto error; + key.objectid = dirid; + ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, + &key, BTRFS_FT_DIR); + if (ret) + goto error; +error: + return ret; +} + +static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *inode; + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + int err = 0; + int drop_on_err = 0; + u64 objectid; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out_unlock; + } + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, + 
BTRFS_I(dir)->block_group, S_IFDIR | mode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_fail; + } + drop_on_err = 1; + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; + btrfs_set_trans_block_group(trans, inode); + + err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); + if (err) + goto out_fail; + + inode->i_size = 6; + err = btrfs_update_inode(trans, root, inode); + if (err) + goto out_fail; + err = btrfs_add_link(trans, dentry, inode); + if (err) + goto out_fail; + d_instantiate(dentry, inode); + drop_on_err = 0; + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); + +out_fail: + btrfs_end_transaction(trans, root); +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + if (drop_on_err) + iput(inode); + btrfs_btree_balance_dirty(root); + return err; +} + +/* + * FIBMAP and others want to pass in a fake buffer head. They need to + * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy + * any packed file data into the fake bh + */ +#define BTRFS_GET_BLOCK_NO_CREATE 0 +#define BTRFS_GET_BLOCK_CREATE 1 +#define BTRFS_GET_BLOCK_NO_DIRECT 2 + +/* + * FIXME create==1 doe not work. 
+ */ +static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int ret; + int err = 0; + u64 blocknr; + u64 extent_start = 0; + u64 extent_end = 0; + u64 objectid = inode->i_ino; + u32 found_type; + u64 alloc_hint = 0; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_file_extent_item *item; + struct btrfs_leaf *leaf; + struct btrfs_disk_key *found_key; + struct btrfs_trans_handle *trans = NULL; + + path = btrfs_alloc_path(); + BUG_ON(!path); + btrfs_init_path(path); + if (create & BTRFS_GET_BLOCK_CREATE) { + WARN_ON(1); + /* this almost but not quite works */ + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + goto out; + } + ret = btrfs_drop_extents(trans, root, inode, + iblock << inode->i_blkbits, + (iblock + 1) << inode->i_blkbits, + &alloc_hint); + BUG_ON(ret); + } + + ret = btrfs_lookup_file_extent(NULL, root, path, + inode->i_ino, + iblock << inode->i_blkbits, 0); + if (ret < 0) { + err = ret; + goto out; + } + + if (ret != 0) { + if (path->slots[0] == 0) { + btrfs_release_path(root, path); + goto not_found; + } + path->slots[0]--; + } + + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + struct btrfs_file_extent_item); + leaf = btrfs_buffer_leaf(path->nodes[0]); + blocknr = btrfs_file_extent_disk_blocknr(item); + blocknr += btrfs_file_extent_offset(item); + + /* are we inside the extent that was found? 
*/ + found_key = &leaf->items[path->slots[0]].key; + found_type = btrfs_disk_key_type(found_key); + if (btrfs_disk_key_objectid(found_key) != objectid || + found_type != BTRFS_EXTENT_DATA_KEY) { + extent_end = 0; + extent_start = 0; + goto not_found; + } + found_type = btrfs_file_extent_type(item); + extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_start = extent_start >> inode->i_blkbits; + extent_end = extent_start + btrfs_file_extent_num_blocks(item); + err = 0; + if (btrfs_file_extent_disk_blocknr(item) == 0) + goto out; + if (iblock >= extent_start && iblock < extent_end) { + btrfs_map_bh_to_logical(root, result, blocknr + + iblock - extent_start); + goto out; + } + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + char *ptr; + char *map; + u32 size; + + if (create & BTRFS_GET_BLOCK_NO_DIRECT) { + err = -EINVAL; + goto out; + } + size = btrfs_file_extent_inline_len(leaf->items + + path->slots[0]); + extent_end = (extent_start + size) >> inode->i_blkbits; + extent_start >>= inode->i_blkbits; + if (iblock < extent_start || iblock > extent_end) { + goto not_found; + } + ptr = btrfs_file_extent_inline_start(item); + map = kmap(result->b_page); + memcpy(map, ptr, size); + memset(map + size, 0, PAGE_CACHE_SIZE - size); + flush_dcache_page(result->b_page); + kunmap(result->b_page); + set_buffer_uptodate(result); + SetPageChecked(result->b_page); + btrfs_map_bh_to_logical(root, result, 0); + } +not_found: + if (create & BTRFS_GET_BLOCK_CREATE) { + struct btrfs_key ins; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, + 1, alloc_hint, (u64)-1, + &ins, 1); + BUG_ON(ret); + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + iblock << inode->i_blkbits, + ins.objectid, ins.offset, + ins.offset); + BUG_ON(ret); + SetPageChecked(result->b_page); + btrfs_map_bh_to_logical(root, result, ins.objectid); + } +out: + if (trans) + err = btrfs_end_transaction(trans, root); + 
btrfs_free_path(path); + return err; +} + +int btrfs_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + int err; + struct btrfs_root *root = BTRFS_I(inode)->root; + mutex_lock(&root->fs_info->fs_mutex); + err = btrfs_get_block_lock(inode, iblock, result, create); + mutex_unlock(&root->fs_info->fs_mutex); + return err; +} + +static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + mutex_lock(&root->fs_info->fs_mutex); + btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + +static sector_t btrfs_bmap(struct address_space *as, sector_t block) +{ + return generic_block_bmap(as, block, btrfs_get_block_bmap); +} + +static int btrfs_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return block_prepare_write(page, from, to, btrfs_get_block); +} + +static int btrfs_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, btrfs_get_block); +} + +/* + * Aside from a tiny bit of packed file data handling, this is the + * same as the generic code. + * + * While block_write_full_page is writing back the dirty buffers under + * the page lock, whoever dirtied the buffers may decide to clean them + * again at any time. We handle that by only looking at the buffer + * state inside lock_buffer(). + * + * If block_write_full_page() is called for regular writeback + * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a + * locked buffer. This only can happen if someone has written the buffer + * directly, with submit_bh(). At the address_space level PageWriteback + * prevents this contention from occurring. 
+ */
+static int __btrfs_write_full_page(struct inode *inode, struct page *page,
+				   struct writeback_control *wbc)
+{
+	int err;
+	sector_t block;		/* file block of the buffer being looked at */
+	sector_t last_block;	/* last file block that is inside i_size */
+	struct buffer_head *bh, *head;
+	const unsigned blocksize = 1 << inode->i_blkbits;
+	int nr_underway = 0;	/* number of buffers passed to submit_bh() */
+
+	BUG_ON(!PageLocked(page));
+
+	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
+
+	if (!page_has_buffers(page)) {
+		create_empty_buffers(page, blocksize,
+					(1 << BH_Dirty)|(1 << BH_Uptodate));
+	}
+
+	/*
+	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
+	 * here, and the (potentially unmapped) buffers may become dirty at
+	 * any time.  If a buffer becomes dirty here after we've inspected it
+	 * then we just miss that fact, and the page stays dirty.
+	 *
+	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
+	 * handle that here by just cleaning them.
+	 */
+
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	head = page_buffers(page);
+	bh = head;
+
+	/*
+	 * Get all the dirty buffers mapped to disk addresses and
+	 * handle any aliases from the underlying blockdev's mapping.
+	 */
+	do {
+		if (block > last_block) {
+			/*
+			 * mapped buffers outside i_size will occur, because
+			 * this page can be outside i_size when there is a
+			 * truncate in progress.
+			 */
+			/*
+			 * The buffer was zeroed by block_write_full_page()
+			 */
+			clear_buffer_dirty(bh);
+			set_buffer_uptodate(bh);
+		} else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
+			WARN_ON(bh->b_size != blocksize);
+			err = btrfs_get_block(inode, block, bh, 0);	/* create == 0 */
+			if (err) {
+				goto recover;
+			}
+			if (buffer_new(bh)) {
+				/* blockdev mappings never come here */
+				clear_buffer_new(bh);
+			}
+		}
+		bh = bh->b_this_page;
+		block++;
+	} while (bh != head);
+
+	do {
+		if (!buffer_mapped(bh))
+			continue;
+		/*
+		 * If it's a fully non-blocking write attempt and we cannot
+		 * lock the buffer then redirty the page.  Note that this can
+		 * potentially cause a busy-wait loop from pdflush and kswapd
+		 * activity, but those code paths have their own higher-level
+		 * throttling.
+		 *
+		 * Once locked, a buffer is only queued for writing when it
+		 * was dirty and its b_blocknr is not 0.  Any other buffer
+		 * is unlocked again without being submitted.
+		 */
+		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
+			lock_buffer(bh);
+		} else if (test_set_buffer_locked(bh)) {
+			redirty_page_for_writepage(wbc, page);
+			continue;
+		}
+		if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
+			mark_buffer_async_write(bh);
+		} else {
+			unlock_buffer(bh);
+		}
+	} while ((bh = bh->b_this_page) != head);
+
+	/*
+	 * The page and its buffers are protected by PageWriteback(), so we can
+	 * drop the bh refcounts early.
+	 */
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+
+	do {
+		struct buffer_head *next = bh->b_this_page;
+		if (buffer_async_write(bh)) {
+			submit_bh(WRITE, bh);
+			nr_underway++;
+		}
+		bh = next;
+	} while (bh != head);
+	unlock_page(page);
+
+	err = 0;
+done:
+	if (nr_underway == 0) {
+		/*
+		 * The page was marked dirty, but the buffers were
+		 * clean.  Someone wrote them back by hand with
+		 * ll_rw_block/submit_bh.  A rare case.
+		 *
+		 * Nothing was submitted, so writeback is ended right
+		 * here, and the page is marked uptodate if every one
+		 * of its buffers is.
+		 */
+		int uptodate = 1;
+		do {
+			if (!buffer_uptodate(bh)) {
+				uptodate = 0;
+				break;
+			}
+			bh = bh->b_this_page;
+		} while (bh != head);
+		if (uptodate)
+			SetPageUptodate(page);
+		end_page_writeback(page);
+	}
+	return err;
+
+recover:
+	/*
+	 * ENOSPC, or some other error.  We may already have added some
+	 * blocks to the file, so we need to write these out to avoid
+	 * exposing stale data.
+	 * The page is currently locked and not marked for writeback
+	 *
+	 * err still holds the btrfs_get_block() failure and is what the
+	 * done: path returns.
+	 */
+	bh = head;
+	/* Recovery: lock and submit the mapped buffers */
+	do {
+		if (buffer_mapped(bh) && buffer_dirty(bh)) {
+			lock_buffer(bh);
+			mark_buffer_async_write(bh);
+		} else {
+			/*
+			 * The buffer may have been set dirty during
+			 * attachment to a dirty page.
+			 */
+			clear_buffer_dirty(bh);
+		}
+	} while ((bh = bh->b_this_page) != head);
+	SetPageError(page);
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+	do {
+		struct buffer_head *next = bh->b_this_page;
+		if (buffer_async_write(bh)) {
+			clear_buffer_dirty(bh);
+			submit_bh(WRITE, bh);
+			nr_underway++;
+		}
+		bh = next;
+	} while (bh != head);
+	unlock_page(page);
+	goto done;
+}
+
+static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct inode * const inode = page->mapping->host;
+	loff_t i_size = i_size_read(inode);
+	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;	/* page holding i_size */
+	unsigned offset;	/* offset of i_size inside that page */
+	void *kaddr;
+
+	/* Is the page fully inside i_size? */
+	if (page->index < end_index)
+		return __btrfs_write_full_page(inode, page, wbc);
+
+	/* Is the page fully outside i_size? (truncate in progress) */
+	offset = i_size & (PAGE_CACHE_SIZE-1);
+	if (page->index >= end_index+1 || !offset) {
+		/*
+		 * The page may have dirty, unmapped buffers.  For example,
+		 * they may have been added in ext3_writepage().  Make them
+		 * freeable here, so the page does not leak.
+		 */
+		block_invalidatepage(page, 0);
+		unlock_page(page);
+		return 0; /* don't care */
+	}
+
+	/*
+	 * The page straddles i_size.  It must be zeroed out on each and every
+	 * writepage invocation because it may be mmapped.  "A file is mapped
+	 * in multiples of the page size.  For a file that is not a multiple of
+	 * the  page size, the remaining memory is zeroed when mapped, and
+	 * writes to that region are not written out to the file."
+	 */
+	kaddr = kmap_atomic(page, KM_USER0);
+	memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+	flush_dcache_page(page);
+	kunmap_atomic(kaddr, KM_USER0);
+	return __btrfs_write_full_page(inode, page, wbc);
+}
+
+/*
+ * truncate a regular file to inode->i_size: zero the tail of the last
+ * page, drop the items beyond the new size, and update the inode item
+ */
+static void btrfs_truncate(struct inode *inode)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+	struct btrfs_trans_handle *trans;
+
+	if (!S_ISREG(inode->i_mode))
+		return;
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+		return;
+
+	btrfs_truncate_page(inode->i_mapping, inode->i_size);
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	trans = btrfs_start_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, inode);
+
+	/* FIXME, add redo link to tree so we don't leak on crash */
+	ret = btrfs_truncate_in_trans(trans, root, inode);
+	BUG_ON(ret);
+	btrfs_update_inode(trans, root, inode);
+	ret = btrfs_end_transaction(trans, root);
+	BUG_ON(ret);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	btrfs_btree_balance_dirty(root);
+}
+
+/*
+ * mark the page uptodate, dirty it when its first buffer is mapped to
+ * a non-zero block, and push i_size out to the end of the write
+ */
+int btrfs_commit_write(struct file *file, struct page *page,
+		       unsigned from, unsigned to)
+{
+	struct inode *inode = page->mapping->host;
+	struct buffer_head *bh;
+	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+
+	SetPageUptodate(page);
+	bh = page_buffers(page);
+	set_buffer_uptodate(bh);
+	if (buffer_mapped(bh) && bh->b_blocknr != 0) {
+		set_page_dirty(page);
+	}
+	if (pos > inode->i_size) {
+		i_size_write(inode, pos);
+		mark_inode_dirty(inode);
+	}
+	return 0;
+}
+
+/*
+ * create a new, empty subvolume called name: allocate an empty leaf as
+ * its tree root, insert the root item and a directory entry for it in
+ * the tree of tree roots, then create its root directory inode in a
+ * second transaction.  Returns 0 or -ENOSPC, other failures BUG.
+ */
+static int create_subvol(struct btrfs_root *root, char *name, int namelen)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key key;
+	struct btrfs_root_item root_item;
+	struct btrfs_inode_item *inode_item;
+	struct buffer_head *subvol;
+	struct btrfs_leaf *leaf;
+	struct btrfs_root *new_root;
+	struct inode *inode;
+	struct inode *dir;
+	int ret;
+	u64 objectid;
+	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+
+	subvol = btrfs_alloc_free_block(trans, root, 0);
+	if (subvol == NULL) {
+		/* don't return with the transaction open and the lock held */
+		btrfs_end_transaction(trans, root);
+		mutex_unlock(&root->fs_info->fs_mutex);
+		return -ENOSPC;
+	}
+	leaf = btrfs_buffer_leaf(subvol);
+	btrfs_set_header_nritems(&leaf->header, 0);
+	btrfs_set_header_level(&leaf->header, 0);
+	btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
+	btrfs_set_header_generation(&leaf->header, trans->transid);
+	btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
+	memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
+	       sizeof(leaf->header.fsid));
+	mark_buffer_dirty(subvol);
+
+	/*
+	 * root_item is copied into the tree, so clear all of it and not
+	 * only the embedded inode item
+	 */
+	memset(&root_item, 0, sizeof(root_item));
+	inode_item = &root_item.inode;
+	btrfs_set_inode_generation(inode_item, 1);
+	btrfs_set_inode_size(inode_item, 3);
+	btrfs_set_inode_nlink(inode_item, 1);
+	btrfs_set_inode_nblocks(inode_item, 1);
+	btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
+
+	btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
+	btrfs_set_root_refs(&root_item, 1);
+	brelse(subvol);
+	subvol = NULL;
+
+	ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
+				       0, &objectid);
+	BUG_ON(ret);
+
+	btrfs_set_root_dirid(&root_item, new_dirid);
+
+	key.objectid = objectid;
+	key.offset = 1;
+	key.flags = 0;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
+				&root_item);
+	BUG_ON(ret);
+
+	/*
+	 * insert the directory item
+	 */
+	key.offset = (u64)-1;
+	dir = root->fs_info->sb->s_root->d_inode;
+	ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
+				    name, namelen, dir->i_ino, &key,
+				    BTRFS_FT_DIR);
+	BUG_ON(ret);
+
+	ret = btrfs_commit_transaction(trans, root);
+	BUG_ON(ret);
+
+	/* btrfs_read_fs_root() reports failure with an ERR_PTR */
+	new_root = btrfs_read_fs_root(root->fs_info, &key);
+	BUG_ON(!new_root || IS_ERR(new_root));
+
+	trans = btrfs_start_transaction(new_root, 1);
+	BUG_ON(!trans);
+
+	/* btrfs_new_inode() returns ERR_PTR(-ENOMEM) on failure */
+	inode = btrfs_new_inode(trans, new_root, new_dirid,
+				BTRFS_I(dir)->block_group, S_IFDIR | 0700);
+	BUG_ON(IS_ERR(inode));
+	inode->i_op = &btrfs_dir_inode_operations;
+	inode->i_fop = &btrfs_dir_file_operations;
+
+	ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
+	BUG_ON(ret);
+
+	inode->i_nlink = 1;
+	inode->i_size = 6;
+	ret = btrfs_update_inode(trans, new_root, inode);
+	BUG_ON(ret);
+
+	ret = btrfs_commit_transaction(trans, new_root);
+	BUG_ON(ret);
+
+	iput(inode);
+
+	mutex_unlock(&root->fs_info->fs_mutex);
+	btrfs_btree_balance_dirty(root);
+	return 0;
+}
+
+/*
+ * snapshot root under the given name: insert a copy of its root item
+ * that points at the current root node, add a directory entry for it
+ * in the tree of tree roots, and take an extra reference on the root.
+ * Returns -EINVAL when root->ref_cows is not set.
+ */
+static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key key;
+	struct btrfs_root_item new_root_item;
+	int ret;
+	u64 objectid;
+
+	if (!root->ref_cows)
+		return -EINVAL;
+
+	mutex_lock(&root->fs_info->fs_mutex);
+	trans = btrfs_start_transaction(root, 1);
+	BUG_ON(!trans);
+
+	ret = btrfs_update_inode(trans, root, root->inode);
+	BUG_ON(ret);
+
+	ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
+				       0, &objectid);
+	BUG_ON(ret);
+
+	memcpy(&new_root_item, &root->root_item,
+	       sizeof(new_root_item));
+
+	key.objectid = objectid;
+	key.offset = 1;
+	key.flags = 0;
+	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
+	btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
+
+	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
+				&new_root_item);
+	BUG_ON(ret);
+
+	/*
+	 * insert the directory item
+	 */
+	key.offset = (u64)-1;
+	ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
+				    name, namelen,
+				    root->fs_info->sb->s_root->d_inode->i_ino,
+				    &key, BTRFS_FT_DIR);
+
+	BUG_ON(ret);
+
+	ret = btrfs_inc_root_ref(trans, root);
+	BUG_ON(ret);
+
+	ret = btrfs_commit_transaction(trans, root);
+	BUG_ON(ret);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	btrfs_btree_balance_dirty(root);
+	return 0;
+}
+
+int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
+		cmd, unsigned long arg)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_ioctl_vol_args vol_args;
+	int ret = 0;
+	struct btrfs_dir_item *di;
+	int namelen;
+	struct btrfs_path *path;
+	u64 root_dirid;
+
+	switch (cmd) {
+	case BTRFS_IOC_SNAP_CREATE:
+
if (copy_from_user(&vol_args, + (struct btrfs_ioctl_vol_args __user *)arg, + sizeof(vol_args))) + return -EFAULT; + namelen = strlen(vol_args.name); + if (namelen > BTRFS_VOL_NAME_MAX) + return -EINVAL; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, + mutex_lock(&root->fs_info->fs_mutex); + di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, + path, root_dirid, + vol_args.name, namelen, 0); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_free_path(path); + if (di && !IS_ERR(di)) + return -EEXIST; + + if (root == root->fs_info->tree_root) + ret = create_subvol(root, vol_args.name, namelen); + else + ret = create_snapshot(root, vol_args.name, namelen); + WARN_ON(ret); + break; + default: + return -ENOTTY; + } + return ret; +} + +#ifdef CONFIG_COMPAT +long btrfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct inode *inode = file->f_path.dentry->d_inode; + int ret; + lock_kernel(); + ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; + +} +#endif + +/* + * Called inside transaction, so use GFP_NOFS + */ +struct inode *btrfs_alloc_inode(struct super_block *sb) +{ + struct btrfs_inode *ei; + + ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); + if (!ei) + return NULL; + return &ei->vfs_inode; +} + +void btrfs_destroy_inode(struct inode *inode) +{ + WARN_ON(!list_empty(&inode->i_dentry)); + WARN_ON(inode->i_data.nrpages); + + kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} + +static void init_once(void * foo, struct kmem_cache * cachep, + unsigned long flags) +{ + struct btrfs_inode *ei = (struct btrfs_inode *) foo; + + inode_init_once(&ei->vfs_inode); +} + +void btrfs_destroy_cachep(void) +{ + if (btrfs_inode_cachep) + kmem_cache_destroy(btrfs_inode_cachep); + if (btrfs_trans_handle_cachep) + kmem_cache_destroy(btrfs_trans_handle_cachep); + if (btrfs_transaction_cachep) + 
kmem_cache_destroy(btrfs_transaction_cachep); + if (btrfs_bit_radix_cachep) + kmem_cache_destroy(btrfs_bit_radix_cachep); + if (btrfs_path_cachep) + kmem_cache_destroy(btrfs_path_cachep); +} + +int btrfs_init_cachep(void) +{ + btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", + sizeof(struct btrfs_inode), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once, NULL); + if (!btrfs_inode_cachep) + goto fail; + btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", + sizeof(struct btrfs_trans_handle), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + if (!btrfs_trans_handle_cachep) + goto fail; + btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + if (!btrfs_transaction_cachep) + goto fail; + btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", + sizeof(struct btrfs_transaction), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL, NULL); + if (!btrfs_path_cachep) + goto fail; + btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", + 256, + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD | + SLAB_DESTROY_BY_RCU), + NULL, NULL); + if (!btrfs_bit_radix_cachep) + goto fail; + return 0; +fail: + btrfs_destroy_cachep(); + return -ENOMEM; +} + +static int btrfs_getattr(struct vfsmount *mnt, + struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blksize = 256 * 1024; + return 0; +} + +static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir,struct dentry *new_dentry) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(old_dir)->root; + struct inode *new_inode = new_dentry->d_inode; + struct inode *old_inode = old_dentry->d_inode; + struct timespec ctime = CURRENT_TIME; + struct btrfs_path *path; + struct btrfs_dir_item *di; + int ret; + + if 
(S_ISDIR(old_inode->i_mode) && new_inode && + new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { + return -ENOTEMPTY; + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out_fail; + } + + old_dentry->d_inode->i_nlink++; + old_dir->i_ctime = old_dir->i_mtime = ctime; + new_dir->i_ctime = new_dir->i_mtime = ctime; + old_inode->i_ctime = ctime; + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { + struct btrfs_key *location = &BTRFS_I(new_dir)->location; + u64 old_parent_oid; + di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino, + "..", 2, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out_fail; + } + if (!di) { + ret = -ENOENT; + goto out_fail; + } + old_parent_oid = btrfs_disk_key_objectid(&di->location); + ret = btrfs_del_item(trans, root, path); + if (ret) { + ret = -EIO; + goto out_fail; + } + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_index_item(trans, root, path, + old_inode->i_ino, + old_parent_oid, + "..", 2, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out_fail; + } + if (!di) { + ret = -ENOENT; + goto out_fail; + } + ret = btrfs_del_item(trans, root, path); + if (ret) { + ret = -EIO; + goto out_fail; + } + btrfs_release_path(root, path); + + ret = btrfs_insert_dir_item(trans, root, "..", 2, + old_inode->i_ino, location, + BTRFS_FT_DIR); + if (ret) + goto out_fail; + } + + + ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); + if (ret) + goto out_fail; + + if (new_inode) { + new_inode->i_ctime = CURRENT_TIME; + ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); + if (ret) + goto out_fail; + if (S_ISDIR(new_inode->i_mode)) + clear_nlink(new_inode); + else + drop_nlink(new_inode); + btrfs_update_inode(trans, root, new_inode); + } + ret = btrfs_add_link(trans, new_dentry, old_inode); + if (ret) + goto out_fail; + +out_fail: + btrfs_free_path(path); + 
btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +static int btrfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_path *path; + struct btrfs_key key; + struct inode *inode; + int err; + int drop_inode = 0; + u64 objectid; + int name_len; + int datasize; + char *ptr; + struct btrfs_file_extent_item *ei; + + name_len = strlen(symname) + 1; + if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) + return -ENAMETOOLONG; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); + + err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); + if (err) { + err = -ENOSPC; + goto out_unlock; + } + + inode = btrfs_new_inode(trans, root, objectid, + BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_unlock; + + btrfs_set_trans_block_group(trans, inode); + err = btrfs_add_nondir(trans, dentry, inode); + if (err) + drop_inode = 1; + else { + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + } + dir->i_sb->s_dirt = 1; + btrfs_update_inode_block_group(trans, inode); + btrfs_update_inode_block_group(trans, dir); + if (drop_inode) + goto out_unlock; + + path = btrfs_alloc_path(); + BUG_ON(!path); + key.objectid = inode->i_ino; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + datasize = btrfs_file_extent_calc_inline_size(name_len); + err = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + BUG_ON(err); + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = 
btrfs_file_extent_inline_start(ei); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, symname, name_len); + mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + inode->i_op = &btrfs_symlink_inode_operations; + inode->i_mapping->a_ops = &btrfs_symlink_aops; + inode->i_size = name_len - 1; + btrfs_update_inode(trans, root, inode); + err = 0; + +out_unlock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + + if (drop_inode) { + inode_dec_link_count(inode); + iput(inode); + } + btrfs_btree_balance_dirty(root); + return err; +} + +static struct inode_operations btrfs_dir_inode_operations = { + .lookup = btrfs_lookup, + .create = btrfs_create, + .unlink = btrfs_unlink, + .link = btrfs_link, + .mkdir = btrfs_mkdir, + .rmdir = btrfs_rmdir, + .rename = btrfs_rename, + .symlink = btrfs_symlink, + .setattr = btrfs_setattr, +}; + +static struct inode_operations btrfs_dir_ro_inode_operations = { + .lookup = btrfs_lookup, +}; + +static struct file_operations btrfs_dir_file_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = btrfs_readdir, + .ioctl = btrfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = btrfs_compat_ioctl, +#endif +}; + +static struct address_space_operations btrfs_aops = { + .readpage = btrfs_readpage, + .writepage = btrfs_writepage, + .sync_page = block_sync_page, + .prepare_write = btrfs_prepare_write, + .commit_write = btrfs_commit_write, + .bmap = btrfs_bmap, +}; + +static struct address_space_operations btrfs_symlink_aops = { + .readpage = btrfs_readpage, + .writepage = btrfs_writepage, +}; + +static struct inode_operations btrfs_file_inode_operations = { + .truncate = btrfs_truncate, + .getattr = btrfs_getattr, + .setattr = btrfs_setattr, +}; + +static struct inode_operations btrfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, +}; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 
9749090cfbb8..94b3535a2a11 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -20,2686 +20,101 @@ #include "ioctl.h" #include "print-tree.h" -struct btrfs_iget_args { - u64 ino; - struct btrfs_root *root; -}; - -#define BTRFS_SUPER_MAGIC 0x9123682E - -static struct inode_operations btrfs_dir_inode_operations; -static struct inode_operations btrfs_symlink_inode_operations; -static struct inode_operations btrfs_dir_ro_inode_operations; -static struct super_operations btrfs_super_ops; -static struct file_operations btrfs_dir_file_operations; -static struct inode_operations btrfs_file_inode_operations; -static struct address_space_operations btrfs_aops; -static struct address_space_operations btrfs_symlink_aops; -static struct file_operations btrfs_file_operations; - -static int drop_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 start, u64 end, u64 *hint_block); -static int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create); - - -#define S_SHIFT 12 -static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, -}; - -static void btrfs_read_locked_inode(struct inode *inode) -{ - struct btrfs_path *path; - struct btrfs_inode_item *inode_item; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_key location; - u64 alloc_group_block; - int ret; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - mutex_lock(&root->fs_info->fs_mutex); - - memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); - ret = btrfs_lookup_inode(NULL, root, path, &location, 0); - if (ret) { - btrfs_free_path(path); - goto make_bad; - } - inode_item = 
btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - - inode->i_mode = btrfs_inode_mode(inode_item); - inode->i_nlink = btrfs_inode_nlink(inode_item); - inode->i_uid = btrfs_inode_uid(inode_item); - inode->i_gid = btrfs_inode_gid(inode_item); - inode->i_size = btrfs_inode_size(inode_item); - inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); - inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); - inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); - inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); - inode->i_blocks = btrfs_inode_nblocks(inode_item); - inode->i_generation = btrfs_inode_generation(inode_item); - alloc_group_block = btrfs_inode_block_group(inode_item); - BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, - alloc_group_block); - - btrfs_free_path(path); - inode_item = NULL; - - mutex_unlock(&root->fs_info->fs_mutex); - - switch (inode->i_mode & S_IFMT) { -#if 0 - default: - init_special_inode(inode, inode->i_mode, - btrfs_inode_rdev(inode_item)); - break; -#endif - case S_IFREG: - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_fop = &btrfs_file_operations; - inode->i_op = &btrfs_file_inode_operations; - break; - case S_IFDIR: - inode->i_fop = &btrfs_dir_file_operations; - if (root == root->fs_info->tree_root) - inode->i_op = &btrfs_dir_ro_inode_operations; - else - inode->i_op = &btrfs_dir_inode_operations; - break; - case S_IFLNK: - inode->i_op = &btrfs_symlink_inode_operations; - inode->i_mapping->a_ops = &btrfs_symlink_aops; - break; - } - return; - -make_bad: - btrfs_release_path(root, path); - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - make_bad_inode(inode); -} - -static void fill_inode_item(struct btrfs_inode_item *item, - struct inode *inode) -{ - 
btrfs_set_inode_uid(item, inode->i_uid); - btrfs_set_inode_gid(item, inode->i_gid); - btrfs_set_inode_size(item, inode->i_size); - btrfs_set_inode_mode(item, inode->i_mode); - btrfs_set_inode_nlink(item, inode->i_nlink); - btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); - btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); - btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); - btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); - btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); - btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(item, inode->i_blocks); - btrfs_set_inode_generation(item, inode->i_generation); - btrfs_set_inode_block_group(item, - BTRFS_I(inode)->block_group->key.objectid); -} - -static int btrfs_update_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) -{ - struct btrfs_inode_item *inode_item; - struct btrfs_path *path; - int ret; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, - &BTRFS_I(inode)->location, 1); - if (ret) { - if (ret > 0) - ret = -ENOENT; - goto failed; - } - - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - - fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path->nodes[0]); - ret = 0; -failed: - btrfs_release_path(root, path); - btrfs_free_path(path); - return ret; -} - - -static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *dir, - struct dentry *dentry) -{ - struct btrfs_path *path; - const char *name = dentry->d_name.name; - int name_len = dentry->d_name.len; - int ret = 0; - u64 objectid; - struct btrfs_dir_item *di; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, - name, name_len, -1); - if (IS_ERR(di)) { - ret = 
PTR_ERR(di); - goto err; - } - if (!di) { - ret = -ENOENT; - goto err; - } - objectid = btrfs_disk_key_objectid(&di->location); - ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); - btrfs_release_path(root, path); - - di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - objectid, name, name_len, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto err; - } - if (!di) { - ret = -ENOENT; - goto err; - } - ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); - - dentry->d_inode->i_ctime = dir->i_ctime; -err: - btrfs_free_path(path); - if (!ret) { - dir->i_size -= name_len * 2; - btrfs_update_inode(trans, root, dir); - drop_nlink(dentry->d_inode); - btrfs_update_inode(trans, root, dentry->d_inode); - dir->i_sb->s_dirt = 1; - } - return ret; -} - -static int btrfs_unlink(struct inode *dir, struct dentry *dentry) -{ - struct btrfs_root *root; - struct btrfs_trans_handle *trans; - int ret; - - root = BTRFS_I(dir)->root; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - ret = btrfs_unlink_trans(trans, root, dir, dentry); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); - return ret; -} - -static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - int err; - int ret; - struct btrfs_root *root = BTRFS_I(dir)->root; - struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_trans_handle *trans; - struct btrfs_key found_key; - int found_type; - struct btrfs_leaf *leaf; - char *goodnames = ".."; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - key.objectid = inode->i_ino; - key.offset = (u64)-1; - key.flags = (u32)-1; - while(1) { - ret = btrfs_search_slot(trans, root, &key, 
path, -1, 1); - if (ret < 0) { - err = ret; - goto out; - } - BUG_ON(ret == 0); - if (path->slots[0] == 0) { - err = -ENOENT; - goto out; - } - path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); - found_type = btrfs_key_type(&found_key); - if (found_key.objectid != inode->i_ino) { - err = -ENOENT; - goto out; - } - if ((found_type != BTRFS_DIR_ITEM_KEY && - found_type != BTRFS_DIR_INDEX_KEY) || - (!btrfs_match_dir_item_name(root, path, goodnames, 2) && - !btrfs_match_dir_item_name(root, path, goodnames, 1))) { - err = -ENOTEMPTY; - goto out; - } - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - - if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1) - break; - btrfs_release_path(root, path); - } - ret = 0; - btrfs_release_path(root, path); - - /* now the directory is empty */ - err = btrfs_unlink_trans(trans, root, dir, dentry); - if (!err) { - inode->i_size = 0; - } -out: - btrfs_release_path(root, path); - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - ret = btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root); - if (ret && !err) - err = ret; - return err; -} - -static int btrfs_free_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) -{ - struct btrfs_path *path; - int ret; - - clear_inode(inode); - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - ret = btrfs_lookup_inode(trans, root, path, - &BTRFS_I(inode)->location, -1); - BUG_ON(ret); - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - btrfs_free_path(path); - return ret; -} - -static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid) -{ - struct btrfs_node *node; - int i; - int nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - if (slot == 0) - 
return; - nritems = btrfs_header_nritems(&node->header); - for (i = slot - 1; i >= 0; i--) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} - -static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode) -{ - int ret; - struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_disk_key *found_key; - u32 found_type; - struct btrfs_leaf *leaf; - struct btrfs_file_extent_item *fi; - u64 extent_start = 0; - u64 extent_num_blocks = 0; - u64 item_end = 0; - int found_extent; - int del_item; - - path = btrfs_alloc_path(); - BUG_ON(!path); - /* FIXME, add redo link to tree so we don't leak on crash */ - key.objectid = inode->i_ino; - key.offset = (u64)-1; - key.flags = (u32)-1; - while(1) { - btrfs_init_path(path); - fi = NULL; - ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) { - goto error; - } - if (ret > 0) { - BUG_ON(path->slots[0] == 0); - path->slots[0]--; - } - reada_truncate(root, path, inode->i_ino); - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != inode->i_ino) - break; - if (found_type != BTRFS_CSUM_ITEM_KEY && - found_type != BTRFS_DIR_ITEM_KEY && - found_type != BTRFS_DIR_INDEX_KEY && - found_type != BTRFS_EXTENT_DATA_KEY) - break; - item_end = btrfs_disk_key_offset(found_key); - if (found_type == BTRFS_EXTENT_DATA_KEY) { - fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) != - BTRFS_FILE_EXTENT_INLINE) { - item_end += btrfs_file_extent_num_blocks(fi) << - inode->i_blkbits; - } - } - if (found_type == BTRFS_CSUM_ITEM_KEY) { - ret = btrfs_csum_truncate(trans, root, path, - 
inode->i_size); - BUG_ON(ret); - } - if (item_end < inode->i_size) { - if (found_type) { - btrfs_set_key_type(&key, found_type - 1); - continue; - } - break; - } - if (btrfs_disk_key_offset(found_key) >= inode->i_size) - del_item = 1; - else - del_item = 0; - found_extent = 0; - - if (found_type == BTRFS_EXTENT_DATA_KEY && - btrfs_file_extent_type(fi) != - BTRFS_FILE_EXTENT_INLINE) { - u64 num_dec; - if (!del_item) { - u64 orig_num_blocks = - btrfs_file_extent_num_blocks(fi); - extent_num_blocks = inode->i_size - - btrfs_disk_key_offset(found_key) + - root->blocksize - 1; - extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(fi, - extent_num_blocks); - inode->i_blocks -= (orig_num_blocks - - extent_num_blocks) << 3; - mark_buffer_dirty(path->nodes[0]); - } else { - extent_start = - btrfs_file_extent_disk_blocknr(fi); - extent_num_blocks = - btrfs_file_extent_disk_num_blocks(fi); - /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(fi) << 3; - if (extent_start != 0) { - found_extent = 1; - inode->i_blocks -= num_dec; - } - } - } - if (del_item) { - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - } else { - break; - } - btrfs_release_path(root, path); - if (found_extent) { - ret = btrfs_free_extent(trans, root, extent_start, - extent_num_blocks, 0); - BUG_ON(ret); - } - } - ret = 0; -error: - btrfs_release_path(root, path); - btrfs_free_path(path); - inode->i_sb->s_dirt = 1; - return ret; -} - -static int btrfs_truncate_page(struct address_space *mapping, loff_t from) -{ - struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; - pgoff_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); - struct page *page; - char *kaddr; - int ret = 0; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 alloc_hint; - struct btrfs_key ins; - struct btrfs_trans_handle *trans; - - if ((offset & (blocksize - 1)) == 0) - goto out; - - ret = -ENOMEM; - page = 
grab_cache_page(mapping, index); - if (!page) - goto out; - - if (!PageUptodate(page)) { - ret = mpage_readpage(page, btrfs_get_block); - lock_page(page); - if (!PageUptodate(page)) { - ret = -EIO; - goto out; - } - } - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - ret = drop_extents(trans, root, inode, page->index << PAGE_CACHE_SHIFT, - (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint); - BUG_ON(ret); - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, - alloc_hint, (u64)-1, &ins, 1); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - page->index << PAGE_CACHE_SHIFT, - ins.objectid, 1, 1); - BUG_ON(ret); - SetPageChecked(page); - kaddr = kmap(page); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - btrfs_csum_file_block(trans, root, inode->i_ino, - page->index << PAGE_CACHE_SHIFT, - kaddr, PAGE_CACHE_SIZE); - kunmap(page); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); -out: - return ret; -} - -static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) -{ - struct inode *inode = dentry->d_inode; - int err; - - err = inode_change_ok(inode, attr); - if (err) - return err; - - if (S_ISREG(inode->i_mode) && - attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 mask = root->blocksize - 1; - u64 pos = (inode->i_size + mask) & ~mask; - u64 hole_size; - - if (attr->ia_size <= pos) - goto out; - - btrfs_truncate_page(inode->i_mapping, inode->i_size); - - hole_size = (attr->ia_size - pos + mask) & ~mask; - hole_size >>= inode->i_blkbits; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - err = btrfs_insert_file_extent(trans, 
root, inode->i_ino, - pos, 0, 0, hole_size); - BUG_ON(err); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - } -out: - err = inode_setattr(inode, attr); - - return err; -} -static void btrfs_delete_inode(struct inode *inode) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - - truncate_inode_pages(&inode->i_data, 0); - if (is_bad_inode(inode)) { - goto no_delete; - } - inode->i_size = 0; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); - btrfs_free_inode(trans, root, inode); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); - return; -no_delete: - clear_inode(inode); -} - -static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, - struct btrfs_key *location) -{ - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - struct btrfs_dir_item *di; - struct btrfs_path *path; - struct btrfs_root *root = BTRFS_I(dir)->root; - int ret; - - path = btrfs_alloc_path(); - BUG_ON(!path); - btrfs_init_path(path); - di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name, - namelen, 0); - if (!di || IS_ERR(di)) { - location->objectid = 0; - ret = 0; - goto out; - } - btrfs_disk_key_to_cpu(location, &di->location); -out: - btrfs_release_path(root, path); - btrfs_free_path(path); - return ret; -} - -static int fixup_tree_root_location(struct btrfs_root *root, - struct btrfs_key *location, - struct btrfs_root **sub_root) -{ - struct btrfs_path *path; - struct btrfs_root_item *ri; - - if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) - return 0; - if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) - return 0; - - path = btrfs_alloc_path(); - BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); - - *sub_root = btrfs_read_fs_root(root->fs_info, 
location); - if (IS_ERR(*sub_root)) - return PTR_ERR(*sub_root); - - ri = &(*sub_root)->root_item; - location->objectid = btrfs_root_dirid(ri); - location->flags = 0; - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - location->offset = 0; - - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return 0; -} - -static int btrfs_init_locked_inode(struct inode *inode, void *p) -{ - struct btrfs_iget_args *args = p; - inode->i_ino = args->ino; - BTRFS_I(inode)->root = args->root; - return 0; -} - -static int btrfs_find_actor(struct inode *inode, void *opaque) -{ - struct btrfs_iget_args *args = opaque; - return (args->ino == inode->i_ino && - args->root == BTRFS_I(inode)->root); -} - -static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, - struct btrfs_root *root) -{ - struct inode *inode; - struct btrfs_iget_args args; - args.ino = objectid; - args.root = root; - - inode = iget5_locked(s, objectid, btrfs_find_actor, - btrfs_init_locked_inode, - (void *)&args); - return inode; -} - -static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct inode * inode; - struct btrfs_inode *bi = BTRFS_I(dir); - struct btrfs_root *root = bi->root; - struct btrfs_root *sub_root = root; - struct btrfs_key location; - int ret; - - if (dentry->d_name.len > BTRFS_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_inode_by_name(dir, dentry, &location); - mutex_unlock(&root->fs_info->fs_mutex); - if (ret < 0) - return ERR_PTR(ret); - inode = NULL; - if (location.objectid) { - ret = fixup_tree_root_location(root, &location, &sub_root); - if (ret < 0) - return ERR_PTR(ret); - if (ret > 0) - return ERR_PTR(-ENOENT); - inode = btrfs_iget_locked(dir->i_sb, location.objectid, - sub_root); - if (!inode) - return ERR_PTR(-EACCES); - if (inode->i_state & I_NEW) { - if (sub_root != root) { -printk("adding new root for inode %lu root %p (found %p)\n", 
inode->i_ino, sub_root, BTRFS_I(inode)->root); - igrab(inode); - sub_root->inode = inode; - } - BTRFS_I(inode)->root = sub_root; - memcpy(&BTRFS_I(inode)->location, &location, - sizeof(location)); - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); - } - } - return d_splice_alias(inode, dentry); -} - -static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path, - u64 objectid) -{ - struct btrfs_node *node; - int i; - u32 nritems; - u64 item_objectid; - u64 blocknr; - int slot; - int ret; - - if (!path->nodes[1]) - return; - node = btrfs_buffer_node(path->nodes[1]); - slot = path->slots[1]; - nritems = btrfs_header_nritems(&node->header); - for (i = slot + 1; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); - if (item_objectid != objectid) - break; - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); - if (ret) - break; - } -} -static unsigned char btrfs_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK -}; - -static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_item *item; - struct btrfs_dir_item *di; - struct btrfs_key key; - struct btrfs_path *path; - int ret; - u32 nritems; - struct btrfs_leaf *leaf; - int slot; - int advance; - unsigned char d_type; - int over = 0; - u32 di_cur; - u32 di_total; - u32 di_len; - int key_type = BTRFS_DIR_INDEX_KEY; - - /* FIXME, use a real flag for deciding about the key type */ - if (root->fs_info->tree_root == root) - key_type = BTRFS_DIR_ITEM_KEY; - mutex_lock(&root->fs_info->fs_mutex); - key.objectid = inode->i_ino; - key.flags = 0; - btrfs_set_key_type(&key, key_type); - key.offset = filp->f_pos; - path = btrfs_alloc_path(); - btrfs_init_path(path); - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) - goto err; - advance = 0; - 
reada_leaves(root, path, inode->i_ino); - while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); - slot = path->slots[0]; - if (advance || slot >= nritems) { - if (slot >= nritems -1) { - reada_leaves(root, path, inode->i_ino); - ret = btrfs_next_leaf(root, path); - if (ret) - break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); - slot = path->slots[0]; - } else { - slot++; - path->slots[0]++; - } - } - advance = 1; - item = leaf->items + slot; - if (btrfs_disk_key_objectid(&item->key) != key.objectid) - break; - if (btrfs_disk_key_type(&item->key) != key_type) - break; - if (btrfs_disk_key_offset(&item->key) < filp->f_pos) - continue; - filp->f_pos = btrfs_disk_key_offset(&item->key); - advance = 1; - di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); - di_cur = 0; - di_total = btrfs_item_size(leaf->items + slot); - while(di_cur < di_total) { - d_type = btrfs_filetype_table[btrfs_dir_type(di)]; - over = filldir(dirent, (const char *)(di + 1), - btrfs_dir_name_len(di), - btrfs_disk_key_offset(&item->key), - btrfs_disk_key_objectid(&di->location), - d_type); - if (over) - goto nopos; - di_len = btrfs_dir_name_len(di) + sizeof(*di); - di_cur += di_len; - di = (struct btrfs_dir_item *)((char *)di + di_len); - } - } - filp->f_pos++; -nopos: - ret = 0; -err: - btrfs_release_path(root, path); - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} - -static void btrfs_put_super (struct super_block * sb) -{ - struct btrfs_root *root = btrfs_sb(sb); - int ret; - - ret = close_ctree(root); - if (ret) { - printk("close ctree returns %d\n", ret); - } - sb->s_fs_info = NULL; -} - -static int btrfs_fill_super(struct super_block * sb, void * data, int silent) -{ - struct inode * inode; - struct dentry * root_dentry; - struct btrfs_super_block *disk_super; - struct btrfs_root *tree_root; - struct btrfs_inode *bi; - - sb->s_maxbytes = MAX_LFS_FILESIZE; - 
sb->s_magic = BTRFS_SUPER_MAGIC; - sb->s_op = &btrfs_super_ops; - sb->s_time_gran = 1; - - tree_root = open_ctree(sb); - - if (!tree_root) { - printk("btrfs: open_ctree failed\n"); - return -EIO; - } - sb->s_fs_info = tree_root; - disk_super = tree_root->fs_info->disk_super; - printk("read in super total blocks %Lu root %Lu\n", - btrfs_super_total_blocks(disk_super), - btrfs_super_root_dir(disk_super)); - - inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), - tree_root); - bi = BTRFS_I(inode); - bi->location.objectid = inode->i_ino; - bi->location.offset = 0; - bi->location.flags = 0; - bi->root = tree_root; - btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); - - if (!inode) - return -ENOMEM; - if (inode->i_state & I_NEW) { - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); - } - - root_dentry = d_alloc_root(inode); - if (!root_dentry) { - iput(inode); - return -ENOMEM; - } - sb->s_root = root_dentry; - btrfs_transaction_queue_work(tree_root, HZ * 30); - return 0; -} - -static int btrfs_write_inode(struct inode *inode, int wait) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - int ret = 0; - - if (wait) { - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - ret = btrfs_commit_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - } - return ret; -} - -static void btrfs_dirty_inode(struct inode *inode) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - btrfs_update_inode(trans, root, inode); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); -} - -static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, - struct 
btrfs_block_group_cache *group, - int mode) -{ - struct inode *inode; - struct btrfs_inode_item inode_item; - struct btrfs_key *location; - int ret; - int owner; - - inode = new_inode(root->fs_info->sb); - if (!inode) - return ERR_PTR(-ENOMEM); - - BTRFS_I(inode)->root = root; - if (mode & S_IFDIR) - owner = 0; - else - owner = 1; - group = btrfs_find_block_group(root, group, 0, 0, owner); - BTRFS_I(inode)->block_group = group; - - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_mode = mode; - inode->i_ino = objectid; - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - fill_inode_item(&inode_item, inode); - location = &BTRFS_I(inode)->location; - location->objectid = objectid; - location->flags = 0; - location->offset = 0; - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - - ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - BUG_ON(ret); - - insert_inode_hash(inode); - return inode; -} - -static inline u8 btrfs_inode_type(struct inode *inode) -{ - return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; -} - -static int btrfs_add_link(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode) -{ - int ret; - struct btrfs_key key; - struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; - key.objectid = inode->i_ino; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - key.offset = 0; - - ret = btrfs_insert_dir_item(trans, root, - dentry->d_name.name, dentry->d_name.len, - dentry->d_parent->d_inode->i_ino, - &key, btrfs_inode_type(inode)); - if (ret == 0) { - dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2; - ret = btrfs_update_inode(trans, root, - dentry->d_parent->d_inode); - } - return ret; -} - -static int btrfs_add_nondir(struct btrfs_trans_handle *trans, - struct dentry *dentry, struct inode *inode) -{ - int err = btrfs_add_link(trans, dentry, inode); - if (!err) { - d_instantiate(dentry, inode); - return 0; 
- } - if (err > 0) - err = -EEXIST; - return err; -} - -static int btrfs_create(struct inode *dir, struct dentry *dentry, - int mode, struct nameidata *nd) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(dir)->root; - struct inode *inode; - int err; - int drop_inode = 0; - u64 objectid; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - - err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - if (err) { - err = -ENOSPC; - goto out_unlock; - } - - inode = btrfs_new_inode(trans, root, objectid, - BTRFS_I(dir)->block_group, mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_unlock; - - btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode); - if (err) - drop_inode = 1; - else { - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_fop = &btrfs_file_operations; - inode->i_op = &btrfs_file_inode_operations; - } - dir->i_sb->s_dirt = 1; - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); -out_unlock: - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - if (drop_inode) { - inode_dec_link_count(inode); - iput(inode); - } - btrfs_btree_balance_dirty(root); - return err; -} - -static int btrfs_link(struct dentry *old_dentry, struct inode *dir, - struct dentry *dentry) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(dir)->root; - struct inode *inode = old_dentry->d_inode; - int err; - int drop_inode = 0; - - if (inode->i_nlink == 0) - return -ENOENT; - - inc_nlink(inode); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - atomic_inc(&inode->i_count); - err = btrfs_add_nondir(trans, dentry, inode); - if (err) - drop_inode = 1; - dir->i_sb->s_dirt = 1; - btrfs_update_inode_block_group(trans, dir); - btrfs_update_inode(trans, root, inode); 
- - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - if (drop_inode) { - inode_dec_link_count(inode); - iput(inode); - } - btrfs_btree_balance_dirty(root); - return err; -} - -static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 objectid, u64 dirid) -{ - int ret; - char buf[2]; - struct btrfs_key key; - - buf[0] = '.'; - buf[1] = '.'; - - key.objectid = objectid; - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - - ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, - &key, BTRFS_FT_DIR); - if (ret) - goto error; - key.objectid = dirid; - ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, - &key, BTRFS_FT_DIR); - if (ret) - goto error; -error: - return ret; -} - -static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - struct inode *inode; - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(dir)->root; - int err = 0; - int drop_on_err = 0; - u64 objectid; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); - goto out_unlock; - } - - err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - if (err) { - err = -ENOSPC; - goto out_unlock; - } - - inode = btrfs_new_inode(trans, root, objectid, - BTRFS_I(dir)->block_group, S_IFDIR | mode); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto out_fail; - } - drop_on_err = 1; - inode->i_op = &btrfs_dir_inode_operations; - inode->i_fop = &btrfs_dir_file_operations; - btrfs_set_trans_block_group(trans, inode); - - err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino); - if (err) - goto out_fail; - - inode->i_size = 6; - err = btrfs_update_inode(trans, root, inode); - if (err) - goto out_fail; - err = btrfs_add_link(trans, dentry, inode); - if (err) - goto out_fail; - d_instantiate(dentry, inode); - 
drop_on_err = 0; - dir->i_sb->s_dirt = 1; - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); - -out_fail: - btrfs_end_transaction(trans, root); -out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_on_err) - iput(inode); - btrfs_btree_balance_dirty(root); - return err; -} - -static int btrfs_sync_file(struct file *file, - struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - struct btrfs_trans_handle *trans; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - ret = -ENOMEM; - goto out; - } - ret = btrfs_commit_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); -out: - return ret > 0 ? EIO : ret; -} - -static int btrfs_sync_fs(struct super_block *sb, int wait) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root; - int ret; - root = btrfs_sb(sb); - - sb->s_dirt = 0; - if (!wait) { - filemap_flush(root->fs_info->btree_inode->i_mapping); - return 0; - } - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_commit_transaction(trans, root); - sb->s_dirt = 0; - BUG_ON(ret); -printk("btrfs sync_fs\n"); - mutex_unlock(&root->fs_info->fs_mutex); - return 0; -} - -#define BTRFS_GET_BLOCK_NO_CREATE 0 -#define BTRFS_GET_BLOCK_CREATE 1 -#define BTRFS_GET_BLOCK_NO_DIRECT 2 - -static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - int ret; - int err = 0; - u64 blocknr; - u64 extent_start = 0; - u64 extent_end = 0; - u64 objectid = inode->i_ino; - u32 found_type; - u64 alloc_hint = 0; - struct btrfs_path *path; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_file_extent_item *item; - struct btrfs_leaf *leaf; - struct btrfs_disk_key *found_key; - struct btrfs_trans_handle *trans = NULL; - - path = btrfs_alloc_path(); - BUG_ON(!path); - 
btrfs_init_path(path); - if (create & BTRFS_GET_BLOCK_CREATE) { - WARN_ON(1); - /* this almost but not quite works */ - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - goto out; - } - ret = drop_extents(trans, root, inode, - iblock << inode->i_blkbits, - (iblock + 1) << inode->i_blkbits, - &alloc_hint); - BUG_ON(ret); - } - - ret = btrfs_lookup_file_extent(NULL, root, path, - inode->i_ino, - iblock << inode->i_blkbits, 0); - if (ret < 0) { - err = ret; - goto out; - } - - if (ret != 0) { - if (path->slots[0] == 0) { - btrfs_release_path(root, path); - goto not_found; - } - path->slots[0]--; - } - - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path->nodes[0]); - blocknr = btrfs_file_extent_disk_blocknr(item); - blocknr += btrfs_file_extent_offset(item); - - /* are we inside the extent that was found? */ - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != objectid || - found_type != BTRFS_EXTENT_DATA_KEY) { - extent_end = 0; - extent_start = 0; - goto not_found; - } - found_type = btrfs_file_extent_type(item); - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_start = extent_start >> inode->i_blkbits; - extent_end = extent_start + btrfs_file_extent_num_blocks(item); - err = 0; - if (btrfs_file_extent_disk_blocknr(item) == 0) - goto out; - if (iblock >= extent_start && iblock < extent_end) { - btrfs_map_bh_to_logical(root, result, blocknr + - iblock - extent_start); - goto out; - } - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - char *ptr; - char *map; - u32 size; - - if (create & BTRFS_GET_BLOCK_NO_DIRECT) { - err = -EINVAL; - goto out; - } - size = btrfs_file_extent_inline_len(leaf->items + - path->slots[0]); - extent_end = (extent_start + size) >> inode->i_blkbits; - extent_start 
>>= inode->i_blkbits; - if (iblock < extent_start || iblock > extent_end) { - goto not_found; - } - ptr = btrfs_file_extent_inline_start(item); - map = kmap(result->b_page); - memcpy(map, ptr, size); - memset(map + size, 0, PAGE_CACHE_SIZE - size); - flush_dcache_page(result->b_page); - kunmap(result->b_page); - set_buffer_uptodate(result); - SetPageChecked(result->b_page); - btrfs_map_bh_to_logical(root, result, 0); - } -not_found: - if (create & BTRFS_GET_BLOCK_CREATE) { - struct btrfs_key ins; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, - 1, alloc_hint, (u64)-1, - &ins, 1); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - iblock << inode->i_blkbits, - ins.objectid, ins.offset, - ins.offset); - BUG_ON(ret); - SetPageChecked(result->b_page); - btrfs_map_bh_to_logical(root, result, ins.objectid); - } -out: - if (trans) - err = btrfs_end_transaction(trans, root); - btrfs_free_path(path); - return err; -} - -static int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - int err; - struct btrfs_root *root = BTRFS_I(inode)->root; - mutex_lock(&root->fs_info->fs_mutex); - err = btrfs_get_block_lock(inode, iblock, result, create); - mutex_unlock(&root->fs_info->fs_mutex); - return err; -} - -static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - mutex_lock(&root->fs_info->fs_mutex); - btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT); - mutex_unlock(&root->fs_info->fs_mutex); - return 0; -} - -static sector_t btrfs_bmap(struct address_space *as, sector_t block) -{ - return generic_block_bmap(as, block, btrfs_get_block_bmap); -} - -static int btrfs_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - return block_prepare_write(page, from, to, btrfs_get_block); -} - -static void btrfs_write_super(struct super_block *sb) -{ 
- sb->s_dirt = 0; -} - -static int btrfs_readpage(struct file *file, struct page *page) -{ - return mpage_readpage(page, btrfs_get_block); -} - -/* - * While block_write_full_page is writing back the dirty buffers under - * the page lock, whoever dirtied the buffers may decide to clean them - * again at any time. We handle that by only looking at the buffer - * state inside lock_buffer(). - * - * If block_write_full_page() is called for regular writeback - * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a - * locked buffer. This only can happen if someone has written the buffer - * directly, with submit_bh(). At the address_space level PageWriteback - * prevents this contention from occurring. - */ -static int __btrfs_write_full_page(struct inode *inode, struct page *page, - struct writeback_control *wbc) -{ - int err; - sector_t block; - sector_t last_block; - struct buffer_head *bh, *head; - const unsigned blocksize = 1 << inode->i_blkbits; - int nr_underway = 0; - - BUG_ON(!PageLocked(page)); - - last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; - - if (!page_has_buffers(page)) { - create_empty_buffers(page, blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - - /* - * Be very careful. We have no exclusion from __set_page_dirty_buffers - * here, and the (potentially unmapped) buffers may become dirty at - * any time. If a buffer becomes dirty here after we've inspected it - * then we just miss that fact, and the page stays dirty. - * - * Buffers outside i_size may be dirtied by __set_page_dirty_buffers; - * handle that here by just cleaning them. - */ - - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - head = page_buffers(page); - bh = head; - - /* - * Get all the dirty buffers mapped to disk addresses and - * handle any aliases from the underlying blockdev's mapping. 
- */ - do { - if (block > last_block) { - /* - * mapped buffers outside i_size will occur, because - * this page can be outside i_size when there is a - * truncate in progress. - */ - /* - * The buffer was zeroed by block_write_full_page() - */ - clear_buffer_dirty(bh); - set_buffer_uptodate(bh); - } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { - WARN_ON(bh->b_size != blocksize); - err = btrfs_get_block(inode, block, bh, 0); - if (err) { -printk("writepage going to recovery err %d\n", err); - goto recover; - } - if (buffer_new(bh)) { - /* blockdev mappings never come here */ - clear_buffer_new(bh); - } - } - bh = bh->b_this_page; - block++; - } while (bh != head); - - do { - if (!buffer_mapped(bh)) - continue; - /* - * If it's a fully non-blocking write attempt and we cannot - * lock the buffer then redirty the page. Note that this can - * potentially cause a busy-wait loop from pdflush and kswapd - * activity, but those code paths have their own higher-level - * throttling. - */ - if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { - lock_buffer(bh); - } else if (test_set_buffer_locked(bh)) { - redirty_page_for_writepage(wbc, page); - continue; - } - if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { - mark_buffer_async_write(bh); - } else { - unlock_buffer(bh); - } - } while ((bh = bh->b_this_page) != head); - - /* - * The page and its buffers are protected by PageWriteback(), so we can - * drop the bh refcounts early. - */ - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - submit_bh(WRITE, bh); - nr_underway++; - } - bh = next; - } while (bh != head); - unlock_page(page); - - err = 0; -done: - if (nr_underway == 0) { - /* - * The page was marked dirty, but the buffers were - * clean. Someone wrote them back by hand with - * ll_rw_block/submit_bh. A rare case. 
- */ - int uptodate = 1; - do { - if (!buffer_uptodate(bh)) { - uptodate = 0; - break; - } - bh = bh->b_this_page; - } while (bh != head); - if (uptodate) - SetPageUptodate(page); - end_page_writeback(page); - } - return err; - -recover: - /* - * ENOSPC, or some other error. We may already have added some - * blocks to the file, so we need to write these out to avoid - * exposing stale data. - * The page is currently locked and not marked for writeback - */ - bh = head; - /* Recovery: lock and submit the mapped buffers */ - do { - if (buffer_mapped(bh) && buffer_dirty(bh)) { - lock_buffer(bh); - mark_buffer_async_write(bh); - } else { - /* - * The buffer may have been set dirty during - * attachment to a dirty page. - */ - clear_buffer_dirty(bh); - } - } while ((bh = bh->b_this_page) != head); - SetPageError(page); - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - clear_buffer_dirty(bh); - submit_bh(WRITE, bh); - nr_underway++; - } - bh = next; - } while (bh != head); - unlock_page(page); - goto done; -} - -/* - * The generic ->writepage function for buffer-backed address_spaces - */ -static int btrfs_writepage(struct page *page, struct writeback_control *wbc) -{ - struct inode * const inode = page->mapping->host; - loff_t i_size = i_size_read(inode); - const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; - unsigned offset; - void *kaddr; - - /* Is the page fully inside i_size? */ - if (page->index < end_index) - return __btrfs_write_full_page(inode, page, wbc); - - /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); - if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. 
- */ - block_invalidatepage(page, 0); - unlock_page(page); - return 0; /* don't care */ - } - - /* - * The page straddles i_size. It must be zeroed out on each and every - * writepage invokation because it may be mmapped. "A file is mapped - * in multiples of the page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when mapped, and - * writes to that region are not written out to the file." - */ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - return __btrfs_write_full_page(inode, page, wbc); -} - -static void btrfs_truncate(struct inode *inode) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - struct btrfs_trans_handle *trans; - - if (!S_ISREG(inode->i_mode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - - btrfs_truncate_page(inode->i_mapping, inode->i_size); - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - /* FIXME, add redo link to tree so we don't leak on crash */ - ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); - btrfs_update_inode(trans, root, inode); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); -} - -static int btrfs_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - struct buffer_head *bh; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - SetPageUptodate(page); - bh = page_buffers(page); - set_buffer_uptodate(bh); - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - set_page_dirty(page); - } - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} - -static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, - struct page 
**prepared_pages, - const char __user * buf) -{ - long page_fault = 0; - int i; - int offset = pos & (PAGE_CACHE_SIZE - 1); - - for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { - size_t count = min_t(size_t, - PAGE_CACHE_SIZE - offset, write_bytes); - struct page *page = prepared_pages[i]; - fault_in_pages_readable(buf, count); - - /* Copy data from userspace to the current page */ - kmap(page); - page_fault = __copy_from_user(page_address(page) + offset, - buf, count); - /* Flush processor's dcache for this page */ - flush_dcache_page(page); - kunmap(page); - buf += count; - write_bytes -= count; - - if (page_fault) - break; - } - return page_fault ? -EFAULT : 0; -} - -static void btrfs_drop_pages(struct page **pages, size_t num_pages) -{ - size_t i; - for (i = 0; i < num_pages; i++) { - if (!pages[i]) - break; - unlock_page(pages[i]); - mark_page_accessed(pages[i]); - page_cache_release(pages[i]); - } -} -static int dirty_and_release_pages(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct file *file, - struct page **pages, - size_t num_pages, - loff_t pos, - size_t write_bytes) -{ - int i; - int offset; - int err = 0; - int ret; - int this_write; - struct inode *inode = file->f_path.dentry->d_inode; - struct buffer_head *bh; - struct btrfs_file_extent_item *ei; - - for (i = 0; i < num_pages; i++) { - offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); - /* FIXME, one block at a time */ - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - bh = page_buffers(pages[i]); - if (buffer_mapped(bh) && bh->b_blocknr == 0) { - struct btrfs_key key; - struct btrfs_path *path; - char *ptr; - u32 datasize; - - path = btrfs_alloc_path(); - BUG_ON(!path); - key.objectid = inode->i_ino; - key.offset = pages[i]->index << PAGE_CACHE_SHIFT; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - 
BUG_ON(write_bytes >= PAGE_CACHE_SIZE); - datasize = offset + - btrfs_file_extent_calc_inline_size(write_bytes); - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - BUG_ON(ret); - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, bh->b_data, offset + write_bytes); - mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); - } else if (buffer_mapped(bh)) { - btrfs_csum_file_block(trans, root, inode->i_ino, - pages[i]->index << PAGE_CACHE_SHIFT, - kmap(pages[i]), PAGE_CACHE_SIZE); - kunmap(pages[i]); - } - SetPageChecked(pages[i]); - // btrfs_update_inode_block_group(trans, inode); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); +#define BTRFS_SUPER_MAGIC 0x9123682E - ret = btrfs_commit_write(file, pages[i], offset, - offset + this_write); - pos += this_write; - if (ret) { - err = ret; - goto failed; - } - WARN_ON(this_write > write_bytes); - write_bytes -= this_write; - } -failed: - return err; -} +static struct super_operations btrfs_super_ops; -static int drop_extents(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 start, u64 end, u64 *hint_block) +static void btrfs_put_super (struct super_block * sb) { + struct btrfs_root *root = btrfs_sb(sb); int ret; - struct btrfs_key key; - struct btrfs_leaf *leaf; - int slot; - struct btrfs_file_extent_item *extent; - u64 extent_end = 0; - int keep; - struct btrfs_file_extent_item old; - struct btrfs_path *path; - u64 search_start = start; - int bookend; - int found_type; - int found_extent; - int found_inline; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - while(1) { - btrfs_release_path(root, path); - ret = 
btrfs_lookup_file_extent(trans, root, path, inode->i_ino, - search_start, -1); - if (ret < 0) - goto out; - if (ret > 0) { - if (path->slots[0] == 0) { - ret = 0; - goto out; - } - path->slots[0]--; - } - keep = 0; - bookend = 0; - found_extent = 0; - found_inline = 0; - extent = NULL; - leaf = btrfs_buffer_leaf(path->nodes[0]); - slot = path->slots[0]; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - if (key.offset >= end || key.objectid != inode->i_ino) { - ret = 0; - goto out; - } - if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) { - ret = 0; - goto out; - } - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); - if (found_type == BTRFS_FILE_EXTENT_REG) { - extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << - inode->i_blkbits); - found_extent = 1; - } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - found_inline = 1; - extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + slot); - } - - if (!found_extent && !found_inline) { - ret = 0; - goto out; - } - - if (search_start >= extent_end) { - ret = 0; - goto out; - } - - if (found_inline) { - u64 mask = root->blocksize - 1; - search_start = (extent_end + mask) & ~mask; - } else - search_start = extent_end; - if (end < extent_end && end >= key.offset) { - if (found_extent) { - u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); - u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); - memcpy(&old, extent, sizeof(old)); - if (disk_blocknr != 0) { - ret = btrfs_inc_extent_ref(trans, root, - disk_blocknr, disk_num_blocks); - BUG_ON(ret); - } - } - WARN_ON(found_inline); - bookend = 1; - } - - if (start > key.offset) { - u64 new_num; - u64 old_num; - /* truncate existing extent */ - keep = 1; - WARN_ON(start & (root->blocksize - 1)); - if (found_extent) { - new_num = (start - key.offset) >> - inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); - *hint_block = - 
btrfs_file_extent_disk_blocknr(extent); - if (btrfs_file_extent_disk_blocknr(extent)) { - inode->i_blocks -= - (old_num - new_num) << 3; - } - btrfs_set_file_extent_num_blocks(extent, - new_num); - mark_buffer_dirty(path->nodes[0]); - } else { - WARN_ON(1); - } - } - if (!keep) { - u64 disk_blocknr = 0; - u64 disk_num_blocks = 0; - u64 extent_num_blocks = 0; - if (found_extent) { - disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); - disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); - extent_num_blocks = - btrfs_file_extent_num_blocks(extent); - *hint_block = - btrfs_file_extent_disk_blocknr(extent); - } - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); - btrfs_release_path(root, path); - extent = NULL; - if (found_extent && disk_blocknr != 0) { - inode->i_blocks -= extent_num_blocks << 3; - ret = btrfs_free_extent(trans, root, - disk_blocknr, - disk_num_blocks, 0); - } - - BUG_ON(ret); - if (!bookend && search_start >= end) { - ret = 0; - goto out; - } - if (!bookend) - continue; - } - if (bookend && found_extent) { - /* create bookend */ - struct btrfs_key ins; - ins.objectid = inode->i_ino; - ins.offset = end; - ins.flags = 0; - btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); - - btrfs_release_path(root, path); - ret = btrfs_insert_empty_item(trans, root, path, &ins, - sizeof(*extent)); - BUG_ON(ret); - extent = btrfs_item_ptr( - btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(extent, - btrfs_file_extent_disk_blocknr(&old)); - btrfs_set_file_extent_disk_num_blocks(extent, - btrfs_file_extent_disk_num_blocks(&old)); - - btrfs_set_file_extent_offset(extent, - btrfs_file_extent_offset(&old) + - ((end - key.offset) >> inode->i_blkbits)); - WARN_ON(btrfs_file_extent_num_blocks(&old) < - (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(extent, - (extent_end - end) >> inode->i_blkbits); - - btrfs_set_file_extent_type(extent, - 
BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_generation(extent, - btrfs_file_extent_generation(&old)); - btrfs_mark_buffer_dirty(path->nodes[0]); - if (btrfs_file_extent_disk_blocknr(&old) != 0) { - inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; - } - ret = 0; - goto out; - } - } -out: - btrfs_free_path(path); - return ret; -} - -static int prepare_pages(struct btrfs_root *root, - struct file *file, - struct page **pages, - size_t num_pages, - loff_t pos, - unsigned long first_index, - unsigned long last_index, - size_t write_bytes, - u64 alloc_extent_start) -{ - int i; - unsigned long index = pos >> PAGE_CACHE_SHIFT; - struct inode *inode = file->f_path.dentry->d_inode; - int offset; - int err = 0; - int this_write; - struct buffer_head *bh; - struct buffer_head *head; - loff_t isize = i_size_read(inode); - - memset(pages, 0, num_pages * sizeof(struct page *)); - - for (i = 0; i < num_pages; i++) { - pages[i] = grab_cache_page(inode->i_mapping, index + i); - if (!pages[i]) { - err = -ENOMEM; - goto failed_release; - } - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); - wait_on_page_writeback(pages[i]); - offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min(PAGE_CACHE_SIZE - offset, write_bytes); - if (!page_has_buffers(pages[i])) { - create_empty_buffers(pages[i], - root->fs_info->sb->s_blocksize, - (1 << BH_Uptodate)); - } - head = page_buffers(pages[i]); - bh = head; - do { - err = btrfs_map_bh_to_logical(root, bh, - alloc_extent_start); - BUG_ON(err); - if (err) - goto failed_truncate; - bh = bh->b_this_page; - if (alloc_extent_start) - alloc_extent_start++; - } while (bh != head); - pos += this_write; - WARN_ON(this_write > write_bytes); - write_bytes -= this_write; + ret = close_ctree(root); + if (ret) { + printk("close ctree returns %d\n", ret); } - return 0; - -failed_release: - btrfs_drop_pages(pages, num_pages); - return err; - -failed_truncate: - btrfs_drop_pages(pages, num_pages); - if (pos > isize) - vmtruncate(inode, isize); - 
return err; + sb->s_fs_info = NULL; } -static ssize_t btrfs_file_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static int btrfs_fill_super(struct super_block * sb, void * data, int silent) { - loff_t pos; - size_t num_written = 0; - int err = 0; - int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *pages[8]; - struct page *pinned[2]; - unsigned long first_index; - unsigned long last_index; - u64 start_pos; - u64 num_blocks; - u64 alloc_extent_start; - u64 hint_block; - struct btrfs_trans_handle *trans; - struct btrfs_key ins; - pinned[0] = NULL; - pinned[1] = NULL; - if (file->f_flags & O_DIRECT) - return -EINVAL; - pos = *ppos; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - current->backing_dev_info = inode->i_mapping->backing_dev_info; - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - if (count == 0) - goto out; - err = remove_suid(file->f_path.dentry); - if (err) - goto out; - file_update_time(file); + struct inode * inode; + struct dentry * root_dentry; + struct btrfs_super_block *disk_super; + struct btrfs_root *tree_root; + struct btrfs_inode *bi; + int err; - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); - num_blocks = (count + pos - start_pos + root->blocksize - 1) >> - inode->i_blkbits; + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_magic = BTRFS_SUPER_MAGIC; + sb->s_op = &btrfs_super_ops; + sb->s_time_gran = 1; - mutex_lock(&inode->i_mutex); - first_index = pos >> PAGE_CACHE_SHIFT; - last_index = (pos + count) >> PAGE_CACHE_SHIFT; + tree_root = open_ctree(sb); - if ((pos & (PAGE_CACHE_SIZE - 1))) { - pinned[0] = grab_cache_page(inode->i_mapping, first_index); - if (!PageUptodate(pinned[0])) { - ret = mpage_readpage(pinned[0], btrfs_get_block); - BUG_ON(ret); - wait_on_page_locked(pinned[0]); - } else { - unlock_page(pinned[0]); - } - } - if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { - 
pinned[1] = grab_cache_page(inode->i_mapping, last_index); - if (!PageUptodate(pinned[1])) { - ret = mpage_readpage(pinned[1], btrfs_get_block); - BUG_ON(ret); - wait_on_page_locked(pinned[1]); - } else { - unlock_page(pinned[1]); - } + if (!tree_root || IS_ERR(tree_root)) { + printk("btrfs: open_ctree failed\n"); + return -EIO; } + sb->s_fs_info = tree_root; + disk_super = tree_root->fs_info->disk_super; + inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), + tree_root); + bi = BTRFS_I(inode); + bi->location.objectid = inode->i_ino; + bi->location.offset = 0; + bi->location.flags = 0; + bi->root = tree_root; + btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (!trans) { + if (!inode) { err = -ENOMEM; - mutex_unlock(&root->fs_info->fs_mutex); - goto out_unlock; - } - btrfs_set_trans_block_group(trans, inode); - /* FIXME blocksize != 4096 */ - inode->i_blocks += num_blocks << 3; - hint_block = 0; - if (start_pos < inode->i_size) { - /* FIXME blocksize != pagesize */ - ret = drop_extents(trans, root, inode, - start_pos, - (pos + count + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - BUG_ON(ret); - } - if (inode->i_size < start_pos) { - u64 last_pos_in_file; - u64 hole_size; - u64 mask = root->blocksize - 1; - last_pos_in_file = (inode->i_size + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; - if (last_pos_in_file < start_pos) { - ret = btrfs_insert_file_extent(trans, root, - inode->i_ino, - last_pos_in_file, - 0, 0, hole_size); - } - BUG_ON(ret); - } - if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size || - pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - ret = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, hint_block, (u64)-1, - &ins, 1); - BUG_ON(ret); - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, 
ins.objectid, ins.offset, - ins.offset); - BUG_ON(ret); - } else { - ins.offset = 0; - ins.objectid = 0; - } - BUG_ON(ret); - alloc_extent_start = ins.objectid; - // btrfs_update_inode_block_group(trans, inode); - ret = btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - while(count > 0) { - size_t offset = pos & (PAGE_CACHE_SIZE - 1); - size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset); - size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - - memset(pages, 0, sizeof(pages)); - ret = prepare_pages(root, file, pages, num_pages, - pos, first_index, last_index, - write_bytes, alloc_extent_start); - BUG_ON(ret); - - /* FIXME blocks != pagesize */ - if (alloc_extent_start) - alloc_extent_start += num_pages; - ret = btrfs_copy_from_user(pos, num_pages, - write_bytes, pages, buf); - BUG_ON(ret); - - ret = dirty_and_release_pages(NULL, root, file, pages, - num_pages, pos, write_bytes); - BUG_ON(ret); - btrfs_drop_pages(pages, num_pages); - - buf += write_bytes; - count -= write_bytes; - pos += write_bytes; - num_written += write_bytes; - - balance_dirty_pages_ratelimited(inode->i_mapping); - btrfs_btree_balance_dirty(root); - cond_resched(); - } -out_unlock: - mutex_unlock(&inode->i_mutex); -out: - if (pinned[0]) - page_cache_release(pinned[0]); - if (pinned[1]) - page_cache_release(pinned[1]); - *ppos = pos; - current->backing_dev_info = NULL; - mark_inode_dirty(inode); - return num_written ? 
num_written : err; -} - -static int btrfs_read_actor(read_descriptor_t *desc, struct page *page, - unsigned long offset, unsigned long size) -{ - char *kaddr; - unsigned long left, count = desc->count; - struct inode *inode = page->mapping->host; - - if (size > count) - size = count; - - if (!PageChecked(page)) { - /* FIXME, do it per block */ - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - struct buffer_head *bh; - - if (page_has_buffers(page)) { - bh = page_buffers(page); - if (!buffer_mapped(bh)) { - SetPageChecked(page); - goto checked; - } - } - - ret = btrfs_csum_verify_file_block(root, - page->mapping->host->i_ino, - page->index << PAGE_CACHE_SHIFT, - kmap(page), PAGE_CACHE_SIZE); - if (ret) { - if (ret != -ENOENT) { - printk("failed to verify ino %lu page %lu ret %d\n", - page->mapping->host->i_ino, - page->index, ret); - memset(page_address(page), 1, PAGE_CACHE_SIZE); - flush_dcache_page(page); - } - } - SetPageChecked(page); - kunmap(page); - } -checked: - /* - * Faults on the destination of a read are common, so do it before - * taking the kmap. 
- */ - if (!fault_in_pages_writeable(desc->arg.buf, size)) { - kaddr = kmap_atomic(page, KM_USER0); - left = __copy_to_user_inatomic(desc->arg.buf, - kaddr + offset, size); - kunmap_atomic(kaddr, KM_USER0); - if (left == 0) - goto success; - } - - /* Do it the slow way */ - kaddr = kmap(page); - left = __copy_to_user(desc->arg.buf, kaddr + offset, size); - kunmap(page); - - if (left) { - size -= left; - desc->error = -EFAULT; + goto fail_close; } -success: - desc->count = count - size; - desc->written += size; - desc->arg.buf += size; - return size; -} - -/** - * btrfs_file_aio_read - filesystem read routine - * @iocb: kernel I/O control block - * @iov: io vector request - * @nr_segs: number of segments in the iovec - * @pos: current file position - */ -static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) -{ - struct file *filp = iocb->ki_filp; - ssize_t retval; - unsigned long seg; - size_t count; - loff_t *ppos = &iocb->ki_pos; - - count = 0; - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. 
- */ - count += iv->iov_len; - if (unlikely((ssize_t)(count|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - nr_segs = seg; - count -= iv->iov_len; /* This segment is no good */ - break; + if (inode->i_state & I_NEW) { + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); } - retval = 0; - if (count) { - for (seg = 0; seg < nr_segs; seg++) { - read_descriptor_t desc; - desc.written = 0; - desc.arg.buf = iov[seg].iov_base; - desc.count = iov[seg].iov_len; - if (desc.count == 0) - continue; - desc.error = 0; - do_generic_file_read(filp, ppos, &desc, - btrfs_read_actor); - retval += desc.written; - if (desc.error) { - retval = retval ?: desc.error; - break; - } - } + root_dentry = d_alloc_root(inode); + if (!root_dentry) { + iput(inode); + err = -ENOMEM; + goto fail_close; } - return retval; -} - -static int create_subvol(struct btrfs_root *root, char *name, int namelen) -{ - struct btrfs_trans_handle *trans; - struct btrfs_key key; - struct btrfs_root_item root_item; - struct btrfs_inode_item *inode_item; - struct buffer_head *subvol; - struct btrfs_leaf *leaf; - struct btrfs_root *new_root; - struct inode *inode; - struct inode *dir; - int ret; - u64 objectid; - u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); - - subvol = btrfs_alloc_free_block(trans, root, 0); - if (subvol == NULL) - return -ENOSPC; - leaf = btrfs_buffer_leaf(subvol); - btrfs_set_header_nritems(&leaf->header, 0); - btrfs_set_header_level(&leaf->header, 0); - btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); - btrfs_set_header_generation(&leaf->header, trans->transid); - btrfs_set_header_owner(&leaf->header, root->root_key.objectid); - memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, - sizeof(leaf->header.fsid)); - mark_buffer_dirty(subvol); - - inode_item = &root_item.inode; - 
memset(inode_item, 0, sizeof(*inode_item)); - btrfs_set_inode_generation(inode_item, 1); - btrfs_set_inode_size(inode_item, 3); - btrfs_set_inode_nlink(inode_item, 1); - btrfs_set_inode_nblocks(inode_item, 1); - btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); - - btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); - btrfs_set_root_refs(&root_item, 1); - brelse(subvol); - subvol = NULL; - - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, - 0, &objectid); - BUG_ON(ret); - - btrfs_set_root_dirid(&root_item, new_dirid); - - key.objectid = objectid; - key.offset = 1; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - &root_item); - BUG_ON(ret); - - /* - * insert the directory item - */ - key.offset = (u64)-1; - dir = root->fs_info->sb->s_root->d_inode; - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, dir->i_ino, &key, - BTRFS_FT_DIR); - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - - new_root = btrfs_read_fs_root(root->fs_info, &key); - BUG_ON(!new_root); - - trans = btrfs_start_transaction(new_root, 1); - BUG_ON(!trans); - - inode = btrfs_new_inode(trans, new_root, new_dirid, - BTRFS_I(dir)->block_group, S_IFDIR | 0700); - inode->i_op = &btrfs_dir_inode_operations; - inode->i_fop = &btrfs_dir_file_operations; - - ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); - BUG_ON(ret); - - inode->i_nlink = 1; - inode->i_size = 6; - ret = btrfs_update_inode(trans, new_root, inode); - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, new_root); - BUG_ON(ret); - - iput(inode); - - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); + sb->s_root = root_dentry; + btrfs_transaction_queue_work(tree_root, HZ * 30); return 0; + +fail_close: + close_ctree(tree_root); + return err; } -static int create_snapshot(struct btrfs_root *root, char *name, int namelen) +static int 
btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; - struct btrfs_key key; - struct btrfs_root_item new_root_item; + struct btrfs_root *root; int ret; - u64 objectid; - - if (!root->ref_cows) - return -EINVAL; + root = btrfs_sb(sb); + sb->s_dirt = 0; + if (!wait) { + filemap_flush(root->fs_info->btree_inode->i_mapping); + return 0; + } mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); - - ret = btrfs_update_inode(trans, root, root->inode); - BUG_ON(ret); - - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, - 0, &objectid); - BUG_ON(ret); - - memcpy(&new_root_item, &root->root_item, - sizeof(new_root_item)); - - key.objectid = objectid; - key.offset = 1; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); - - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - &new_root_item); - BUG_ON(ret); - - /* - * insert the directory item - */ - key.offset = (u64)-1; - ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, - name, namelen, - root->fs_info->sb->s_root->d_inode->i_ino, - &key, BTRFS_FT_DIR); - - BUG_ON(ret); - - ret = btrfs_inc_root_ref(trans, root); - BUG_ON(ret); - ret = btrfs_commit_transaction(trans, root); + sb->s_dirt = 0; BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - btrfs_btree_balance_dirty(root); - return 0; -} - -static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int - cmd, unsigned long arg) -{ - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_ioctl_vol_args vol_args; - int ret = 0; - struct btrfs_dir_item *di; - int namelen; - struct btrfs_path *path; - u64 root_dirid; - - switch (cmd) { - case BTRFS_IOC_SNAP_CREATE: - if (copy_from_user(&vol_args, - (struct btrfs_ioctl_vol_args __user *)arg, - sizeof(vol_args))) - return -EFAULT; - namelen = strlen(vol_args.name); - if (namelen > BTRFS_VOL_NAME_MAX) - return 
-EINVAL; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, - mutex_lock(&root->fs_info->fs_mutex); - di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, - path, root_dirid, - vol_args.name, namelen, 0); - mutex_unlock(&root->fs_info->fs_mutex); - btrfs_free_path(path); - if (di && !IS_ERR(di)) - return -EEXIST; - - if (root == root->fs_info->tree_root) - ret = create_subvol(root, vol_args.name, namelen); - else - ret = create_snapshot(root, vol_args.name, namelen); - WARN_ON(ret); - break; - default: - return -ENOTTY; - } - return ret; -} - -#ifdef CONFIG_COMPAT -static long btrfs_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct inode *inode = file->f_path.dentry->d_inode; - int ret; - lock_kernel(); - ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); - unlock_kernel(); - return ret; - -} -#endif - -static struct kmem_cache *btrfs_inode_cachep; -struct kmem_cache *btrfs_trans_handle_cachep; -struct kmem_cache *btrfs_transaction_cachep; -struct kmem_cache *btrfs_bit_radix_cachep; -struct kmem_cache *btrfs_path_cachep; - -/* - * Called inside transaction, so use GFP_NOFS - */ -static struct inode *btrfs_alloc_inode(struct super_block *sb) -{ - struct btrfs_inode *ei; - - ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); - if (!ei) - return NULL; - return &ei->vfs_inode; -} - -static void btrfs_destroy_inode(struct inode *inode) -{ - WARN_ON(!list_empty(&inode->i_dentry)); - WARN_ON(inode->i_data.nrpages); - - kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); -} - -static void init_once(void * foo, struct kmem_cache * cachep, - unsigned long flags) -{ - struct btrfs_inode *ei = (struct btrfs_inode *) foo; - - if ((flags & (SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - inode_init_once(&ei->vfs_inode); - } -} - -static int init_inodecache(void) -{ - btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", - sizeof(struct 
btrfs_inode), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - init_once, NULL); - btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", - sizeof(struct btrfs_trans_handle), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); - btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", - sizeof(struct btrfs_transaction), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); - btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", - sizeof(struct btrfs_transaction), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - NULL, NULL); - btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix", - 256, - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD | - SLAB_DESTROY_BY_RCU), - NULL, NULL); - if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL || - btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL) - return -ENOMEM; return 0; } -static void destroy_inodecache(void) +static void btrfs_write_super(struct super_block *sb) { - kmem_cache_destroy(btrfs_inode_cachep); - kmem_cache_destroy(btrfs_trans_handle_cachep); - kmem_cache_destroy(btrfs_transaction_cachep); - kmem_cache_destroy(btrfs_bit_radix_cachep); - kmem_cache_destroy(btrfs_path_cachep); + sb->s_dirt = 0; } static int btrfs_get_sb(struct file_system_type *fs_type, @@ -2709,15 +124,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, btrfs_fill_super, mnt); } -static int btrfs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) -{ - struct inode *inode = dentry->d_inode; - generic_fillattr(inode, stat); - stat->blksize = 256 * 1024; - return 0; -} - static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); @@ -2732,197 +138,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, - struct inode * new_dir,struct dentry *new_dentry) -{ - 
struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(old_dir)->root; - struct inode *new_inode = new_dentry->d_inode; - struct inode *old_inode = old_dentry->d_inode; - struct timespec ctime = CURRENT_TIME; - struct btrfs_path *path; - struct btrfs_dir_item *di; - int ret; - - if (S_ISDIR(old_inode->i_mode) && new_inode && - new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { - return -ENOTEMPTY; - } - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, new_dir); - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out_fail; - } - - old_dentry->d_inode->i_nlink++; - old_dir->i_ctime = old_dir->i_mtime = ctime; - new_dir->i_ctime = new_dir->i_mtime = ctime; - old_inode->i_ctime = ctime; - if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { - struct btrfs_key *location = &BTRFS_I(new_dir)->location; - u64 old_parent_oid; - di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino, - "..", 2, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out_fail; - } - if (!di) { - ret = -ENOENT; - goto out_fail; - } - old_parent_oid = btrfs_disk_key_objectid(&di->location); - ret = btrfs_del_item(trans, root, path); - if (ret) { - ret = -EIO; - goto out_fail; - } - btrfs_release_path(root, path); - - di = btrfs_lookup_dir_index_item(trans, root, path, - old_inode->i_ino, - old_parent_oid, - "..", 2, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out_fail; - } - if (!di) { - ret = -ENOENT; - goto out_fail; - } - ret = btrfs_del_item(trans, root, path); - if (ret) { - ret = -EIO; - goto out_fail; - } - btrfs_release_path(root, path); - - ret = btrfs_insert_dir_item(trans, root, "..", 2, - old_inode->i_ino, location, - BTRFS_FT_DIR); - if (ret) - goto out_fail; - } - - - ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry); - if (ret) - goto out_fail; - - if (new_inode) { - new_inode->i_ctime = CURRENT_TIME; - ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry); 
- if (ret) - goto out_fail; - if (S_ISDIR(new_inode->i_mode)) - clear_nlink(new_inode); - else - drop_nlink(new_inode); - btrfs_update_inode(trans, root, new_inode); - } - ret = btrfs_add_link(trans, new_dentry, old_inode); - if (ret) - goto out_fail; - -out_fail: - btrfs_free_path(path); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} - -static int btrfs_symlink(struct inode *dir, struct dentry *dentry, - const char *symname) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *root = BTRFS_I(dir)->root; - struct btrfs_path *path; - struct btrfs_key key; - struct inode *inode; - int err; - int drop_inode = 0; - u64 objectid; - int name_len; - int datasize; - char *ptr; - struct btrfs_file_extent_item *ei; - - name_len = strlen(symname) + 1; - if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) - return -ENAMETOOLONG; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - - err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - if (err) { - err = -ENOSPC; - goto out_unlock; - } - - inode = btrfs_new_inode(trans, root, objectid, - BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_unlock; - - btrfs_set_trans_block_group(trans, inode); - err = btrfs_add_nondir(trans, dentry, inode); - if (err) - drop_inode = 1; - else { - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_fop = &btrfs_file_operations; - inode->i_op = &btrfs_file_inode_operations; - } - dir->i_sb->s_dirt = 1; - btrfs_update_inode_block_group(trans, inode); - btrfs_update_inode_block_group(trans, dir); - if (drop_inode) - goto out_unlock; - - path = btrfs_alloc_path(); - BUG_ON(!path); - key.objectid = inode->i_ino; - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - datasize = btrfs_file_extent_calc_inline_size(name_len); - err = btrfs_insert_empty_item(trans, root, path, 
&key, - datasize); - BUG_ON(err); - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, symname, name_len); - mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); - inode->i_op = &btrfs_symlink_inode_operations; - inode->i_mapping->a_ops = &btrfs_symlink_aops; - inode->i_size = name_len - 1; - btrfs_update_inode(trans, root, inode); - err = 0; - -out_unlock: - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - if (drop_inode) { - inode_dec_link_count(inode); - iput(inode); - } - btrfs_btree_balance_dirty(root); - return err; -} - static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -2944,91 +159,21 @@ static struct super_operations btrfs_super_ops = { .statfs = btrfs_statfs, }; -static struct inode_operations btrfs_dir_inode_operations = { - .lookup = btrfs_lookup, - .create = btrfs_create, - .unlink = btrfs_unlink, - .link = btrfs_link, - .mkdir = btrfs_mkdir, - .rmdir = btrfs_rmdir, - .rename = btrfs_rename, - .symlink = btrfs_symlink, - .setattr = btrfs_setattr, -}; - -static struct inode_operations btrfs_dir_ro_inode_operations = { - .lookup = btrfs_lookup, -}; - -static struct file_operations btrfs_dir_file_operations = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .readdir = btrfs_readdir, - .ioctl = btrfs_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = btrfs_compat_ioctl, -#endif -}; - -static struct address_space_operations btrfs_aops = { - .readpage = btrfs_readpage, - .writepage = btrfs_writepage, - .sync_page = block_sync_page, - .prepare_write = btrfs_prepare_write, - .commit_write = btrfs_commit_write, - .bmap = btrfs_bmap, -}; - -static struct address_space_operations btrfs_symlink_aops = { - 
.readpage = btrfs_readpage, - .writepage = btrfs_writepage, -}; - -static struct inode_operations btrfs_file_inode_operations = { - .truncate = btrfs_truncate, - .getattr = btrfs_getattr, - .setattr = btrfs_setattr, -}; - -static struct file_operations btrfs_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = btrfs_file_aio_read, - .write = btrfs_file_write, - .mmap = generic_file_mmap, - .open = generic_file_open, - .ioctl = btrfs_ioctl, - .fsync = btrfs_sync_file, -#ifdef CONFIG_COMPAT - .compat_ioctl = btrfs_compat_ioctl, -#endif -}; - -static struct inode_operations btrfs_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, -}; - static int __init init_btrfs_fs(void) { int err; - printk("btrfs loaded!\n"); btrfs_init_transaction_sys(); - err = init_inodecache(); + err = btrfs_init_cachep(); if (err) return err; return register_filesystem(&btrfs_fs_type); - destroy_inodecache(); - return err; } static void __exit exit_btrfs_fs(void) { btrfs_exit_transaction_sys(); - destroy_inodecache(); + btrfs_destroy_cachep(); unregister_filesystem(&btrfs_fs_type); - printk("btrfs unloaded\n"); } module_init(init_btrfs_fs) -- cgit v1.2.3 From 6cbd55707802b98843f953d1ae6d8f5bcd9a76c0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 09:07:21 -0400 Subject: Btrfs: add GPLv2 Signed-off-by: Chris Mason --- fs/btrfs/COPYING | 356 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/TODO | 1 - fs/btrfs/bit-radix.c | 18 +++ fs/btrfs/bit-radix.h | 18 +++ fs/btrfs/btrfs_inode.h | 18 +++ fs/btrfs/ctree.c | 18 +++ fs/btrfs/ctree.h | 18 +++ fs/btrfs/dir-item.c | 18 +++ fs/btrfs/disk-io.c | 18 +++ fs/btrfs/disk-io.h | 18 +++ fs/btrfs/extent-tree.c | 18 +++ fs/btrfs/file-item.c | 18 +++ fs/btrfs/file.c | 18 +++ fs/btrfs/hash.c | 18 +++ fs/btrfs/hash.h | 18 +++ fs/btrfs/inode-item.c | 18 +++ fs/btrfs/inode-map.c | 18 +++ fs/btrfs/inode.c | 18 +++ 
fs/btrfs/ioctl.h | 18 +++ fs/btrfs/print-tree.c | 18 +++ fs/btrfs/print-tree.h | 18 +++ fs/btrfs/root-tree.c | 18 +++ fs/btrfs/super.c | 18 +++ fs/btrfs/sysfs.c | 18 +++ fs/btrfs/transaction.c | 18 +++ fs/btrfs/transaction.h | 18 +++ 26 files changed, 788 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/COPYING (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/COPYING b/fs/btrfs/COPYING new file mode 100644 index 000000000000..ca442d313d86 --- /dev/null +++ b/fs/btrfs/COPYING @@ -0,0 +1,356 @@ + + NOTE! This copyright does *not* cover user programs that use kernel + services by normal system calls - this is merely considered normal use + of the kernel, and does *not* fall under the heading of "derived work". + Also note that the GPL below is copyrighted by the Free Software + Foundation, but the instance of code that it refers to (the Linux + kernel) is copyrighted by me and others who actually wrote it. + + Also note that the only valid version of the GPL as far as the kernel + is concerned is _this_ particular version of the license (ie v2, not + v2.2 or v3.x or whatever), unless explicitly otherwise stated. + + Linus Torvalds + +---------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. 
(Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. 
To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License.
diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO index 4a48a2365d57..d9b6d38c603a 100644 --- a/fs/btrfs/TODO +++ b/fs/btrfs/TODO @@ -1,4 +1,3 @@ -* fix printk warnings * cleanup, add more error checking, get rid of BUG_ONs * Fix ENOSPC handling * Make allocator smarter diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c index 8f9cd4277231..a4ddf4de9797 100644 --- a/fs/btrfs/bit-radix.c +++ b/fs/btrfs/bit-radix.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "bit-radix.h" diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h index 4e717e30db4f..c100f54d5c32 100644 --- a/fs/btrfs/bit-radix.h +++ b/fs/btrfs/bit-radix.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BIT_RADIX__ #define __BIT_RADIX__ #include diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 6b50076b02d5..783f06b63259 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS_I__ #define __BTRFS_I__ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 20e01f95ce6e..4736155af419 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 24410d42e3ce..4bd648d68e88 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS__ #define __BTRFS__ diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index b408a3d20c7b..0d179a44ebf4 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 878101172546..8c68a64c0ceb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 822ccb8aa4ae..7b76ccc48754 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __DISKIO__ #define __DISKIO__ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fe02fbfa6d59..975a0eb9241b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index d5a98827e38a..37dd3801a107 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0325dc038592..566e526b60ff 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c index 32de1ea1b64e..e5c76903d722 100644 --- a/fs/btrfs/hash.c +++ b/fs/btrfs/hash.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + /* * Original copy from: * linux/fs/ext3/hash.c diff --git a/fs/btrfs/hash.h b/fs/btrfs/hash.h index e56c4670093d..d3be02670585 100644 --- a/fs/btrfs/hash.h +++ b/fs/btrfs/hash.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __HASH__ #define __HASH__ int btrfs_name_hash(const char *name, int len, u64 *hash_result); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index c3b990b661c2..449e9dbdf957 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 318e27a6378b..58930fc48be6 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1cabc1951b0e..46f869d6d0fb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 497fadd98c3a..23bed48c51a7 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __IOCTL_ #define __IOCTL_ #include diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 21791f037561..1fa705683008 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 396041a05cfa..205cd03601ab 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __PRINT_TREE_ #define __PRINT_TREE_ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 13c30284fd84..7a2d63c1ee80 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 94b3535a2a11..ef8ad18b275c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index ed8b7e20cfbc..ead9dc8051be 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4f3c8ddcb401..30914521cf01 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include "ctree.h" diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index f25b4900db45..5fb1d322aa10 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __TRANSACTION__ #define __TRANSACTION__ #include "btrfs_inode.h" -- cgit v1.2.3 From 54aa1f4dfdacd60a19c4471220b24e581be6f774 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:25 -0400 Subject: Btrfs: Audit callers and return codes to make sure -ENOSPC gets up the stack Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 106 ++++++++++++++++++++++++------- fs/btrfs/ctree.h | 3 - fs/btrfs/dir-item.c | 6 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 141 ++++++++++++++++++++++++++++++++++-------- fs/btrfs/file-item.c | 46 ++------------ fs/btrfs/file.c | 131 ++++++++++++++++++++++++--------------- fs/btrfs/inode.c | 165 ++++++++++++++++++++++++++++++++----------------- fs/btrfs/root-tree.c | 1 - fs/btrfs/super.c | 3 +- fs/btrfs/transaction.c | 21 ++++--- 11 files changed, 410 insertions(+), 215 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 498dea0854c1..606a19b5916d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -73,6 +73,7 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root { struct buffer_head *cow; struct btrfs_node *cow_node; + int ret; if (btrfs_header_generation(btrfs_buffer_header(buf)) == trans->transid) { @@ -80,6 +81,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr); + if (IS_ERR(cow)) + return PTR_ERR(cow); cow_node = btrfs_buffer_node(cow); if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) WARN_ON(1); @@ -87,7 +90,9 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); 
btrfs_set_header_generation(&cow_node->header, trans->transid); btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); - btrfs_inc_ref(trans, root, buf); + ret = btrfs_inc_ref(trans, root, buf); + if (ret) + return ret; if (buf == root->node) { root->node = cow; get_bh(cow); @@ -320,6 +325,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root int wret; int pslot; int orig_slot = path->slots[level]; + int err_on_enospc = 0; u64 orig_ptr; if (level == 0) @@ -363,29 +369,43 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; + if (btrfs_header_nritems(&mid->header) < 2) + err_on_enospc = 1; + left_buf = read_node_slot(root, parent_buf, pslot - 1); right_buf = read_node_slot(root, parent_buf, pslot + 1); /* first, try to make some room in the middle buffer */ if (left_buf) { - btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, - &left_buf); + wret = btrfs_cow_block(trans, root, left_buf, + parent_buf, pslot - 1, &left_buf); + if (wret) { + ret = wret; + goto enospc; + } left = btrfs_buffer_node(left_buf); orig_slot += btrfs_header_nritems(&left->header); wret = push_node_left(trans, root, left_buf, mid_buf); if (wret < 0) ret = wret; + if (btrfs_header_nritems(&mid->header) < 2) + err_on_enospc = 1; } /* * then try to empty the right most buffer into the middle */ if (right_buf) { - btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, - &right_buf); + wret = btrfs_cow_block(trans, root, right_buf, + parent_buf, pslot + 1, &right_buf); + if (wret) { + ret = wret; + goto enospc; + } + right = btrfs_buffer_node(right_buf); wret = push_node_left(trans, root, mid_buf, right_buf); - if (wret < 0) + if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(&right->header) == 0) { u64 blocknr = bh_blocknr(right_buf); @@ -421,8 +441,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root */ 
BUG_ON(!left_buf); wret = balance_node_right(trans, root, mid_buf, left_buf); - if (wret < 0) + if (wret < 0) { ret = wret; + goto enospc; + } BUG_ON(wret == 1); } if (btrfs_header_nritems(&mid->header) == 0) { @@ -467,7 +489,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), path->slots[level])) BUG(); - +enospc: if (right_buf) btrfs_block_release(root, right_buf); if (left_buf) @@ -519,10 +541,15 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - btrfs_cow_block(trans, root, left_buf, parent_buf, - pslot - 1, &left_buf); - left = btrfs_buffer_node(left_buf); - wret = push_node_left(trans, root, left_buf, mid_buf); + ret = btrfs_cow_block(trans, root, left_buf, parent_buf, + pslot - 1, &left_buf); + if (ret) + wret = 1; + else { + left = btrfs_buffer_node(left_buf); + wret = push_node_left(trans, root, + left_buf, mid_buf); + } } if (wret < 0) ret = wret; @@ -561,11 +588,16 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, &right_buf); - right = btrfs_buffer_node(right_buf); - wret = balance_node_right(trans, root, - right_buf, mid_buf); + ret = btrfs_cow_block(trans, root, right_buf, + parent_buf, pslot + 1, + &right_buf); + if (ret) + wret = 1; + else { + right = btrfs_buffer_node(right_buf); + wret = balance_node_right(trans, root, + right_buf, mid_buf); + } } if (wret < 0) ret = wret; @@ -631,6 +663,10 @@ again: p->nodes[level + 1], p->slots[level + 1], &cow_buf); + if (wret) { + btrfs_block_release(root, cow_buf); + return wret; + } b = cow_buf; c = btrfs_buffer_node(b); } @@ -737,6 +773,7 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root src_nritems = btrfs_header_nritems(&src->header); 
dst_nritems = btrfs_header_nritems(&dst->header); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; + if (push_items <= 0) { return 1; } @@ -827,6 +864,8 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->nodes[level-1] != root->node); t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr); + if (IS_ERR(t)) + return PTR_ERR(t); c = btrfs_buffer_node(t); memset(c, 0, root->blocksize); btrfs_set_header_nritems(&c->header, 1); @@ -929,10 +968,15 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_header_nritems(&c->header) < BTRFS_NODEPTRS_PER_BLOCK(root) - 1) return 0; + if (ret < 0) + return ret; } c_nritems = btrfs_header_nritems(&c->header); split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr); + if (IS_ERR(split_buffer)) + return PTR_ERR(split_buffer); + split = btrfs_buffer_node(split_buffer); btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); @@ -1022,6 +1066,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_item *item; u32 left_nritems; u32 right_nritems; + int ret; slot = path->slots[1]; if (!path->nodes[1]) { @@ -1041,7 +1086,12 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } /* cow and double check */ - btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf); + ret = btrfs_cow_block(trans, root, right_buf, upper, + slot + 1, &right_buf); + if (ret) { + btrfs_block_release(root, right_buf); + return 1; + } right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -1162,7 +1212,11 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root } /* cow and double check */ - btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); + ret = 
btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); + if (ret) { + /* we hit -ENOSPC, but it isn't fatal here */ + return 1; + } left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { @@ -1309,8 +1363,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root slot = path->slots[0]; nritems = btrfs_header_nritems(&l->header); mid = (nritems + 1)/ 2; + right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); - BUG_ON(!right_buffer); + if (IS_ERR(right_buffer)) + return PTR_ERR(right_buffer); + right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); @@ -1407,7 +1464,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (!double_split) return ret; right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr); - BUG_ON(!right_buffer); + if (IS_ERR(right_buffer)) + return PTR_ERR(right_buffer); + right = btrfs_buffer_leaf(right_buffer); memset(&right->header, 0, sizeof(right->header)); btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); @@ -1655,7 +1714,6 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root ptr, data, data_size); btrfs_mark_buffer_dirty(path->nodes[0]); } - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } @@ -1775,12 +1833,12 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, slot = path->slots[1]; get_bh(leaf_buf); wret = push_leaf_left(trans, root, path, 1); - if (wret < 0) + if (wret < 0 && wret != -ENOSPC) ret = wret; if (path->nodes[0] == leaf_buf && btrfs_header_nritems(&leaf->header)) { wret = push_leaf_right(trans, root, path, 1); - if (wret < 0) + if (wret < 0 && wret != -ENOSPC) ret = wret; } if (btrfs_header_nritems(&leaf->header) == 0) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 
19a1d998fca7..77071f273977 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1126,9 +1126,6 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, char *data, size_t len); -int btrfs_csum_verify_file_block(struct btrfs_root *root, - u64 objectid, u64 offset, - char *data, size_t len); struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index ff10cf5a8422..d5db8bdea3d8 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -47,6 +47,8 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle if (ret) return ERR_PTR(ret); } + if (ret < 0) + return ERR_PTR(ret); WARN_ON(ret > 0); leaf = btrfs_buffer_leaf(path->nodes[0]); item = leaf->items + path->slots[0]; @@ -225,14 +227,13 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_leaf *leaf; u32 sub_item_len; u32 item_len; - int ret; + int ret = 0; leaf = btrfs_buffer_leaf(path->nodes[0]); sub_item_len = sizeof(*di) + btrfs_dir_name_len(di); item_len = btrfs_item_size(leaf->items + path->slots[0]); if (sub_item_len == btrfs_item_size(leaf->items + path->slots[0])) { ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); } else { char *ptr = (char *)di; char *start = btrfs_item_ptr(leaf, path->slots[0], char); @@ -240,7 +241,6 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, item_len - (ptr + sub_item_len - start)); ret = btrfs_truncate_item(trans, root, path, item_len - sub_item_len); - BUG_ON(ret); } return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3712e946f9c8..865a284aa06c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -580,7 +580,7 @@ int close_ctree(struct btrfs_root *root) btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_commit_transaction(trans, 
root); + ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b38c3e92f0c8..8025e9f8ef19 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -100,6 +100,8 @@ static int cache_block_group(struct btrfs_root *root, if (slot >= btrfs_header_nritems(&leaf->header)) { reada_extent_leaves(root, path, limit); ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto err; if (ret == 0) { continue; } else { @@ -148,6 +150,7 @@ static int cache_block_group(struct btrfs_root *root, } block_group->cached = 1; +err: btrfs_free_path(path); return 0; } @@ -201,7 +204,9 @@ static u64 find_search_start(struct btrfs_root *root, last = max(last, cache->last_prealloc); } again: - cache_block_group(root, cache); + ret = cache_block_group(root, cache); + if (ret) + goto out; while(1) { ret = find_first_radix_bit(&root->fs_info->extent_map_radix, gang, last, ARRAY_SIZE(gang)); @@ -398,16 +403,23 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_key ins; u32 refs; - find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1, 0, - &ins, 0); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; + ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, + (u64)-1, 0, &ins, 0); + if (ret) { + btrfs_free_path(path); + return ret; + } key.objectid = blocknr; key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); + if (ret < 0) + return ret; if (ret != 0) { BUG(); } @@ -442,12 +454,14 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; if (ret != 0) BUG(); l 
= btrfs_buffer_leaf(path->nodes[0]); item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); *refs = btrfs_extent_refs(item); - btrfs_release_path(root->fs_info->extent_root, path); +out: btrfs_free_path(path); return 0; } @@ -469,6 +483,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int i; int leaf; int ret; + int faili; + int err; if (!root->ref_cows) return 0; @@ -491,14 +507,45 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, continue; ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, btrfs_file_extent_disk_num_blocks(fi)); - BUG_ON(ret); + if (ret) { + faili = i; + goto fail; + } } else { blocknr = btrfs_node_blockptr(buf_node, i); ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); - BUG_ON(ret); + if (ret) { + faili = i; + goto fail; + } } } return 0; +fail: + for (i =0; i < faili; i++) { + if (leaf) { + u64 disk_blocknr; + key = &buf_leaf->items[i].key; + if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + continue; + fi = btrfs_item_ptr(buf_leaf, i, + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(fi) == + BTRFS_FILE_EXTENT_INLINE) + continue; + disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + if (disk_blocknr == 0) + continue; + err = btrfs_free_extent(trans, root, disk_blocknr, + btrfs_file_extent_disk_num_blocks(fi), 0); + BUG_ON(err); + } else { + blocknr = btrfs_node_blockptr(buf_node, i); + err = btrfs_free_extent(trans, root, blocknr, 1, 0); + BUG_ON(err); + } + } + return ret; } static int write_one_cache_group(struct btrfs_trans_handle *trans, @@ -512,15 +559,20 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, struct btrfs_block_group_item *bi; struct btrfs_key ins; - find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0); + ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, 0); + /* FIXME, set bit to recalc cache groups on next mount */ + if (ret) + return ret; ret = btrfs_search_slot(trans, 
extent_root, &cache->key, path, 0, 1); + if (ret < 0) + goto fail; BUG_ON(ret); bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_block_group_item); memcpy(bi, &cache->item, sizeof(*bi)); mark_buffer_dirty(path->nodes[0]); btrfs_release_path(extent_root, path); - +fail: finish_current_insert(trans, extent_root); pending_ret = del_pending_extents(trans, extent_root); if (ret) @@ -543,6 +595,7 @@ static int write_dirty_block_radix(struct btrfs_trans_handle *trans, int werr = 0; int i; struct btrfs_path *path; + unsigned long off = 0; path = btrfs_alloc_path(); if (!path) @@ -550,18 +603,28 @@ static int write_dirty_block_radix(struct btrfs_trans_handle *trans, while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)cache, - 0, ARRAY_SIZE(cache), + off, ARRAY_SIZE(cache), BTRFS_BLOCK_GROUP_DIRTY); if (!ret) break; for (i = 0; i < ret; i++) { - radix_tree_tag_clear(radix, cache[i]->key.objectid + - cache[i]->key.offset - 1, - BTRFS_BLOCK_GROUP_DIRTY); err = write_one_cache_group(trans, root, path, cache[i]); - if (err) + /* + * if we fail to write the cache group, we want + * to keep it marked dirty in hopes that a later + * write will work + */ + if (err) { werr = err; + off = cache[i]->key.objectid + + cache[i]->key.offset; + continue; + } + + radix_tree_tag_clear(radix, cache[i]->key.objectid + + cache[i]->key.offset - 1, + BTRFS_BLOCK_GROUP_DIRTY); } } btrfs_free_path(path); @@ -801,14 +864,20 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; - find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; - ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); + ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0); if (ret) { - BUG(); + btrfs_free_path(path); + return ret; } + + ret = btrfs_search_slot(trans, extent_root, &key, path, 
-1, 1); + if (ret < 0) + return ret; + BUG_ON(ret); ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_extent_item); BUG_ON(ei->refs == 0); @@ -827,8 +896,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_super_blocks_used(info->disk_super, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); - if (ret) - BUG(); + if (ret) { + return ret; + } ret = update_block_group(trans, root, blocknr, num_blocks, 0, mark_free, 0); BUG_ON(ret); @@ -1075,7 +1145,6 @@ next: path->slots[0]++; cond_resched(); } - // FIXME -ENOSPC check_pending: /* we have to make sure we didn't find an extent that has already * been allocated by the map tree or the original allocation @@ -1246,7 +1315,15 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ret = find_free_extent(trans, root, num_blocks, search_start, search_end, hint_block, ins, data); if (ret) { - return ret; + if (search_start == 0) + return ret; + search_end = search_start - 1; + search_start = 0; + hint_block = search_start; + ret = find_free_extent(trans, root, num_blocks, search_start, + search_end, hint_block, ins, data); + if (ret) + return ret; } /* @@ -1271,7 +1348,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, search_end, hint_block, &prealloc_key, 0); if (ret) { - return ret; + if (search_start == 0) + return ret; + search_end = search_start - 1; + search_start = 0; + hint_block = search_start; + ret = find_free_extent(trans, root, 0, search_start, + search_end, hint_block, + &prealloc_key, 0); + if (ret) + return ret; } } @@ -1309,11 +1395,14 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1, hint, (unsigned long)-1, &ins, 0); if (ret) { - BUG(); - return NULL; + BUG_ON(ret > 0); + return ERR_PTR(ret); } - BUG_ON(ret); buf = btrfs_find_create_tree_block(root, ins.objectid); + if (!buf) { + 
btrfs_free_extent(trans, root, ins.objectid, 1, 0); + return ERR_PTR(-ENOMEM); + } set_buffer_uptodate(buf); set_buffer_checked(buf); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index cf894f09f6c7..68859934ae25 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -45,6 +45,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &file_key, sizeof(*item)); + if (ret < 0) + goto out; BUG_ON(ret); item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); @@ -55,10 +57,9 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_generation(item, trans->transid); btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(path->nodes[0]); - - btrfs_release_path(root, path); +out: btrfs_free_path(path); - return 0; + return ret; } struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, @@ -213,6 +214,8 @@ insert: csum_offset = 0; ret = btrfs_insert_empty_item(trans, root, path, &file_key, BTRFS_CRC32_SIZE); + if (ret < 0) + goto fail; if (ret != 0) { WARN_ON(1); goto fail; @@ -261,40 +264,3 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, return ret; } -int btrfs_csum_verify_file_block(struct btrfs_root *root, - u64 objectid, u64 offset, - char *data, size_t len) -{ - int ret; - struct btrfs_key file_key; - struct btrfs_path *path; - struct btrfs_csum_item *item; - char result[BTRFS_CRC32_SIZE]; - - path = btrfs_alloc_path(); - BUG_ON(!path); - file_key.objectid = objectid; - file_key.offset = offset; - file_key.flags = 0; - btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); - mutex_lock(&root->fs_info->fs_mutex); - - item = btrfs_lookup_csum(NULL, root, path, objectid, offset, 0); - if (IS_ERR(item)) { - ret = PTR_ERR(item); - /* a csum that isn't present is a preallocated region. 
*/ - if (ret == -ENOENT || ret == -EFBIG) - ret = -ENOENT; - goto fail; - } - - ret = btrfs_csum_data(root, data, len, result); - WARN_ON(ret); - if (memcmp(result, &item->csum, BTRFS_CRC32_SIZE)) - ret = 1; -fail: - btrfs_release_path(root, path); - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 23f029359688..fef7ba1e707f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -81,6 +81,62 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } +static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, + u64 offset, ssize_t size, + struct buffer_head *bh) +{ + struct btrfs_key key; + struct btrfs_path *path; + char *ptr, *kaddr; + struct btrfs_trans_handle *trans; + struct btrfs_file_extent_item *ei; + u32 datasize; + int err = 0; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + key.objectid = inode->i_ino; + key.offset = offset; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); + BUG_ON(size >= PAGE_CACHE_SIZE); + datasize = btrfs_file_extent_calc_inline_size(size); + + ret = btrfs_insert_empty_item(trans, root, path, &key, + datasize); + if (ret) { + err = ret; + goto fail; + } + ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), + path->slots[0], struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(ei, trans->transid); + btrfs_set_file_extent_type(ei, + BTRFS_FILE_EXTENT_INLINE); + ptr = btrfs_file_extent_inline_start(ei); + + kaddr = kmap_atomic(bh->b_page, KM_USER0); + btrfs_memcpy(root, path->nodes[0]->b_data, + ptr, kaddr + bh_offset(bh), + size); + kunmap_atomic(kaddr, KM_USER0); + mark_buffer_dirty(path->nodes[0]); +fail: + btrfs_free_path(path); + ret = btrfs_end_transaction(trans, root); + if (ret && !err) + err = ret; + 
mutex_unlock(&root->fs_info->fs_mutex); + return err; +} + static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, @@ -96,57 +152,22 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, int this_write; struct inode *inode = file->f_path.dentry->d_inode; struct buffer_head *bh; - struct btrfs_file_extent_item *ei; for (i = 0; i < num_pages; i++) { offset = pos & (PAGE_CACHE_SIZE -1); this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); - /* FIXME, one block at a time */ + /* FIXME, one block at a time */ bh = page_buffers(pages[i]); if (buffer_mapped(bh) && bh->b_blocknr == 0) { - struct btrfs_key key; - struct btrfs_path *path; - char *ptr, *kaddr; - u32 datasize; - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - /* create an inline extent, and copy the data in */ - path = btrfs_alloc_path(); - BUG_ON(!path); - key.objectid = inode->i_ino; - key.offset = pages[i]->index << PAGE_CACHE_SHIFT; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - BUG_ON(write_bytes >= PAGE_CACHE_SIZE); - datasize = offset + - btrfs_file_extent_calc_inline_size(write_bytes); - - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - BUG_ON(ret); - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); - - kaddr = kmap_atomic(bh->b_page, KM_USER0); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + bh_offset(bh), - offset + write_bytes); - kunmap_atomic(kaddr, KM_USER0); - - mark_buffer_dirty(path->nodes[0]); - btrfs_free_path(path); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); + ret = insert_inline_extent(root, 
inode, + pages[i]->index << PAGE_CACHE_SHIFT, + offset + this_write, bh); + if (ret) { + err = ret; + goto failed; + } } ret = btrfs_commit_write(file, pages[i], offset, @@ -321,6 +342,7 @@ next_slot: btrfs_file_extent_disk_blocknr(extent); } ret = btrfs_del_item(trans, root, path); + /* TODO update progress marker and return */ BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; @@ -452,7 +474,8 @@ static int prepare_pages(struct btrfs_root *root, err = btrfs_drop_extents(trans, root, inode, start_pos, (pos + write_bytes + root->blocksize -1) & ~((u64)root->blocksize - 1), &hint_block); - BUG_ON(err); + if (err) + goto failed_release; } /* insert any holes we need to create */ @@ -469,7 +492,8 @@ static int prepare_pages(struct btrfs_root *root, last_pos_in_file, 0, 0, hole_size); } - BUG_ON(err); + if (err) + goto failed_release; } /* @@ -481,11 +505,13 @@ static int prepare_pages(struct btrfs_root *root, err = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, hint_block, (u64)-1, &ins, 1); - BUG_ON(err); + if (err) + goto failed_truncate; err = btrfs_insert_file_extent(trans, root, inode->i_ino, start_pos, ins.objectid, ins.offset, ins.offset); - BUG_ON(err); + if (err) + goto failed_truncate; } else { ins.offset = 0; ins.objectid = 0; @@ -618,16 +644,21 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ret = prepare_pages(root, file, pages, num_pages, pos, first_index, last_index, write_bytes); - BUG_ON(ret); + if (ret) + goto out; ret = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, buf); - BUG_ON(ret); + if (ret) { + btrfs_drop_pages(pages, num_pages); + goto out; + } ret = dirty_and_release_pages(NULL, root, file, pages, num_pages, pos, write_bytes); - BUG_ON(ret); btrfs_drop_pages(pages, num_pages); + if (ret) + goto out; buf += write_bytes; count -= write_bytes; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 94f1c28c25b9..6d031daa7779 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ 
-212,7 +212,11 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + ret = -ENOMEM; + goto err; + } + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, name, name_len, -1); if (IS_ERR(di)) { @@ -225,7 +229,8 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, } objectid = btrfs_disk_key_objectid(&di->location); ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); + if (ret) + goto err; btrfs_release_path(root, path); di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, @@ -239,7 +244,6 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, goto err; } ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); dentry->d_inode->i_ctime = dir->i_ctime; err: @@ -248,7 +252,7 @@ err: dir->i_size -= name_len * 2; btrfs_update_inode(trans, root, dir); drop_nlink(dentry->d_inode); - btrfs_update_inode(trans, root, dentry->d_inode); + ret = btrfs_update_inode(trans, root, dentry->d_inode); dir->i_sb->s_dirt = 1; } return ret; @@ -359,9 +363,10 @@ static int btrfs_free_inode(struct btrfs_trans_handle *trans, BUG_ON(!path); ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location, -1); - BUG_ON(ret); - ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (ret > 0) + ret = -ENOENT; + if (!ret) + ret = btrfs_del_item(trans, root, path); btrfs_free_path(path); return ret; } @@ -516,7 +521,8 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, } if (del_item) { ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (ret) + goto error; } else { break; } @@ -577,19 +583,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) page->index << PAGE_CACHE_SHIFT, (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint); - BUG_ON(ret); + if (ret) + goto out; ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, alloc_hint, (u64)-1, &ins, 1); - 
BUG_ON(ret); + if (ret) + goto out; ret = btrfs_insert_file_extent(trans, root, inode->i_ino, page->index << PAGE_CACHE_SHIFT, ins.objectid, 1, 1); - BUG_ON(ret); + if (ret) + goto out; SetPageChecked(page); kaddr = kmap(page); memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); flush_dcache_page(page); - btrfs_csum_file_block(trans, root, inode->i_ino, + ret = btrfs_csum_file_block(trans, root, inode->i_ino, page->index << PAGE_CACHE_SHIFT, kaddr, PAGE_CACHE_SIZE); kunmap(page); @@ -633,9 +642,10 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_set_trans_block_group(trans, inode); err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); - BUG_ON(err); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); + if (err) + return err; } out: err = inode_setattr(inode, attr); @@ -657,12 +667,20 @@ void btrfs_delete_inode(struct inode *inode) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); - btrfs_free_inode(trans, root, inode); + if (ret) + goto no_delete_lock; + ret = btrfs_free_inode(trans, root, inode); + if (ret) + goto no_delete_lock; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); return; + +no_delete_lock: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_btree_balance_dirty(root); no_delete: clear_inode(inode); } @@ -946,7 +964,7 @@ int btrfs_write_inode(struct inode *inode, int wait) } /* - * This is somewhat expense, updating the tree every time the + * This is somewhat expensive, updating the tree every time the * inode changes. But, it is most likely to find the inode in cache. * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. 
@@ -1002,8 +1020,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - BUG_ON(ret); - + if (ret) + return ERR_PTR(ret); insert_inode_hash(inode); return inode; } @@ -1121,7 +1139,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, drop_inode = 1; dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); - btrfs_update_inode(trans, root, inode); + err = btrfs_update_inode(trans, root, inode); + if (err) + drop_inode = 1; btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); @@ -1349,17 +1369,26 @@ not_found: ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, alloc_hint, (u64)-1, &ins, 1); - BUG_ON(ret); + if (ret) { + err = ret; + goto out; + } ret = btrfs_insert_file_extent(trans, root, inode->i_ino, iblock << inode->i_blkbits, ins.objectid, ins.offset, ins.offset); - BUG_ON(ret); + if (ret) { + err = ret; + goto out; + } btrfs_map_bh_to_logical(root, result, ins.objectid); } out: - if (trans) - err = btrfs_end_transaction(trans, root); + if (trans) { + ret = btrfs_end_transaction(trans, root); + if (!err) + err = ret; + } btrfs_free_path(path); return err; } @@ -1375,8 +1404,8 @@ int btrfs_get_block(struct inode *inode, sector_t iblock, return err; } -int btrfs_get_block_csum(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) +static int btrfs_get_block_csum(struct inode *inode, sector_t iblock, + struct buffer_head *result, int create) { int ret; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -1397,7 +1426,7 @@ int btrfs_get_block_csum(struct inode *inode, sector_t iblock, /* a csum that isn't present is a preallocated region. 
*/ if (ret == -ENOENT || ret == -EFBIG) ret = 0; - result->b_private = 0; + result->b_private = NULL; goto out; } memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE); @@ -1736,11 +1765,10 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); kaddr = kmap(page); - ret = btrfs_csum_file_block(trans, root, inode->i_ino, + btrfs_csum_file_block(trans, root, inode->i_ino, off, kaddr + bh_offset(bh), bh->b_size); kunmap(page); - BUG_ON(ret); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1930,7 +1958,6 @@ static void btrfs_truncate(struct inode *inode) /* FIXME, add redo link to tree so we don't leak on crash */ ret = btrfs_truncate_in_trans(trans, root, inode); - BUG_ON(ret); btrfs_update_inode(trans, root, inode); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); @@ -1970,6 +1997,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct inode *inode; struct inode *dir; int ret; + int err; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; @@ -1978,8 +2006,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) BUG_ON(!trans); subvol = btrfs_alloc_free_block(trans, root, 0); - if (subvol == NULL) - return -ENOSPC; + if (IS_ERR(subvol)) + return PTR_ERR(subvol); leaf = btrfs_buffer_leaf(subvol); btrfs_set_header_nritems(&leaf->header, 0); btrfs_set_header_level(&leaf->header, 0); @@ -2005,7 +2033,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); - BUG_ON(ret); + if (ret) + goto fail; btrfs_set_root_dirid(&root_item, new_dirid); @@ -2015,7 +2044,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); ret = btrfs_insert_root(trans, root->fs_info->tree_root, 
&key, &root_item); - BUG_ON(ret); + if (ret) + goto fail; /* * insert the directory item @@ -2025,10 +2055,12 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root, name, namelen, dir->i_ino, &key, BTRFS_FT_DIR); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); + if (ret) + goto fail_commit; new_root = btrfs_read_fs_root(root->fs_info, &key); BUG_ON(!new_root); @@ -2038,24 +2070,29 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) inode = btrfs_new_inode(trans, new_root, new_dirid, BTRFS_I(dir)->block_group, S_IFDIR | 0700); + if (IS_ERR(inode)) + goto fail; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; new_root->inode = inode; ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid); - BUG_ON(ret); + if (ret) + goto fail; inode->i_nlink = 1; inode->i_size = 6; ret = btrfs_update_inode(trans, new_root, inode); - BUG_ON(ret); - - ret = btrfs_commit_transaction(trans, new_root); - BUG_ON(ret); - + if (ret) + goto fail; +fail: + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; +fail_commit: mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); - return 0; + return ret; } static int create_snapshot(struct btrfs_root *root, char *name, int namelen) @@ -2064,6 +2101,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) struct btrfs_key key; struct btrfs_root_item new_root_item; int ret; + int err; u64 objectid; if (!root->ref_cows) @@ -2074,11 +2112,13 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) BUG_ON(!trans); ret = btrfs_update_inode(trans, root, root->inode); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); - BUG_ON(ret); + if (ret) + goto fail; memcpy(&new_root_item, 
&root->root_item, sizeof(new_root_item)); @@ -2091,7 +2131,8 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); - BUG_ON(ret); + if (ret) + goto fail; /* * insert the directory item @@ -2102,16 +2143,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) root->fs_info->sb->s_root->d_inode->i_ino, &key, BTRFS_FT_DIR); - BUG_ON(ret); + if (ret) + goto fail; ret = btrfs_inc_root_ref(trans, root); - BUG_ON(ret); + if (ret) + goto fail; - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); +fail: + err = btrfs_commit_transaction(trans, root); + if (err && !ret) + ret = err; mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root); - return 0; + return ret; } int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int @@ -2148,12 +2193,13 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int btrfs_free_path(path); if (di && !IS_ERR(di)) return -EEXIST; + if (IS_ERR(di)) + return PTR_ERR(di); if (root == root->fs_info->tree_root) ret = create_subvol(root, vol_args.name, namelen); else ret = create_snapshot(root, vol_args.name, namelen); - WARN_ON(ret); break; default: return -ENOTTY; @@ -2316,7 +2362,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, old_parent_oid = btrfs_disk_key_objectid(&di->location); ret = btrfs_del_item(trans, root, path); if (ret) { - ret = -EIO; goto out_fail; } btrfs_release_path(root, path); @@ -2335,7 +2380,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, } ret = btrfs_del_item(trans, root, path); if (ret) { - ret = -EIO; goto out_fail; } btrfs_release_path(root, path); @@ -2361,7 +2405,9 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, clear_nlink(new_inode); else drop_nlink(new_inode); - btrfs_update_inode(trans, root, new_inode); + ret = btrfs_update_inode(trans, root, 
new_inode); + if (ret) + goto out_fail; } ret = btrfs_add_link(trans, new_dentry, old_inode); if (ret) @@ -2433,7 +2479,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, datasize); - BUG_ON(err); + if (err) { + drop_inode = 1; + goto out_unlock; + } ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], struct btrfs_file_extent_item); btrfs_set_file_extent_generation(ei, trans->transid); @@ -2447,13 +2496,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_size = name_len - 1; - btrfs_update_inode(trans, root, inode); - err = 0; + err = btrfs_update_inode(trans, root, inode); + if (err) + drop_inode = 1; out_unlock: btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 8e1b9046d5ec..ac0fae7780ff 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -90,7 +90,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root { int ret; ret = btrfs_insert_item(trans, root, key, item, sizeof(*item)); - BUG_ON(ret); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ef8ad18b275c..c11ecf500202 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -125,9 +125,8 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; - BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); - return 0; + return ret; } static void btrfs_write_super(struct super_block *sb) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b5b99a85763f..321f8852755b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ 
-219,7 +219,8 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root; int i; int ret; - int err; + int err = 0; + while(1) { ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0, ARRAY_SIZE(gang), @@ -251,11 +252,12 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); - BUG_ON(err); + if (err) + break; list_add(&dirty->list, list); } } - return 0; + return err; } static int drop_dirty_roots(struct btrfs_root *tree_root, @@ -263,7 +265,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, { struct dirty_root *dirty; struct btrfs_trans_handle *trans; - int ret; + int ret = 0; while(!list_empty(list)) { mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); @@ -274,14 +276,15 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, BUG_ON(ret); ret = btrfs_del_root(trans, tree_root, &dirty->snap_key); - BUG_ON(ret); + if (ret) + break; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); kfree(dirty); mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root); } - return 0; + return ret; } int btrfs_commit_transaction(struct btrfs_trans_handle *trans, @@ -321,9 +324,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, } finish_wait(&trans->transaction->writer_wait, &wait); WARN_ON(cur_trans != trans->transaction); - add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots); + ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix, + &dirty_fs_roots); + BUG_ON(ret); + ret = btrfs_commit_tree_roots(trans, root); BUG_ON(ret); + cur_trans = root->fs_info->running_transaction; root->fs_info->running_transaction = NULL; if (cur_trans->list.prev != &root->fs_info->trans_list) { -- cgit v1.2.3 From 4b52dff6d371b9b93bc99f64c32831ea9a8ec3ac Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 26 Jun 2007 10:06:50 
-0400 Subject: Btrfs: Fix super block updates during transaction commit The super block written during commit was not consistent with the state of the trees. This change adds an in-memory copy of the super so that we can make sure to write out consistent data during a commit. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 5 ++--- fs/btrfs/extent-tree.c | 16 ++++++++-------- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 8 ++++++-- 5 files changed, 18 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fb6fffb71dd0..1998f86df08a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -306,6 +306,7 @@ struct btrfs_fs_info { u64 generation; struct btrfs_transaction *running_transaction; struct btrfs_super_block *disk_super; + struct btrfs_super_block super_copy; struct buffer_head *sb_buffer; struct super_block *sb; struct inode *btree_inode; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d7615e1578cc..7081729d5b16 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -471,6 +471,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!fs_info->sb_buffer) goto fail_iput; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + fs_info->disk_super = disk_super; + memcpy(&fs_info->super_copy, disk_super, sizeof(fs_info->super_copy)); if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -479,7 +481,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_super_total_blocks(disk_super) << fs_info->btree_inode->i_blkbits); - fs_info->disk_super = disk_super; if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { @@ -527,8 +528,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; struct buffer_head *bh = root->fs_info->sb_buffer; - btrfs_set_super_root(root->fs_info->disk_super, - bh_blocknr(root->fs_info->tree_root->node)); lock_buffer(bh); 
WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8025e9f8ef19..7e550343aee7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -796,8 +796,8 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) { ins.objectid = extent_root->fs_info->extent_tree_insert[i]; - super_blocks_used = btrfs_super_blocks_used(info->disk_super); - btrfs_set_super_blocks_used(info->disk_super, + super_blocks_used = btrfs_super_blocks_used(&info->super_copy); + btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + 1); ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item, sizeof(extent_item)); @@ -892,8 +892,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); } - super_blocks_used = btrfs_super_blocks_used(info->disk_super); - btrfs_set_super_blocks_used(info->disk_super, + super_blocks_used = btrfs_super_blocks_used(&info->super_copy); + btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); if (ret) { @@ -1032,7 +1032,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root info->extent_tree_prealloc_nr = 0; } if (search_end == (u64)-1) - search_end = btrfs_super_total_blocks(info->disk_super); + search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { block_group = btrfs_lookup_block_group(info, hint_block); block_group = btrfs_find_block_group(root, block_group, @@ -1361,8 +1361,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, } } - super_blocks_used = btrfs_super_blocks_used(info->disk_super); - btrfs_set_super_blocks_used(info->disk_super, super_blocks_used + + super_blocks_used = btrfs_super_blocks_used(&info->super_copy); + btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + num_blocks); ret 
= btrfs_insert_item(trans, extent_root, ins, &extent_item, sizeof(extent_item)); @@ -1737,7 +1737,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) BTRFS_BLOCK_GROUP_AVAIL); } if (key.objectid >= - btrfs_super_total_blocks(info->disk_super)) + btrfs_super_total_blocks(&info->super_copy)) break; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c11ecf500202..2e797d5fb281 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -144,7 +144,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); - struct btrfs_super_block *disk_super = root->fs_info->disk_super; + struct btrfs_super_block *disk_super = &root->fs_info->super_copy; buf->f_namelen = BTRFS_NAME_LEN; buf->f_blocks = btrfs_super_total_blocks(disk_super); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a5a63d471e43..3b2face593e9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -380,6 +380,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, else prev_trans->use_count++; } + btrfs_set_super_generation(&root->fs_info->super_copy, + cur_trans->transid); + btrfs_set_super_root(&root->fs_info->super_copy, + bh_blocknr(root->fs_info->tree_root->node)); + memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, + sizeof(root->fs_info->super_copy)); mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); @@ -389,8 +395,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(prev_trans); mutex_unlock(&root->fs_info->trans_mutex); } - btrfs_set_super_generation(root->fs_info->disk_super, - cur_trans->transid); BUG_ON(ret); write_ctree_super(trans, root); -- cgit v1.2.3 From e9d0b13b5bbb58c9b840e407a8d181442f799966 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 10 Aug 2007 14:06:19 -0400 Subject: Btrfs: Btree 
defrag on the extent-mapping tree as well Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 18 ++++++---- fs/btrfs/ctree.h | 4 ++- fs/btrfs/extent-tree.c | 28 +++++++++++++++- fs/btrfs/inode.c | 23 ++----------- fs/btrfs/super.c | 2 ++ fs/btrfs/transaction.c | 91 +++++++++++++++++++++++++++----------------------- fs/btrfs/transaction.h | 2 ++ fs/btrfs/tree-defrag.c | 36 ++++++++++++++++---- 8 files changed, 125 insertions(+), 79 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c7e47e77723f..ee1ae00d2827 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -99,7 +99,6 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (ret) return ret; } else { - WARN_ON(!root->ref_cows); clean_tree_block(trans, root, buf); } @@ -162,13 +161,14 @@ static int close_blocks(u64 blocknr, u64 other) int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *parent, - int cache_only) + int cache_only, u64 *last_ret) { struct btrfs_node *parent_node; struct buffer_head *cur_bh; struct buffer_head *tmp_bh; u64 blocknr; - u64 search_start = 0; + u64 search_start = *last_ret; + u64 last_block = 0; u64 other; u32 parent_nritems; int start_slot; @@ -198,6 +198,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, for (i = start_slot; i < end_slot; i++) { int close = 1; blocknr = btrfs_node_blockptr(parent_node, i); + if (last_block == 0) + last_block = blocknr; if (i > 0) { other = btrfs_node_blockptr(parent_node, i - 1); close = close_blocks(blocknr, other); @@ -206,8 +208,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, other = btrfs_node_blockptr(parent_node, i + 1); close = close_blocks(blocknr, other); } - if (close) + if (close) { + last_block = blocknr; continue; + } cur_bh = btrfs_find_tree_block(root, blocknr); if (!cur_bh || !buffer_uptodate(cur_bh) || @@ -219,9 +223,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, 
brelse(cur_bh); cur_bh = read_tree_block(root, blocknr); } - if (search_start == 0) { - search_start = bh_blocknr(cur_bh) & ~((u64)65535); - } + if (search_start == 0) + search_start = last_block & ~((u64)65535); + err = __btrfs_cow_block(trans, root, cur_bh, parent, i, &tmp_bh, search_start, min(8, end_slot - i)); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 59e09e37ab93..d3cd564b3b3f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1019,6 +1019,8 @@ static inline void btrfs_memmove(struct btrfs_root *root, btrfs_item_offset((leaf)->items + (slot)))) /* extent-tree.c */ +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, @@ -1066,7 +1068,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *parent, - int cache_only); + int cache_only, u64 *last_ret); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 120c448f58f8..3418bb62b996 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -396,6 +396,14 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return 0; } +int btrfs_extent_post_op(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + finish_current_insert(trans, root->fs_info->extent_root); + del_pending_extents(trans, root->fs_info->extent_root); + return 0; +} + static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, u32 *refs) @@ -1374,7 +1382,25 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root 
BUG_ON(ret); continue; } - next = read_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, blocknr); + if (!next || !buffer_uptodate(next)) { + brelse(next); + mutex_unlock(&root->fs_info->fs_mutex); + next = read_tree_block(root, blocknr); + mutex_lock(&root->fs_info->fs_mutex); + + /* we dropped the lock, check one more time */ + ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + BUG_ON(ret); + if (refs != 1) { + path->slots[*level]++; + brelse(next); + ret = btrfs_free_extent(trans, root, + blocknr, 1, 1); + BUG_ON(ret); + continue; + } + } WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 12aa043b9f6f..5c05ecbc5726 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2159,9 +2159,7 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ioctl_vol_args vol_args; - struct btrfs_trans_handle *trans; int ret = 0; - int err; struct btrfs_dir_item *di; int namelen; struct btrfs_path *path; @@ -2201,25 +2199,8 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int case BTRFS_IOC_DEFRAG: mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - memset(&root->defrag_progress, 0, - sizeof(root->defrag_progress)); - while (1) { - root->defrag_running = 1; - err = btrfs_defrag_leaves(trans, root, 0); - - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - btrfs_btree_balance_dirty(root); - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (err != -EAGAIN) - break; - } - root->defrag_running = 0; - btrfs_end_transaction(trans, root); + btrfs_defrag_root(root, 0); + btrfs_defrag_root(root->fs_info->extent_root, 0); mutex_unlock(&root->fs_info->fs_mutex); ret = 0; break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2e797d5fb281..74f3de47423c 100644 --- 
a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -121,7 +121,9 @@ static int btrfs_sync_fs(struct super_block *sb, int wait) filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; } + btrfs_clean_old_snapshots(root); mutex_lock(&root->fs_info->fs_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 204337c5ca0f..c9d52dc83e48 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -317,18 +317,47 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) +{ + struct btrfs_fs_info *info = root->fs_info; + int ret; + struct btrfs_trans_handle *trans; + + if (root->defrag_running) + return 0; + + trans = btrfs_start_transaction(root, 1); + while (1) { + root->defrag_running = 1; + ret = btrfs_defrag_leaves(trans, root, cacheonly); + btrfs_end_transaction(trans, root); + mutex_unlock(&info->fs_mutex); + + btrfs_btree_balance_dirty(root); + cond_resched(); + + mutex_lock(&info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (ret != -EAGAIN) + break; + } + root->defrag_running = 0; + radix_tree_tag_clear(&info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_DEFRAG_TAG); + btrfs_end_transaction(trans, root); + return 0; +} + int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) { struct btrfs_root *gang[1]; struct btrfs_root *root; - struct btrfs_root *tree_root = info->tree_root; - struct btrfs_trans_handle *trans; int i; int ret; int err = 0; u64 last = 0; - trans = btrfs_start_transaction(tree_root, 1); while(1) { ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, (void **)gang, last, @@ -339,37 +368,10 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - 
radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); - if (root->defrag_running) - continue; - - while (1) { - mutex_lock(&root->fs_info->trans_mutex); - record_root_in_trans(root); - mutex_unlock(&root->fs_info->trans_mutex); - - root->defrag_running = 1; - err = btrfs_defrag_leaves(trans, root, 1); - btrfs_end_transaction(trans, tree_root); - mutex_unlock(&info->fs_mutex); - - btrfs_btree_balance_dirty(root); - cond_resched(); - - mutex_lock(&info->fs_mutex); - trans = btrfs_start_transaction(tree_root, 1); - if (err != -EAGAIN) - break; - } - root->defrag_running = 0; - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); + btrfs_defrag_root(root, 1); } } - btrfs_end_transaction(trans, tree_root); + btrfs_defrag_root(info->extent_root, 1); return err; } @@ -527,6 +529,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } +int btrfs_clean_old_snapshots(struct btrfs_root *root) +{ + struct list_head dirty_roots; + INIT_LIST_HEAD(&dirty_roots); + + mutex_lock(&root->fs_info->trans_mutex); + list_splice_init(&root->fs_info->dead_roots, &dirty_roots); + mutex_unlock(&root->fs_info->trans_mutex); + + if (!list_empty(&dirty_roots)) { + drop_dirty_roots(root, &dirty_roots); + } + return 0; +} void btrfs_transaction_cleaner(struct work_struct *work) { struct btrfs_fs_info *fs_info = container_of(work, @@ -536,12 +552,10 @@ void btrfs_transaction_cleaner(struct work_struct *work) struct btrfs_root *root = fs_info->tree_root; struct btrfs_transaction *cur; struct btrfs_trans_handle *trans; - struct list_head dirty_roots; unsigned long now; unsigned long delay = HZ * 30; int ret; - INIT_LIST_HEAD(&dirty_roots); mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -561,14 +575,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) ret = 
btrfs_commit_transaction(trans, root); out: mutex_unlock(&root->fs_info->fs_mutex); - - mutex_lock(&root->fs_info->trans_mutex); - list_splice_init(&root->fs_info->dead_roots, &dirty_roots); - mutex_unlock(&root->fs_info->trans_mutex); - - if (!list_empty(&dirty_roots)) { - drop_dirty_roots(root, &dirty_roots); - } + btrfs_clean_old_snapshots(root); btrfs_transaction_queue_work(root, delay); } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 65a395eeca91..d5f491d3757e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -73,5 +73,7 @@ void btrfs_init_transaction_sys(void); void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); +int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); +int btrfs_clean_old_snapshots(struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 7ea66b4aa5c2..a09064a9a41c 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -42,16 +42,20 @@ static void reada_defrag(struct btrfs_root *root, static int defrag_walk_down(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level, - int cache_only) + int cache_only, u64 *last_ret) { struct buffer_head *next; struct buffer_head *cur; u64 blocknr; int ret = 0; + int is_extent = 0; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); + if (root->fs_info->extent_root == root) + is_extent = 1; + while(*level > 0) { WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); @@ -70,7 +74,10 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, if (*level == 1) { ret = btrfs_realloc_node(trans, root, path->nodes[*level], - cache_only); + cache_only, last_ret); + if (is_extent) + btrfs_extent_post_op(trans, root); + break; } blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), @@ -90,8 +97,13 @@ static int defrag_walk_down(struct 
btrfs_trans_handle *trans, ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); BUG_ON(ret); - ret = btrfs_realloc_node(trans, root, next, cache_only); + ret = btrfs_realloc_node(trans, root, next, cache_only, + last_ret); BUG_ON(ret); + + if (is_extent) + btrfs_extent_post_op(trans, root); + WARN_ON(*level <= 0); if (path->nodes[*level-1]) btrfs_block_release(root, path->nodes[*level-1]); @@ -148,10 +160,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int level; int orig_level; int i; + int is_extent = 0; + u64 last_ret = 0; + + if (root->fs_info->extent_root == root) + is_extent = 1; - if (root->ref_cows == 0) { + if (root->ref_cows == 0 && !is_extent) goto out; - } path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -165,16 +181,21 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, get_bh(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); - ret = btrfs_realloc_node(trans, root, root->node, cache_only); + ret = btrfs_realloc_node(trans, root, root->node, cache_only, + &last_ret); BUG_ON(ret); path->nodes[level] = root->node; path->slots[level] = 0; + if (is_extent) + btrfs_extent_post_op(trans, root); } else { level = root->defrag_level; path->lowest_level = level; wret = btrfs_search_slot(trans, root, &root->defrag_progress, path, 0, 1); + if (is_extent) + btrfs_extent_post_op(trans, root); if (wret < 0) { ret = wret; goto out; @@ -188,7 +209,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } while(1) { - wret = defrag_walk_down(trans, root, path, &level, cache_only); + wret = defrag_walk_down(trans, root, path, &level, cache_only, + &last_ret); if (wret > 0) break; if (wret < 0) -- cgit v1.2.3 From a52d9a8033c454cd9b4697cfafb467509fc1693f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: Btrfs: Extent based page cache code. This uses an rbtree of extents and tests instead of buffer heads. 
Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/btrfs_inode.h | 3 + fs/btrfs/ctree.h | 7 + fs/btrfs/disk-io.h | 1 - fs/btrfs/extent_map.c | 1656 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_map.h | 89 +++ fs/btrfs/file.c | 295 ++++----- fs/btrfs/inode.c | 816 +++++++++--------------- fs/btrfs/super.c | 2 + 9 files changed, 2203 insertions(+), 669 deletions(-) create mode 100644 fs/btrfs/extent_map.c create mode 100644 fs/btrfs/extent_map.h (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9321438c5eef..ea7a22870350 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,8 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o tree-defrag.o + transaction.o bit-radix.o inode.o file.o tree-defrag.o \ + extent_map.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index f446f161f736..d1d5af471c3c 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -19,11 +19,14 @@ #ifndef __BTRFS_I__ #define __BTRFS_I__ +#include "extent_map.h" + /* in memory btrfs inode */ struct btrfs_inode { struct btrfs_root *root; struct btrfs_block_group_cache *block_group; struct btrfs_key location; + struct extent_map_tree extent_tree; struct inode vfs_inode; /* diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c7f5161271d3..947f061ed118 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1176,7 +1176,14 @@ int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); int btrfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *result, int create); +struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, + size_t 
page_offset, u64 start, u64 end, + int create); +int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode); /* file.c */ +int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 81fd18cbd824..5261733b8735 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -77,7 +77,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, u64 num_blocks); int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical); -int btrfs_releasepage(struct page *page, gfp_t flags); void btrfs_btree_balance_dirty(struct btrfs_root *root); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct buffer_head *bh); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c new file mode 100644 index 000000000000..d378edf0964e --- /dev/null +++ b/fs/btrfs/extent_map.c @@ -0,0 +1,1656 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "extent_map.h" + +static struct kmem_cache *extent_map_cache; +static struct kmem_cache *extent_state_cache; + +struct tree_entry { + u64 start; + u64 end; + int in_tree; + struct rb_node rb_node; +}; + +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) + +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + +static LIST_HEAD(all_states); +spinlock_t state_lock = SPIN_LOCK_UNLOCKED; + +void __init extent_map_init(void) +{ + extent_map_cache = kmem_cache_create("extent_map", + sizeof(struct extent_map), 0, + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU, + NULL); + 
extent_state_cache = kmem_cache_create("extent_state", + sizeof(struct extent_state), 0, + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU, + NULL); +} + +void __exit extent_map_exit(void) +{ + while(!list_empty(&all_states)) { + struct extent_state *state; + struct list_head *cur = all_states.next; + state = list_entry(cur, struct extent_state, list); + printk("found leaked state %Lu %Lu state %d in_tree %d\n", + state->start, state->end, state->state, state->in_tree); + list_del(&state->list); + kfree(state); + } + if (extent_map_cache) + kmem_cache_destroy(extent_map_cache); + if (extent_state_cache) + kmem_cache_destroy(extent_state_cache); +} + +void extent_map_tree_init(struct extent_map_tree *tree, + struct address_space *mapping, gfp_t mask) +{ + tree->map.rb_node = NULL; + tree->state.rb_node = NULL; + rwlock_init(&tree->lock); + tree->mapping = mapping; +} +EXPORT_SYMBOL(extent_map_tree_init); + +struct extent_map *alloc_extent_map(gfp_t mask) +{ + struct extent_map *em; + em = kmem_cache_alloc(extent_map_cache, mask); + if (!em || IS_ERR(em)) + return em; + em->in_tree = 0; + atomic_set(&em->refs, 1); + return em; +} +EXPORT_SYMBOL(alloc_extent_map); + +void free_extent_map(struct extent_map *em) +{ + if (atomic_dec_and_test(&em->refs)) { + WARN_ON(em->in_tree); + kmem_cache_free(extent_map_cache, em); + } +} +EXPORT_SYMBOL(free_extent_map); + + +struct extent_state *alloc_extent_state(gfp_t mask) +{ + struct extent_state *state; + state = kmem_cache_alloc(extent_state_cache, mask); + if (!state || IS_ERR(state)) + return state; + state->state = 0; + state->in_tree = 0; + atomic_set(&state->refs, 1); + init_waitqueue_head(&state->wq); + spin_lock_irq(&state_lock); + list_add(&state->list, &all_states); + spin_unlock_irq(&state_lock); + return state; +} +EXPORT_SYMBOL(alloc_extent_state); + +void free_extent_state(struct extent_state *state) +{ + if (atomic_dec_and_test(&state->refs)) { + WARN_ON(state->in_tree); + spin_lock_irq(&state_lock); + 
list_del_init(&state->list); + spin_unlock_irq(&state_lock); + kmem_cache_free(extent_state_cache, state); + } +} +EXPORT_SYMBOL(free_extent_state); + +static struct rb_node *tree_insert(struct rb_root *root, u64 offset, + struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + if (offset < entry->start) + p = &(*p)->rb_left; + else if (offset > entry->end) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 1; + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 offset, + struct rb_node **prev_ret) +{ + struct rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + + if (offset < entry->start) + n = n->rb_left; + else if (offset > entry->end) + n = n->rb_right; + else + return n; + } + if (!prev_ret) + return NULL; + while(prev && offset > prev_entry->end) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, offset, &prev); + if (!ret) + return prev; + return ret; +} + +static int tree_delete(struct rb_root *root, u64 offset) +{ + struct rb_node *node; + struct tree_entry *entry; + + node = __tree_search(root, offset, NULL); + if (!node) + return -ENOENT; + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 0; + rb_erase(node, root); + return 0; +} + +/* + * add_extent_mapping tries a simple backward merge with existing + 
* mappings.  The extent_map struct handed in is linked into the tree
 * itself: no copy is made, the tree simply takes a reference on it.
 *
 * Returns 0 on success, or -EEXIST when tree_insert() reports that an
 * existing mapping already covers em->end.
 */
int add_extent_mapping(struct extent_map_tree *tree,
		       struct extent_map *em)
{
	struct extent_map *neighbor = NULL;
	struct rb_node *node;
	int ret = 0;

	write_lock_irq(&tree->lock);

	/* the insertion point is located using the last byte of the mapping */
	node = tree_insert(&tree->map, em->end, &em->rb_node);
	if (node) {
		/* something in the tree already covers em->end */
		neighbor = rb_entry(node, struct extent_map, rb_node);
		printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", neighbor->start, neighbor->end, em->start, em->end);
		ret = -EEXIST;
		goto unlock;
	}

	/* this is the reference the tree holds on the mapping */
	atomic_inc(&em->refs);

	/* a mapping that starts at offset zero has nothing in front of it */
	if (em->start == 0)
		goto unlock;

	node = rb_prev(&em->rb_node);
	if (!node)
		goto unlock;
	neighbor = rb_entry(node, struct extent_map, rb_node);

	/* only merge when the two mappings are adjacent in the file ... */
	if (neighbor->end + 1 != em->start)
		goto unlock;

	/*
	 * ... and either both have block_start == 0 or the blocks are
	 * adjacent as well
	 */
	if (!(em->block_start == 0 && neighbor->block_start == 0) &&
	    em->block_start != neighbor->block_end + 1)
		goto unlock;

	/* grow em backwards over the neighbor and drop the neighbor */
	em->start = neighbor->start;
	em->block_start = neighbor->block_start;
	rb_erase(&neighbor->rb_node, &tree->map);
	neighbor->in_tree = 0;
	free_extent_map(neighbor);

unlock:
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(add_extent_mapping);

/*
 * lookup_extent_mapping returns the first extent_map struct in the
 * tree that intersects the [start, end] (inclusive) range.  There may
 * be additional objects in the tree that intersect, so check the object
 * returned carefully to make sure you don't need additional lookups.
+ */ +struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 end) +{ + struct extent_map *em; + struct rb_node *rb_node; + + read_lock_irq(&tree->lock); + rb_node = tree_search(&tree->map, start); + if (!rb_node) { + em = NULL; + goto out; + } + if (IS_ERR(rb_node)) { + em = ERR_PTR(PTR_ERR(rb_node)); + goto out; + } + em = rb_entry(rb_node, struct extent_map, rb_node); + if (em->end < start || em->start > end) { + em = NULL; + goto out; + } + atomic_inc(&em->refs); +out: + read_unlock_irq(&tree->lock); + return em; +} +EXPORT_SYMBOL(lookup_extent_mapping); + +/* + * removes an extent_map struct from the tree. No reference counts are + * dropped, and no checks are done to see if the range is in use + */ +int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) +{ + int ret; + + write_lock_irq(&tree->lock); + ret = tree_delete(&tree->map, em->end); + write_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(remove_extent_mapping); + +/* + * utility function to look for merge candidates inside a given range. + * Any extents with matching state are merged together into a single + * extent in the tree. Extents with EXTENT_IO in their state field + * are not merged because the end_io handlers need to be able to do + * operations on them without sleeping (or doing allocations/splits). + * + * This should be called with the tree lock held. 
+ */ +static int merge_state(struct extent_map_tree *tree, + struct extent_state *state) +{ + struct extent_state *other; + struct rb_node *other_node; + + if (state->state & EXTENT_IOBITS) + return 0; + + other_node = rb_prev(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->end == state->start - 1 && + other->state == state->state) { + state->start = other->start; + other->in_tree = 0; + rb_erase(&other->rb_node, &tree->state); + free_extent_state(other); + } + } + other_node = rb_next(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->start == state->end + 1 && + other->state == state->state) { + other->start = state->start; + state->in_tree = 0; + rb_erase(&state->rb_node, &tree->state); + free_extent_state(state); + } + } + return 0; +} + +/* + * insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. + * + * This may return -EEXIST if the extent is already there, in which case the + * state struct is freed. + * + * The tree lock is not taken internally. This is a utility function and + * probably isn't what you want to call (see set/clear_extent_bit). 
+ */ +static int insert_state(struct extent_map_tree *tree, + struct extent_state *state, u64 start, u64 end, + int bits) +{ + struct rb_node *node; + + if (end < start) { + printk("end < start %Lu %Lu\n", end, start); + WARN_ON(1); + } + state->state |= bits; + state->start = start; + state->end = end; + if ((end & 4095) == 0) { + printk("insert state %Lu %Lu strange end\n", start, end); + WARN_ON(1); + } + node = tree_insert(&tree->state, end, &state->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); +printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + free_extent_state(state); + return -EEXIST; + } + merge_state(tree, state); + return 0; +} + +/* + * split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created second half. 'split' indicates an + * offset inside 'orig' where it should be split. + * + * Before calling, + * the tree has 'orig' at [orig->start, orig->end]. After calling, there + * are two extent state structs in the tree: + * prealloc: [orig->start, split - 1] + * orig: [ split, orig->end ] + * + * The tree locks are not taken by this function. They need to be held + * by the caller. 
+ */ +static int split_state(struct extent_map_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + struct rb_node *node; + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + orig->start = split; + if ((prealloc->end & 4095) == 0) { + printk("insert state %Lu %Lu strange end\n", prealloc->start, + prealloc->end); + WARN_ON(1); + } + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); +printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + free_extent_state(prealloc); + return -EEXIST; + } + return 0; +} + +/* + * utility function to clear some bits in an extent state struct. + * it will optionally wake up any one waiting on this state (wake == 1), or + * forcibly remove the state from the tree (delete == 1). + * + * If no bits are set on the state struct after clearing things, the + * struct is freed and removed from the tree + */ +static int clear_state_bit(struct extent_map_tree *tree, + struct extent_state *state, int bits, int wake, + int delete) +{ + int ret = state->state & bits; + state->state &= ~bits; + if (wake) + wake_up(&state->wq); + if (delete || state->state == 0) { + if (state->in_tree) { + rb_erase(&state->rb_node, &tree->state); + state->in_tree = 0; + free_extent_state(state); + } else { + WARN_ON(1); + } + } else { + merge_state(tree, state); + } + return ret; +} + +/* + * clear some bits on a range in the tree. This may require splitting + * or inserting elements in the tree, so the gfp mask is used to + * indicate which allocations or sleeping are allowed. + * + * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove + * the given range from the tree regardless of state (ie for truncate). + * + * the range [start, end] is inclusive. 
+ * + * This takes the tree lock, and returns < 0 on error, > 0 if any of the + * bits were already set, or zero if none of the bits were already set. + */ +int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + int err; + int set = 0; + +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irq(&tree->lock); + /* + * this search will find the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + goto out; + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + goto out; + WARN_ON(state->end < start); + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip + * bits on second half. + * + * If the extent we found extends past our range, we + * just split and search again. It'll get split again + * the next time though. + * + * If the extent we found is inside our range, we clear + * the desired bit on it. 
+ */ + + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, + wake, delete); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + if (wake) + wake_up(&state->wq); + set |= clear_state_bit(tree, prealloc, bits, + wake, delete); + prealloc = NULL; + goto out; + } + + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, wake, delete); + goto search_again; + +out: + write_unlock_irq(&tree->lock); + if (prealloc) + free_extent_state(prealloc); + + return set; + +search_again: + if (start >= end) + goto out; + write_unlock_irq(&tree->lock); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(clear_extent_bit); + +static int wait_on_state(struct extent_map_tree *tree, + struct extent_state *state) +{ + DEFINE_WAIT(wait); + prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); + read_unlock_irq(&tree->lock); + schedule(); + read_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + return 0; +} + +/* + * waits for one or more bits to clear on a range in the state tree. + * The range [start, end] is inclusive. 
+ * The tree lock is taken by this function + */ +int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits) +{ + struct extent_state *state; + struct rb_node *node; + + read_lock_irq(&tree->lock); +again: + while (1) { + /* + * this search will find all the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + break; + + state = rb_entry(node, struct extent_state, rb_node); + + if (state->start > end) + goto out; + + if (state->state & bits) { + start = state->start; + atomic_inc(&state->refs); + wait_on_state(tree, state); + free_extent_state(state); + goto again; + } + start = state->end + 1; + + if (start > end) + break; + + if (need_resched()) { + read_unlock_irq(&tree->lock); + cond_resched(); + read_lock_irq(&tree->lock); + } + } +out: + read_unlock_irq(&tree->lock); + return 0; +} +EXPORT_SYMBOL(wait_extent_bit); + +/* + * set some bits on a range in the tree. This may require allocations + * or sleeping, so the gfp mask is used to indicate what is allowed. + * + * If 'exclusive' == 1, this will fail with -EEXIST if some part of the + * range already has the desired bits set. The start of the existing + * range is returned in failed_start in this case. + * + * [start, end] is inclusive + * This takes the tree lock. + */ +int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, + int exclusive, u64 *failed_start, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + int err = 0; + int set; + u64 last_start; + u64 last_end; +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. 
+ */ + node = tree_search(&tree->state, start); + if (!node) { + err = insert_state(tree, prealloc, start, end, bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + goto out; + } + + state = rb_entry(node, struct extent_state, rb_node); + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + set = state->state & bits; + if (set && exclusive) { + *failed_start = state->start; + err = -EEXIST; + goto out; + } + state->state |= bits; + start = state->end + 1; + merge_state(tree, state); + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. 
+ */ + if (state->start < start) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + state->state |= bits; + start = state->end + 1; + merge_state(tree, state); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + prealloc->state |= bits; + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. 
+ */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start -1; + err = insert_state(tree, prealloc, start, this_end, + bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + if (err) + goto out; + start = this_end + 1; + goto search_again; + } + goto search_again; + +out: + write_unlock_irq(&tree->lock); + if (prealloc) + free_extent_state(prealloc); + + return err; + +search_again: + if (start > end) + goto out; + write_unlock_irq(&tree->lock); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(set_extent_bit); + +/* wrappers around set/clear extent bit */ +int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_dirty); + +int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_dirty); + +int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_new); + +int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_new); + +int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_uptodate); + +int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_uptodate); + +int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, 
start, end, EXTENT_WRITEBACK, + 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_writeback); + +int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_writeback); + +int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end) +{ + return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); +} +EXPORT_SYMBOL(wait_on_extent_writeback); + +/* + * locks a range in ascending order, waiting for any locked regions + * it hits on the way. [start,end] are inclusive, and this will sleep. + */ +int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) +{ + int err; + u64 failed_start; + while (1) { + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, + &failed_start, mask); + if (err == -EEXIST && (mask & __GFP_WAIT)) { + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); + start = failed_start; + } else { + break; + } + WARN_ON(start > end); + } + return err; +} +EXPORT_SYMBOL(lock_extent); + +int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); +} +EXPORT_SYMBOL(unlock_extent); + +/* + * helper function to set pages and extents in the tree dirty + */ +int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + } + set_extent_dirty(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_dirty); + +/* + * helper function to set both pages and extents in the tree writeback + */ +int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned 
long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + set_page_writeback(page); + page_cache_release(page); + index++; + } + set_extent_writeback(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_writeback); + +/* + * helper function to lock both pages and extents in the tree. + * pages must be locked first. + */ +int lock_range(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + int err; + + while (index <= end_index) { + page = grab_cache_page(tree->mapping, index); + if (!page) { + err = -ENOMEM; + goto failed; + } + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto failed; + } + index++; + } + lock_extent(tree, start, end, GFP_NOFS); + return 0; + +failed: + /* + * we failed above in getting the page at 'index', so we undo here + * up to but not including the page at 'index' + */ + end_index = index; + index = start >> PAGE_CACHE_SHIFT; + while (index < end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + return err; +} +EXPORT_SYMBOL(lock_range); + +/* + * helper function to unlock both pages and extents in the tree. + */ +int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + unlock_extent(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(unlock_range); + +/* + * searches a range in the state tree for a given mask. 
+ * If 'filled' == 1, this returns 1 only if ever extent in the tree + * has the bits set. Otherwise, 1 is returned if any bit in the + * range is found set. + */ +static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, + int bits, int filled) +{ + struct extent_state *state = NULL; + struct rb_node *node; + int bitset = 0; + + read_lock_irq(&tree->lock); + node = tree_search(&tree->state, start); + while (node && start <= end) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + break; + + if (filled && state->start > start) { + bitset = 0; + break; + } + if (state->state & bits) { + bitset = 1; + if (!filled) + break; + } else if (filled) { + bitset = 0; + break; + } + start = state->end + 1; + if (start > end) + break; + node = rb_next(node); + } + read_unlock_irq(&tree->lock); + return bitset; +} + +/* + * helper function to set a given page up to date if all the + * extents in the tree for that page are up to date + */ +static int check_page_uptodate(struct extent_map_tree *tree, + struct page *page) +{ + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) + SetPageUptodate(page); + return 0; +} + +/* + * helper function to unlock a page if all the extents in the tree + * for that page are unlocked + */ +static int check_page_locked(struct extent_map_tree *tree, + struct page *page) +{ + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) + unlock_page(page); + return 0; +} + +/* + * helper function to end page writeback if all the extents + * in the tree for that page are done with writeback + */ +static int check_page_writeback(struct extent_map_tree *tree, + struct page *page) +{ + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, 
EXTENT_WRITEBACK, 0)) + end_page_writeback(page); + return 0; +} + +/* lots and lots of room for performance fixes in the end_bio funcs */ + +/* + * after a writepage IO is done, we need to: + * clear the uptodate bits on error + * clear the writeback bits in the extent tree for this IO + * end_page_writeback if the page has no more pending IO + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +static int end_bio_extent_writepage(struct bio *bio, + unsigned int bytes_done, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_map_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + + if (bio->bi_size) + return 1; + + do { + struct page *page = bvec->bv_page; + start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, GFP_ATOMIC); + ClearPageUptodate(page); + SetPageError(page); + } + clear_extent_writeback(tree, start, end, GFP_ATOMIC); + + if (whole_page) + end_page_writeback(page); + else + check_page_writeback(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); + return 0; +} + +/* + * after a readpage IO is done, we need to: + * clear the uptodate bits on error + * set the uptodate bits if things worked + * set the page up to date if all extents in the tree are uptodate + * clear the lock bit in the extent tree + * unlock the page if there are no other extents locked for it + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. 
+ */ +static int end_bio_extent_readpage(struct bio *bio, + unsigned int bytes_done, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_map_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + + if (bio->bi_size) + return 1; + + do { + struct page *page = bvec->bv_page; + start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + if (whole_page) + SetPageUptodate(page); + else + check_page_uptodate(tree, page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + if (whole_page) + unlock_page(page); + else + check_page_locked(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); + return 0; +} + +/* + * IO done from prepare_write is pretty simple, we just unlock + * the structs in the extent tree when done, and set the uptodate bits + * as appropriate. 
+ */ +static int end_bio_extent_preparewrite(struct bio *bio, + unsigned int bytes_done, int err) +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_map_tree *tree = bio->bi_private; + u64 start; + u64 end; + + if (bio->bi_size) + return 1; + + do { + struct page *page = bvec->bv_page; + start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); + return 0; +} + +static int submit_extent_page(int rw, struct extent_map_tree *tree, + struct page *page, sector_t sector, + size_t size, unsigned long offset, + struct block_device *bdev, + bio_end_io_t end_io_func) +{ + struct bio *bio; + int ret = 0; + + bio = bio_alloc(GFP_NOIO, 1); + + bio->bi_sector = sector; + bio->bi_bdev = bdev; + bio->bi_io_vec[0].bv_page = page; + bio->bi_io_vec[0].bv_len = size; + bio->bi_io_vec[0].bv_offset = offset; + + bio->bi_vcnt = 1; + bio->bi_idx = 0; + bio->bi_size = size; + + bio->bi_end_io = end_io_func; + bio->bi_private = tree; + + bio_get(bio); + submit_bio(rw, bio); + + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + + bio_put(bio); + return ret; +} + +/* + * basic readpage implementation. 
Locked extent state structs are inserted + * into the tree that are removed when the IO is done (by the end_io + * handlers) + */ +int extent_read_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent) +{ + struct inode *inode = page->mapping->host; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 cur_end; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t iosize; + size_t blocksize = inode->i_sb->s_blocksize; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + + end = page_end; + lock_extent(tree, start, end, GFP_NOFS); + + while (cur <= end) { + if (cur >= last_byte) { + iosize = PAGE_CACHE_SIZE - page_offset; + zero_user_page(page, page_offset, iosize, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + break; + } + em = get_extent(inode, page, page_offset, cur, end, 0); + if (IS_ERR(em) || !em) { + SetPageError(page); + unlock_extent(tree, cur, end, GFP_NOFS); + break; + } + + extent_offset = cur - em->start; + BUG_ON(em->end < cur); + BUG_ON(end < cur); + + iosize = min(em->end - cur, end - cur) + 1; + cur_end = min(em->end, end); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + /* we've found a hole, just zero and go on */ + if (block_start == 0) { + zero_user_page(page, page_offset, iosize, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + /* the get_extent 
function already copied into the page */ + if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, bdev, + end_bio_extent_readpage); + if (ret) + SetPageError(page); + cur = cur + iosize; + page_offset += iosize; + nr++; + } + if (!nr) { + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(extent_read_full_page); + +/* + * the writepage semantics are similar to regular writepage. extent + * records are inserted to lock ranges in the tree, and as dirty areas + * are found, they are marked writeback. Then the lock bits are removed + * and the end_io handler clears the writeback ranges + */ +int extent_write_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t iosize; + size_t blocksize; + loff_t i_size = i_size_read(inode); + unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + + if (page->index > end_index) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; + } + + if (page->index == end_index) { + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); + zero_user_page(page, offset, + PAGE_CACHE_SIZE - offset, KM_USER0); + } + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + + end = page_end; + lock_extent(tree, start, page_end, GFP_NOFS); + + if (last_byte <= start) { + 
clear_extent_dirty(tree, start, page_end, GFP_NOFS); + goto done; + } + + set_extent_uptodate(tree, start, page_end, GFP_NOFS); + blocksize = inode->i_sb->s_blocksize; + + while (cur <= end) { + if (cur >= last_byte) { + clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + break; + } + em = get_extent(inode, page, page_offset, cur, end, 1); + if (IS_ERR(em) || !em) { + SetPageError(page); + break; + } + + extent_offset = cur - em->start; + BUG_ON(em->end < cur); + BUG_ON(end < cur); + iosize = min(em->end - cur, end - cur) + 1; + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + if (block_start == 0 || block_start == EXTENT_MAP_INLINE) { + clear_extent_dirty(tree, cur, + cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + /* leave this out until we have a page_mkwrite call */ + if (0 && !test_range_bit(tree, cur, cur + iosize - 1, + EXTENT_DIRTY, 0)) { + cur = cur + iosize; + page_offset += iosize; + continue; + } + clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + set_range_writeback(tree, cur, cur + iosize - 1); + ret = submit_extent_page(WRITE, tree, page, + sector, iosize, page_offset, bdev, + end_bio_extent_writepage); + if (ret) + SetPageError(page); + cur = cur + iosize; + page_offset += iosize; + nr++; + } +done: + WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0)); + unlock_extent(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; +} +EXPORT_SYMBOL(extent_write_full_page); + +/* + * basic invalidatepage code, this waits on any locked or writeback + * ranges corresponding to the page, and then deletes any extent state + * records from the tree + */ +int extent_invalidatepage(struct extent_map_tree *tree, + struct page *page, unsigned long offset) +{ + u64 start = (page->index << PAGE_CACHE_SHIFT); + u64 end = start + 
PAGE_CACHE_SIZE - 1; + size_t blocksize = page->mapping->host->i_sb->s_blocksize; + + start += (offset + blocksize -1) & ~(blocksize - 1); + if (start > end) + return 0; + + lock_extent(tree, start, end, GFP_NOFS); + wait_on_extent_writeback(tree, start, end); + clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY, + 1, 1, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(extent_invalidatepage); + +/* + * simple commit_write call, set_range_dirty is used to mark both + * the pages and the extent records as dirty + */ +int extent_commit_write(struct extent_map_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + + set_page_dirty(page); + + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} +EXPORT_SYMBOL(extent_commit_write); + +int extent_prepare_write(struct extent_map_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent) +{ + u64 page_start = page->index << PAGE_CACHE_SHIFT; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 block_start; + u64 orig_block_start; + u64 block_end; + u64 cur_end; + struct extent_map *em; + unsigned blocksize = 1 << inode->i_blkbits; + size_t page_offset = 0; + size_t block_off_start; + size_t block_off_end; + int err = 0; + int iocount = 0; + int ret = 0; + int isnew; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + block_start = (page_start + from) & ~((u64)blocksize - 1); + block_end = (page_start + to - 1) | (blocksize - 1); + orig_block_start = block_start; + + lock_extent(tree, page_start, page_end, GFP_NOFS); + while(block_start <= block_end) { + em = get_extent(inode, page, page_offset, block_start, + block_end, 1); + if (IS_ERR(em) || !em) { + 
goto err; + } + cur_end = min(block_end, em->end); + block_off_start = block_start & (PAGE_CACHE_SIZE - 1); + block_off_end = block_off_start + blocksize; + isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); + + if (!PageUptodate(page) && isnew && + (block_off_end > to || block_off_start < from)) { + void *kaddr; + + kaddr = kmap_atomic(page, KM_USER0); + if (block_off_end > to) + memset(kaddr + to, 0, block_off_end - to); + if (block_off_start < from) + memset(kaddr + block_off_start, 0, + from - block_off_start); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + if (!isnew && !PageUptodate(page) && + (block_off_end > to || block_off_start < from) && + !test_range_bit(tree, block_start, cur_end, + EXTENT_UPTODATE, 1)) { + u64 sector; + u64 extent_offset = block_start - em->start; + size_t iosize; + sector = (em->block_start + extent_offset) >> 9; + iosize = (cur_end - block_start + blocksize - 1) & + ~((u64)blocksize - 1); + /* + * we've already got the extent locked, but we + * need to split the state such that our end_bio + * handler can clear the lock. + */ + set_extent_bit(tree, block_start, + block_start + iosize - 1, + EXTENT_LOCKED, 0, NULL, GFP_NOFS); + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, em->bdev, + end_bio_extent_preparewrite); + iocount++; + block_start = block_start + iosize; + } else { + set_extent_uptodate(tree, block_start, cur_end, + GFP_NOFS); + unlock_extent(tree, block_start, cur_end, GFP_NOFS); + block_start = cur_end + 1; + } + page_offset = block_start & (PAGE_CACHE_SIZE - 1); + free_extent_map(em); + } + if (iocount) { + wait_extent_bit(tree, orig_block_start, + block_end, EXTENT_LOCKED); + } + check_page_uptodate(tree, page); +err: + /* FIXME, zero out newly allocated blocks on error */ + return err; +} +EXPORT_SYMBOL(extent_prepare_write); + +/* + * a helper for releasepage. 
As long as there are no locked extents + * in the range corresponding to the page, both state records and extent + * map records are removed + */ +int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) +{ + struct extent_map *em; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + u64 orig_start = start; + + while (start <= end) { + em = lookup_extent_mapping(tree, start, end); + if (!em || IS_ERR(em)) + break; + if (test_range_bit(tree, em->start, em->end, + EXTENT_LOCKED, 0)) { + free_extent_map(em); + start = em->end + 1; +printk("range still locked %Lu %Lu\n", em->start, em->end); + break; + } + remove_extent_mapping(tree, em); + start = em->end + 1; + /* once for the rb tree */ + free_extent_map(em); + /* once for us */ + free_extent_map(em); + } + WARN_ON(test_range_bit(tree, orig_start, end, EXTENT_WRITEBACK, 0)); + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return 1; +} +EXPORT_SYMBOL(try_release_extent_mapping); + diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h new file mode 100644 index 000000000000..108944aab4bd --- /dev/null +++ b/fs/btrfs/extent_map.h @@ -0,0 +1,89 @@ +#ifndef __EXTENTMAP__ +#define __EXTENTMAP__ + +#include + +#define EXTENT_MAP_INLINE (u64)-2 +#define EXTENT_MAP_DELALLOC (u64)-1 + +struct extent_map_tree { + struct rb_root map; + struct rb_root state; + struct address_space *mapping; + rwlock_t lock; +}; + +/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ +struct extent_map { + u64 start; + u64 end; /* inclusive */ + int in_tree; + struct rb_node rb_node; + /* block_start and block_end are in bytes */ + u64 block_start; + u64 block_end; /* inclusive */ + struct block_device *bdev; + atomic_t refs; +}; + +/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ +struct extent_state { + u64 start; + u64 end; /* inclusive */ + int in_tree; + struct rb_node rb_node; + 
wait_queue_head_t wq; + atomic_t refs; + unsigned long state; + struct list_head list; +}; + +struct extent_buffer { + u64 start; + u64 end; /* inclusive */ + char *addr; + struct page *pages[]; +}; + +typedef struct extent_map *(get_extent_t)(struct inode *inode, + struct page *page, + size_t page_offset, + u64 start, u64 end, + int create); + +void extent_map_tree_init(struct extent_map_tree *tree, + struct address_space *mapping, gfp_t mask); +struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, + u64 start, u64 end); +int add_extent_mapping(struct extent_map_tree *tree, + struct extent_map *em); +int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); +int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page); +int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +struct extent_map *alloc_extent_map(gfp_t mask); +void free_extent_map(struct extent_map *em); +int extent_read_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent); +void __init extent_map_init(void); +void __exit extent_map_exit(void); +int extent_clean_all_trees(struct extent_map_tree *tree); +int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); +int extent_invalidatepage(struct extent_map_tree *tree, + struct page *page, unsigned long offset); +int extent_write_full_page(struct extent_map_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_prepare_write(struct extent_map_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent); +int extent_commit_write(struct extent_map_tree *tree, + 
struct inode *inode, struct page *page, + unsigned from, unsigned to); +int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); +#endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6933ab11a5cd..71a481894ab6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -81,14 +81,14 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) } } -static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, +static int insert_inline_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode, u64 offset, ssize_t size, - struct buffer_head *bh) + struct page *page, size_t page_offset) { struct btrfs_key key; struct btrfs_path *path; char *ptr, *kaddr; - struct btrfs_trans_handle *trans; struct btrfs_file_extent_item *ei; u32 datasize; int err = 0; @@ -98,8 +98,6 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, if (!path) return -ENOMEM; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); key.objectid = inode->i_ino; @@ -122,18 +120,13 @@ static int insert_inline_extent(struct btrfs_root *root, struct inode *inode, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - kaddr = kmap_atomic(bh->b_page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER0); btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + bh_offset(bh), - size); + ptr, kaddr + page_offset, size); kunmap_atomic(kaddr, KM_USER0); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_free_path(path); - ret = btrfs_end_transaction(trans, root); - if (ret && !err) - err = ret; - mutex_unlock(&root->fs_info->fs_mutex); return err; } @@ -145,45 +138,143 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, loff_t pos, size_t write_bytes) { - int i; - int offset; int err = 0; - int ret; - int this_write; + int i; struct inode *inode = file->f_path.dentry->d_inode; - struct buffer_head *bh; + struct 
extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_key ins; + u64 hint_block; + u64 num_blocks; + u64 start_pos; + u64 end_of_last_block; + u64 end_pos = pos + write_bytes; + loff_t isize = i_size_read(inode); - for (i = 0; i < num_pages; i++) { - offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); + em = alloc_extent_map(GFP_NOFS); + if (!em) + return -ENOMEM; - /* FIXME, one block at a time */ - bh = page_buffers(pages[i]); + em->bdev = inode->i_sb->s_bdev; - if (buffer_mapped(bh) && bh->b_blocknr == 0) { - ret = insert_inline_extent(root, inode, - pages[i]->index << PAGE_CACHE_SHIFT, - offset + this_write, bh); - if (ret) { - err = ret; - goto failed; - } - } + start_pos = pos & ~((u64)root->blocksize - 1); + num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + inode->i_blkbits; - ret = btrfs_commit_write(file, pages[i], offset, - offset + this_write); - pos += this_write; - if (ret) { - err = ret; + end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + if (!trans) { + err = -ENOMEM; + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); + inode->i_blocks += num_blocks << 3; + hint_block = 0; + + if ((end_of_last_block & 4095) == 0) { + printk("strange end of last %Lu %lu %Lu\n", start_pos, write_bytes, end_of_last_block); + } + set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); + + /* FIXME...EIEIO, ENOSPC and more */ + + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, + start_pos, (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) + goto failed; + } + + /* insert any holes we need to create */ + if (inode->i_size < start_pos) { + u64 
last_pos_in_file; + u64 hole_size; + u64 mask = root->blocksize - 1; + last_pos_in_file = (isize + mask) & ~mask; + hole_size = (start_pos - last_pos_in_file + mask) & ~mask; + hole_size >>= inode->i_blkbits; + if (last_pos_in_file < start_pos) { + err = btrfs_insert_file_extent(trans, root, + inode->i_ino, + last_pos_in_file, + 0, 0, hole_size); + } + if (err) goto failed; + } + + /* + * either allocate an extent for the new bytes or setup the key + * to show we are doing inline data in the extent + */ + if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || + pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + err = btrfs_alloc_extent(trans, root, inode->i_ino, + num_blocks, 0, hint_block, (u64)-1, + &ins, 1); + BUG_ON(err); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, + start_pos, ins.objectid, ins.offset, + ins.offset); + BUG_ON(err); + em->start = start_pos; + em->end = end_of_last_block; + em->block_start = ins.objectid << inode->i_blkbits; + em->block_end = em->block_start + + (ins.offset << inode->i_blkbits) - 1; + set_extent_dirty(em_tree, start_pos, end_of_last_block, + GFP_NOFS); + err = add_extent_mapping(em_tree, em); + for (i = 0; i < num_pages; i++) { + struct page *p = pages[i]; + SetPageUptodate(p); + __set_page_dirty_nobuffers(p); } - WARN_ON(this_write > write_bytes); - write_bytes -= this_write; + } else { + struct page *p = pages[0]; + err = insert_inline_extent(trans, root, inode, start_pos, + end_pos - start_pos, p, 0); + BUG_ON(err); + em->start = start_pos; + em->end = end_pos; + em->block_start = EXTENT_MAP_INLINE; + em->block_end = EXTENT_MAP_INLINE; + add_extent_mapping(em_tree, em); + } + if (end_pos > isize) { + i_size_write(inode, end_pos); + btrfs_update_inode(trans, root, inode); } failed: + err = btrfs_end_transaction(trans, root); +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); + free_extent_map(em); return err; } +int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 
end) +{ + struct extent_map *em; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + + while(1) { + em = lookup_extent_mapping(em_tree, start, end); + if (!em) + break; + remove_extent_mapping(em_tree, em); + /* once for us */ + free_extent_map(em); + /* once for the tree*/ + free_extent_map(em); + } + return 0; +} + /* * this is very complex, but the basic idea is to drop all extents * in the range start - end. hint_block is filled in with a block number @@ -213,6 +304,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, int found_inline; int recow; + btrfs_drop_extent_cache(inode, start, end - 1); + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -434,18 +527,9 @@ static int prepare_pages(struct btrfs_root *root, int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = file->f_path.dentry->d_inode; - int offset; int err = 0; - int this_write; - struct buffer_head *bh; - struct buffer_head *head; - loff_t isize = i_size_read(inode); - struct btrfs_trans_handle *trans; - u64 hint_block; u64 num_blocks; - u64 alloc_extent_start; u64 start_pos; - struct btrfs_key ins; start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> @@ -457,119 +541,17 @@ static int prepare_pages(struct btrfs_root *root, pages[i] = grab_cache_page(inode->i_mapping, index + i); if (!pages[i]) { err = -ENOMEM; - goto failed_release; + BUG_ON(1); } cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); wait_on_page_writeback(pages[i]); - } - - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - mutex_unlock(&root->fs_info->fs_mutex); - goto out_unlock; - } - btrfs_set_trans_block_group(trans, inode); - /* FIXME blocksize != 4096 */ - inode->i_blocks += num_blocks << 3; - hint_block = 0; - - /* FIXME...EIEIO, ENOSPC and more */ - - /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ - if 
(start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, - start_pos, (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed_release; - } - - /* insert any holes we need to create */ - if (inode->i_size < start_pos) { - u64 last_pos_in_file; - u64 hole_size; - u64 mask = root->blocksize - 1; - last_pos_in_file = (isize + mask) & ~mask; - hole_size = (start_pos - last_pos_in_file + mask) & ~mask; - hole_size >>= inode->i_blkbits; - if (last_pos_in_file < start_pos) { - err = btrfs_insert_file_extent(trans, root, - inode->i_ino, - last_pos_in_file, - 0, 0, hole_size); - } - if (err) - goto failed_release; - } - - /* - * either allocate an extent for the new bytes or setup the key - * to show we are doing inline data in the extent - */ - if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 0, hint_block, (u64)-1, - &ins, 1); - if (err) - goto failed_truncate; - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - if (err) - goto failed_truncate; - } else { - ins.offset = 0; - ins.objectid = 0; - } - BUG_ON(err); - alloc_extent_start = ins.objectid; - err = btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - - for (i = 0; i < num_pages; i++) { - offset = pos & (PAGE_CACHE_SIZE -1); - this_write = min((size_t)PAGE_CACHE_SIZE - offset, write_bytes); - if (!page_has_buffers(pages[i])) { - create_empty_buffers(pages[i], - root->fs_info->sb->s_blocksize, - (1 << BH_Uptodate)); + if (!PagePrivate(pages[i])) { + SetPagePrivate(pages[i]); + set_page_private(pages[i], 1); + page_cache_get(pages[i]); } - head = page_buffers(pages[i]); - bh = head; - do { - err = btrfs_map_bh_to_logical(root, bh, - alloc_extent_start); - BUG_ON(err); - if (err) - goto 
failed_truncate; - bh = bh->b_this_page; - if (alloc_extent_start) - alloc_extent_start++; - } while (bh != head); - pos += this_write; - WARN_ON(this_write > write_bytes); - write_bytes -= this_write; } return 0; - -failed_release: - btrfs_drop_pages(pages, num_pages); - return err; - -failed_truncate: - btrfs_drop_pages(pages, num_pages); - if (pos > isize) - vmtruncate(inode, isize); - return err; - -out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); - goto failed_release; - } static ssize_t btrfs_file_write(struct file *file, const char __user *buf, @@ -685,7 +667,6 @@ out: page_cache_release(pinned[1]); *ppos = pos; current->backing_dev_info = NULL; - mark_inode_dirty(inode); return num_written ? num_written : err; } @@ -714,8 +695,8 @@ static int btrfs_sync_file(struct file *file, mutex_unlock(&root->fs_info->trans_mutex); /* - * ok we haven't committed the transaction yet, lets do a commit - */ + * ok we haven't committed the transaction yet, lets do a commit + */ trans = btrfs_start_transaction(root, 1); if (!trans) { ret = -ENOMEM; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 504d37dfa6c9..fd4cb65fcf1f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -121,6 +121,8 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -169,7 +171,7 @@ static void fill_inode_item(struct btrfs_inode_item *item, BTRFS_I(inode)->block_group->key.objectid); } -static int btrfs_update_inode(struct btrfs_trans_handle *trans, +int btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { @@ -400,6 +402,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int found_extent; int del_item; + btrfs_drop_extent_cache(inode, inode->i_size, 
(u64)-1); path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); @@ -511,6 +514,50 @@ error: return ret; } +static int btrfs_cow_one_page(struct btrfs_trans_handle *trans, + struct inode *inode, struct page *page, + size_t zero_start) +{ + char *kaddr; + int ret = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 alloc_hint = 0; + u64 page_start = page->index << PAGE_CACHE_SHIFT; + struct btrfs_key ins; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + set_page_private(page, 1); + page_cache_get(page); + } + + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_drop_extents(trans, root, inode, + page_start, page_start + PAGE_CACHE_SIZE, + &alloc_hint); + if (ret) + goto out; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, + alloc_hint, (u64)-1, &ins, 1); + if (ret) + goto out; + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + page_start, ins.objectid, 1, 1); + if (ret) + goto out; + SetPageChecked(page); + kaddr = kmap(page); + if (zero_start != PAGE_CACHE_SIZE) { + memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); + flush_dcache_page(page); + } + kunmap(page); + +out: + return ret; +} + /* * taken from block_truncate_page, but does cow as it zeros out * any bytes left in the last page in the file. 
@@ -518,16 +565,14 @@ error: static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; unsigned blocksize = 1 << inode->i_blkbits; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; - char *kaddr; int ret = 0; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 alloc_hint = 0; - struct btrfs_key ins; - struct btrfs_trans_handle *trans; + u64 page_start; if ((offset & (blocksize - 1)) == 0) goto out; @@ -536,7 +581,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) page = grab_cache_page(mapping, index); if (!page) goto out; - if (!PageUptodate(page)) { ret = btrfs_readpage(NULL, page); lock_page(page); @@ -545,37 +589,24 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) goto out; } } + page_start = page->index << PAGE_CACHE_SHIFT; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - - ret = btrfs_drop_extents(trans, root, inode, - page->index << PAGE_CACHE_SHIFT, - (page->index + 1) << PAGE_CACHE_SHIFT, - &alloc_hint); - if (ret) - goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, - alloc_hint, (u64)-1, &ins, 1); - if (ret) - goto out; - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - page->index << PAGE_CACHE_SHIFT, - ins.objectid, 1, 1); - if (ret) - goto out; - SetPageChecked(page); - kaddr = kmap(page); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - ret = btrfs_csum_file_block(trans, root, inode->i_ino, - page->index << PAGE_CACHE_SHIFT, - kaddr, PAGE_CACHE_SIZE); - kunmap(page); + ret = btrfs_cow_one_page(trans, inode, page, offset); + if (!ret) { + char *kaddr = kmap(page); + ret = btrfs_csum_file_block(trans, root, inode->i_ino, + page_start, kaddr, 
PAGE_CACHE_SIZE); + kunmap(page); + } + set_extent_dirty(&BTRFS_I(inode)->extent_tree, + page_start, page_start + PAGE_CACHE_SIZE - 1, + GFP_NOFS); + set_page_dirty(page); btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - set_page_dirty(page); unlock_page(page); page_cache_release(page); out: @@ -1095,6 +1126,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1238,6 +1271,182 @@ out_unlock: return err; } +struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, + size_t page_offset, u64 start, u64 end, + int create) +{ + int ret; + int err = 0; + u64 blocknr; + u64 extent_start = 0; + u64 extent_end = 0; + u64 objectid = inode->i_ino; + u32 found_type; + int failed_insert = 0; + struct btrfs_path *path; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_file_extent_item *item; + struct btrfs_leaf *leaf; + struct btrfs_disk_key *found_key; + struct extent_map *em = NULL; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct btrfs_trans_handle *trans = NULL; + + path = btrfs_alloc_path(); + BUG_ON(!path); + mutex_lock(&root->fs_info->fs_mutex); + +again: + em = lookup_extent_mapping(em_tree, start, end); + if (em) { + goto out; + } + if (!em) { + em = alloc_extent_map(GFP_NOFS); + if (!em) { + err = -ENOMEM; + goto out; + } + em->start = 0; + em->end = 0; + } + em->bdev = inode->i_sb->s_bdev; + ret = btrfs_lookup_file_extent(NULL, root, path, + objectid, start, 0); + if (ret < 0) { + err = ret; + goto out; + } + + if (ret != 0) { + if (path->slots[0] == 0) + goto not_found; + path->slots[0]--; + } + + item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + struct 
btrfs_file_extent_item); + leaf = btrfs_buffer_leaf(path->nodes[0]); + blocknr = btrfs_file_extent_disk_blocknr(item); + blocknr += btrfs_file_extent_offset(item); + + /* are we inside the extent that was found? */ + found_key = &leaf->items[path->slots[0]].key; + found_type = btrfs_disk_key_type(found_key); + if (btrfs_disk_key_objectid(found_key) != objectid || + found_type != BTRFS_EXTENT_DATA_KEY) { + goto not_found; + } + + found_type = btrfs_file_extent_type(item); + extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = extent_start + + (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); + err = 0; + if (start < extent_start || start > extent_end) { + em->start = start; + if (start < extent_start) { + em->end = extent_end - 1; + } else { + em->end = end; + } + goto not_found_em; + } + if (btrfs_file_extent_disk_blocknr(item) == 0) { + em->start = extent_start; + em->end = extent_end - 1; + em->block_start = 0; + em->block_end = 0; + goto insert; + } + em->block_start = blocknr << inode->i_blkbits; + em->block_end = em->block_start + + (btrfs_file_extent_num_blocks(item) << + inode->i_blkbits) - 1; + em->start = extent_start; + em->end = extent_end - 1; + goto insert; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + char *ptr; + char *map; + u32 size; + + size = btrfs_file_extent_inline_len(leaf->items + + path->slots[0]); + extent_end = extent_start + size; + if (start < extent_start || start > extent_end) { + em->start = start; + if (start < extent_start) { + em->end = extent_end - 1; + } else { + em->end = end; + } + goto not_found_em; + } + em->block_start = EXTENT_MAP_INLINE; + em->block_end = EXTENT_MAP_INLINE; + em->start = extent_start; + em->end = extent_end - 1; + if (!page) { + goto insert; + } + ptr = btrfs_file_extent_inline_start(item); + map = kmap(page); + memcpy(map + page_offset, ptr, size); + flush_dcache_page(result->b_page); + kunmap(page); + 
set_extent_uptodate(em_tree, extent_start, + extent_end, GFP_NOFS); + goto insert; + } else { + printk("unkknown found_type %d\n", found_type); + WARN_ON(1); + } +not_found: + em->start = start; + em->end = end; +not_found_em: + em->block_start = 0; + em->block_end = 0; +insert: + btrfs_release_path(root, path); + if (em->start > start || em->end < start) { + printk("bad extent! %Lu %Lu start %Lu end %Lu\n", em->start, em->end, start, end); + WARN_ON(1); + err = -EIO; + goto out; + } + ret = add_extent_mapping(em_tree, em); + if (ret == -EEXIST) { + free_extent_map(em); + failed_insert++; + if (failed_insert > 5) { + printk("failing to insert %Lu %Lu\n", start, end); + err = -EIO; + goto out; + } + em = NULL; + goto again; + } + err = 0; +out: + btrfs_free_path(path); + if (trans) { + ret = btrfs_end_transaction(trans, root); + if (!err) + err = ret; + } + mutex_unlock(&root->fs_info->fs_mutex); + if (err) { + free_extent_map(em); + WARN_ON(1); + return ERR_PTR(err); + } + return em; +} + + /* * FIBMAP and others want to pass in a fake buffer head. They need to * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy @@ -1398,46 +1607,22 @@ int btrfs_get_block(struct inode *inode, sector_t iblock, return err; } -static int btrfs_get_block_csum(struct inode *inode, sector_t iblock, - struct buffer_head *result, int create) -{ - int ret; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct page *page = result->b_page; - u64 offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(result); - struct btrfs_csum_item *item; - struct btrfs_path *path = NULL; - - mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_get_block_lock(inode, iblock, result, create); - if (ret) - goto out; - - path = btrfs_alloc_path(); - item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0); - if (IS_ERR(item)) { - ret = PTR_ERR(item); - /* a csum that isn't present is a preallocated region. 
*/ - if (ret == -ENOENT || ret == -EFBIG) - ret = 0; - result->b_private = NULL; - goto out; - } - memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE); -out: - if (path) - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} - static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, struct buffer_head *result, int create) { struct btrfs_root *root = BTRFS_I(inode)->root; - mutex_lock(&root->fs_info->fs_mutex); - btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT); - mutex_unlock(&root->fs_info->fs_mutex); + u64 start = iblock << inode->i_blkbits; + u64 end = start + root->blocksize -1; + struct extent_map *em; + + em = btrfs_get_extent(inode, NULL, 0, start, end, 0); + if (em && !IS_ERR(em) && em->block_start != EXTENT_MAP_INLINE && + em->block_start != 0) { + u64 offset; + offset = start - em->start; + start = (em->block_start + offset) >> inode->i_blkbits; + btrfs_map_bh_to_logical(root, result, start); + } return 0; } @@ -1449,442 +1634,50 @@ static sector_t btrfs_bmap(struct address_space *as, sector_t block) static int btrfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { - return block_prepare_write(page, from, to, btrfs_get_block); + return extent_prepare_write(&BTRFS_I(page->mapping->host)->extent_tree, + page->mapping->host, page, from, to, + btrfs_get_extent); } -static void buffer_io_error(struct buffer_head *bh) +int btrfs_readpage(struct file *file, struct page *page) { - char b[BDEVNAME_SIZE]; - - printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", - bdevname(bh->b_bdev, b), - (unsigned long long)bh->b_blocknr); + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_read_full_page(tree, page, btrfs_get_extent); } - -/* - * I/O completion handler for block_read_full_page() - pages - * which come unlocked at the end of I/O. 
- */ -static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) +static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { - unsigned long flags; - struct buffer_head *first; - struct buffer_head *tmp; - struct page *page; - int page_uptodate = 1; - struct inode *inode; - int ret; - - BUG_ON(!buffer_async_read(bh)); - - page = bh->b_page; - inode = page->mapping->host; - if (uptodate) { - void *kaddr; - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - if (bh->b_private) { - char csum[BTRFS_CRC32_SIZE]; - kaddr = kmap_atomic(page, KM_IRQ0); - ret = btrfs_csum_data(root, kaddr + bh_offset(bh), - bh->b_size, csum); - BUG_ON(ret); - if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) { - u64 offset; - offset = (page->index << PAGE_CACHE_SHIFT) + - bh_offset(bh); - printk("btrfs csum failed ino %lu off %llu\n", - page->mapping->host->i_ino, - (unsigned long long)offset); - memset(kaddr + bh_offset(bh), 1, bh->b_size); - flush_dcache_page(page); - } - kunmap_atomic(kaddr, KM_IRQ0); - } - set_buffer_uptodate(bh); - } else { - clear_buffer_uptodate(bh); - if (printk_ratelimit()) - buffer_io_error(bh); - SetPageError(page); - } - - /* - * Be _very_ careful from here on. Bad things can happen if - * two buffer heads end IO at almost the same time and both - * decide that the page is now completely done. - */ - first = page_buffers(page); - local_irq_save(flags); - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); - clear_buffer_async_read(bh); - unlock_buffer(bh); - tmp = bh; - do { - if (!buffer_uptodate(tmp)) - page_uptodate = 0; - if (buffer_async_read(tmp)) { - BUG_ON(!buffer_locked(tmp)); - goto still_busy; - } - tmp = tmp->b_this_page; - } while (tmp != bh); - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); - - /* - * If none of the buffers had errors and they are all - * uptodate then we can set the page uptodate. 
- */ - if (page_uptodate && !PageError(page)) - SetPageUptodate(page); - unlock_page(page); - return; - -still_busy: - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); - local_irq_restore(flags); - return; + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } -/* - * Generic "read page" function for block devices that have the normal - * get_block functionality. This is most of the block device filesystems. - * Reads the page asynchronously --- the unlock_buffer() and - * set/clear_buffer_uptodate() functions propagate buffer state into the - * page struct once IO has completed. - */ -int btrfs_readpage(struct file *file, struct page *page) +static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct inode *inode = page->mapping->host; - sector_t iblock, lblock; - struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; - unsigned int blocksize; - int nr, i; - int fully_mapped = 1; - - BUG_ON(!PageLocked(page)); - blocksize = 1 << inode->i_blkbits; - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - head = page_buffers(page); - - iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; - bh = head; - nr = 0; - i = 0; - - do { - if (buffer_uptodate(bh)) - continue; - - if (!buffer_mapped(bh)) { - int err = 0; - - fully_mapped = 0; - if (iblock < lblock) { - WARN_ON(bh->b_size != blocksize); - err = btrfs_get_block_csum(inode, iblock, - bh, 0); - if (err) - SetPageError(page); - } - if (!buffer_mapped(bh)) { - void *kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + i * blocksize, 0, blocksize); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - if (!err) - set_buffer_uptodate(bh); - continue; - } - /* - * get_block() might have updated the buffer - * synchronously - */ - if (buffer_uptodate(bh)) - continue; - } - arr[nr++] = 
bh; - } while (i++, iblock++, (bh = bh->b_this_page) != head); - - if (fully_mapped) - SetPageMappedToDisk(page); - - if (!nr) { - /* - * All buffers are uptodate - we can set the page uptodate - * as well. But not if get_block() returned an error. - */ - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); - return 0; - } - - /* Stage two: lock the buffers */ - for (i = 0; i < nr; i++) { - bh = arr[i]; - lock_buffer(bh); - bh->b_end_io = btrfs_end_buffer_async_read; - set_buffer_async_read(bh); - } - - /* - * Stage 3: start the IO. Check for uptodateness - * inside the buffer lock in case another process reading - * the underlying blockdev brought it uptodate (the sct fix). - */ - for (i = 0; i < nr; i++) { - bh = arr[i]; - if (buffer_uptodate(bh)) - btrfs_end_buffer_async_read(bh, 1); - else - submit_bh(READ, bh); - } - return 0; -} - -/* - * Aside from a tiny bit of packed file data handling, this is the - * same as the generic code. - * - * While block_write_full_page is writing back the dirty buffers under - * the page lock, whoever dirtied the buffers may decide to clean them - * again at any time. We handle that by only looking at the buffer - * state inside lock_buffer(). - * - * If block_write_full_page() is called for regular writeback - * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a - * locked buffer. This only can happen if someone has written the buffer - * directly, with submit_bh(). At the address_space level PageWriteback - * prevents this contention from occurring. 
- */ -static int __btrfs_write_full_page(struct inode *inode, struct page *page, - struct writeback_control *wbc) -{ - int err; - sector_t block; - sector_t last_block; - struct buffer_head *bh, *head; - const unsigned blocksize = 1 << inode->i_blkbits; - int nr_underway = 0; - struct btrfs_root *root = BTRFS_I(inode)->root; - - BUG_ON(!PageLocked(page)); - - last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; - - /* no csumming allowed when from PF_MEMALLOC */ - if (current->flags & PF_MEMALLOC) { - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; - } + struct extent_map_tree *tree; + int ret; - if (!page_has_buffers(page)) { - create_empty_buffers(page, blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); + if (page->private != 1) { + WARN_ON(1); + return try_to_free_buffers(page); } - - /* - * Be very careful. We have no exclusion from __set_page_dirty_buffers - * here, and the (potentially unmapped) buffers may become dirty at - * any time. If a buffer becomes dirty here after we've inspected it - * then we just miss that fact, and the page stays dirty. - * - * Buffers outside i_size may be dirtied by __set_page_dirty_buffers; - * handle that here by just cleaning them. - */ - - block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - head = page_buffers(page); - bh = head; - - /* - * Get all the dirty buffers mapped to disk addresses and - * handle any aliases from the underlying blockdev's mapping. - */ - do { - if (block > last_block) { - /* - * mapped buffers outside i_size will occur, because - * this page can be outside i_size when there is a - * truncate in progress. 
- */ - /* - * The buffer was zeroed by block_write_full_page() - */ - clear_buffer_dirty(bh); - set_buffer_uptodate(bh); - } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { - WARN_ON(bh->b_size != blocksize); - err = btrfs_get_block(inode, block, bh, 0); - if (err) { - goto recover; - } - if (buffer_new(bh)) { - /* blockdev mappings never come here */ - clear_buffer_new(bh); - } - } - bh = bh->b_this_page; - block++; - } while (bh != head); - - do { - if (!buffer_mapped(bh)) - continue; - /* - * If it's a fully non-blocking write attempt and we cannot - * lock the buffer then redirty the page. Note that this can - * potentially cause a busy-wait loop from pdflush and kswapd - * activity, but those code paths have their own higher-level - * throttling. - */ - if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { - lock_buffer(bh); - } else if (test_set_buffer_locked(bh)) { - redirty_page_for_writepage(wbc, page); - continue; - } - if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { - struct btrfs_trans_handle *trans; - int ret; - u64 off = page->index << PAGE_CACHE_SHIFT; - char *kaddr; - - off += bh_offset(bh); - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - kaddr = kmap(page); - btrfs_csum_file_block(trans, root, inode->i_ino, - off, kaddr + bh_offset(bh), - bh->b_size); - kunmap(page); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - mark_buffer_async_write(bh); - } else { - unlock_buffer(bh); - } - } while ((bh = bh->b_this_page) != head); - - /* - * The page and its buffers are protected by PageWriteback(), so we can - * drop the bh refcounts early. 
- */ - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - submit_bh(WRITE, bh); - nr_underway++; - } - bh = next; - } while (bh != head); - unlock_page(page); - - err = 0; -done: - if (nr_underway == 0) { - /* - * The page was marked dirty, but the buffers were - * clean. Someone wrote them back by hand with - * ll_rw_block/submit_bh. A rare case. - */ - int uptodate = 1; - do { - if (!buffer_uptodate(bh)) { - uptodate = 0; - break; - } - bh = bh->b_this_page; - } while (bh != head); - if (uptodate) - SetPageUptodate(page); - end_page_writeback(page); + tree = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(tree, page); + if (ret == 1) { + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); } - return err; - -recover: - /* - * ENOSPC, or some other error. We may already have added some - * blocks to the file, so we need to write these out to avoid - * exposing stale data. - * The page is currently locked and not marked for writeback - */ - bh = head; - /* Recovery: lock and submit the mapped buffers */ - do { - if (buffer_mapped(bh) && buffer_dirty(bh)) { - lock_buffer(bh); - mark_buffer_async_write(bh); - } else { - /* - * The buffer may have been set dirty during - * attachment to a dirty page. 
- */ - clear_buffer_dirty(bh); - } - } while ((bh = bh->b_this_page) != head); - SetPageError(page); - BUG_ON(PageWriteback(page)); - set_page_writeback(page); - do { - struct buffer_head *next = bh->b_this_page; - if (buffer_async_write(bh)) { - clear_buffer_dirty(bh); - submit_bh(WRITE, bh); - nr_underway++; - } - bh = next; - } while (bh != head); - unlock_page(page); - goto done; + return ret; } -static int btrfs_writepage(struct page *page, struct writeback_control *wbc) +static void btrfs_invalidatepage(struct page *page, unsigned long offset) { - struct inode * const inode = page->mapping->host; - loff_t i_size = i_size_read(inode); - const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; - unsigned offset; - void *kaddr; - - /* Is the page fully inside i_size? */ - if (page->index < end_index) - return __btrfs_write_full_page(inode, page, wbc); - - /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); - if (page->index >= end_index+1 || !offset) { - /* - * The page may have dirty, unmapped buffers. For example, - * they may have been added in ext3_writepage(). Make them - * freeable here, so the page does not leak. - */ - block_invalidatepage(page, 0); - unlock_page(page); - return 0; /* don't care */ - } + struct extent_map_tree *tree; - /* - * The page straddles i_size. It must be zeroed out on each and every - * writepage invokation because it may be mmapped. "A file is mapped - * in multiples of the page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when mapped, and - * writes to that region are not written out to the file." 
- */ - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - return __btrfs_write_full_page(inode, page, wbc); + tree = &BTRFS_I(page->mapping->host)->extent_tree; + extent_invalidatepage(tree, page, offset); + btrfs_releasepage(page, GFP_NOFS); } /* @@ -1905,28 +1698,39 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; unsigned long end; loff_t size; int ret = -EINVAL; + u64 page_start; lock_page(page); wait_on_page_writeback(page); size = i_size_read(inode); + page_start = page->index << PAGE_CACHE_SHIFT; + if ((page->mapping != inode->i_mapping) || - ((page->index << PAGE_CACHE_SHIFT) > size)) { + (page_start > size)) { /* page got truncated out from underneath us */ goto out_unlock; } /* page is wholly or partially inside EOF */ - if (((page->index + 1) << PAGE_CACHE_SHIFT) > size) + if (page_start + PAGE_CACHE_SIZE > size) end = size & ~PAGE_CACHE_MASK; else end = PAGE_CACHE_SIZE; - ret = btrfs_prepare_write(NULL, page, 0, end); - if (!ret) - ret = btrfs_commit_write(NULL, page, 0, end); + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_cow_one_page(trans, inode, page, end); + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + set_extent_dirty(&BTRFS_I(inode)->extent_tree, + page_start, page_start + PAGE_CACHE_SIZE - 1, + GFP_NOFS); + set_page_dirty(page); out_unlock: unlock_page(page); @@ -1962,21 +1766,8 @@ static void btrfs_truncate(struct inode *inode) int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - struct inode *inode = page->mapping->host; - struct buffer_head *bh; - loff_t pos = 
((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - SetPageUptodate(page); - bh = page_buffers(page); - set_buffer_uptodate(bh); - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - set_page_dirty(page); - } - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; + return extent_commit_write(&BTRFS_I(page->mapping->host)->extent_tree, + page->mapping->host, page, from, to); } static int create_subvol(struct btrfs_root *root, char *name, int namelen) @@ -2471,6 +2262,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -2553,6 +2346,9 @@ static struct address_space_operations btrfs_aops = { .prepare_write = btrfs_prepare_write, .commit_write = btrfs_commit_write, .bmap = btrfs_bmap, + .invalidatepage = btrfs_invalidatepage, + .releasepage = btrfs_releasepage, + .set_page_dirty = __set_page_dirty_nobuffers, }; static struct address_space_operations btrfs_symlink_aops = { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 74f3de47423c..7b7c32c73a2c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -185,6 +185,7 @@ static int __init init_btrfs_fs(void) err = btrfs_init_cachep(); if (err) return err; + extent_map_init(); return register_filesystem(&btrfs_fs_type); } @@ -192,6 +193,7 @@ static void __exit exit_btrfs_fs(void) { btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); + extent_map_exit(); unregister_filesystem(&btrfs_fs_type); } -- cgit v1.2.3 From b888db2bd7b67f190b32934e6a86181f262ac3ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 27 Aug 2007 16:49:44 -0400 Subject: Btrfs: Add delayed allocation to the extent based page tree code Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 --- 
fs/btrfs/extent_map.c | 147 +++++++++++++++++++++++++++++++++++++------------ fs/btrfs/extent_map.h | 3 + fs/btrfs/file.c | 50 +++++++---------- fs/btrfs/inode.c | 141 ++++++++++++++++++++++++++--------------------- fs/btrfs/super.c | 1 + 6 files changed, 216 insertions(+), 135 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4049aadbeda9..c31e84d42653 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -39,7 +39,6 @@ static int cache_block_group(struct btrfs_root *root, u64 i; u64 last = 0; u64 hole_size; - u64 limit; int found = 0; root = root->fs_info->extent_root; @@ -62,7 +61,6 @@ static int cache_block_group(struct btrfs_root *root, return ret; if (ret && path->slots[0] > 0) path->slots[0]--; - limit = block_group->key.objectid + block_group->key.offset; while(1) { leaf = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; @@ -982,7 +980,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - u64 limit; WARN_ON(num_blocks < 1); ins->flags = 0; @@ -1049,12 +1046,6 @@ check_failed: l = btrfs_buffer_leaf(path->nodes[0]); slot = path->slots[0]; if (slot >= btrfs_header_nritems(&l->header)) { - if (start_found) - limit = last_block + - (block_group->key.offset >> 1); - else - limit = search_start + - (block_group->key.offset >> 1); ret = btrfs_next_leaf(root, path); if (ret == 0) continue; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index d378edf0964e..a9c7419615b9 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -30,9 +30,6 @@ struct tree_entry { #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) -static LIST_HEAD(all_states); -spinlock_t state_lock = SPIN_LOCK_UNLOCKED; - void __init extent_map_init(void) { extent_map_cache = kmem_cache_create("extent_map", @@ -49,15 +46,6 @@ void __init extent_map_init(void) void __exit 
extent_map_exit(void) { - while(!list_empty(&all_states)) { - struct extent_state *state; - struct list_head *cur = all_states.next; - state = list_entry(cur, struct extent_state, list); - printk("found leaked state %Lu %Lu state %d in_tree %d\n", - state->start, state->end, state->state, state->in_tree); - list_del(&state->list); - kfree(state); - } if (extent_map_cache) kmem_cache_destroy(extent_map_cache); if (extent_state_cache) @@ -69,6 +57,7 @@ void extent_map_tree_init(struct extent_map_tree *tree, { tree->map.rb_node = NULL; tree->state.rb_node = NULL; + tree->fill_delalloc = NULL; rwlock_init(&tree->lock); tree->mapping = mapping; } @@ -106,9 +95,6 @@ struct extent_state *alloc_extent_state(gfp_t mask) state->in_tree = 0; atomic_set(&state->refs, 1); init_waitqueue_head(&state->wq); - spin_lock_irq(&state_lock); - list_add(&state->list, &all_states); - spin_unlock_irq(&state_lock); return state; } EXPORT_SYMBOL(alloc_extent_state); @@ -117,9 +103,6 @@ void free_extent_state(struct extent_state *state) { if (atomic_dec_and_test(&state->refs)) { WARN_ON(state->in_tree); - spin_lock_irq(&state_lock); - list_del_init(&state->list); - spin_unlock_irq(&state_lock); kmem_cache_free(extent_state_cache, state); } } @@ -369,7 +352,7 @@ static int insert_state(struct extent_map_tree *tree, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); -printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); free_extent_state(state); return -EEXIST; } @@ -408,7 +391,7 @@ static int split_state(struct extent_map_tree *tree, struct extent_state *orig, if (node) { struct extent_state *found; found = rb_entry(node, struct extent_state, rb_node); -printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", 
found->start, found->end, prealloc->start, prealloc->end); free_extent_state(prealloc); return -EEXIST; } @@ -792,10 +775,20 @@ int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, } EXPORT_SYMBOL(set_extent_dirty); +int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_delalloc); + int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) { - return clear_extent_bit(tree, start, end, EXTENT_DIRTY, 0, 0, mask); + return clear_extent_bit(tree, start, end, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); } EXPORT_SYMBOL(clear_extent_dirty); @@ -922,6 +915,62 @@ int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +u64 find_lock_delalloc_range(struct extent_map_tree *tree, + u64 start, u64 lock_start, u64 *end, u64 max_bytes) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = start; + u64 found = 0; + u64 total_bytes = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. 
+ */ +search_again: + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != cur_start) { + goto out; + } + if (!(state->state & EXTENT_DELALLOC)) { + goto out; + } + if (state->start >= lock_start) { + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + } + found++; + *end = state->end; + cur_start = state->end + 1; + node = rb_next(node); + if (!node) + break; + total_bytes = state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + } +out: + write_unlock_irq(&tree->lock); + return found; +} + /* * helper function to lock both pages and extents in the tree. * pages must be locked first. @@ -1285,6 +1334,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } @@ -1384,7 +1434,10 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, size_t blocksize; loff_t i_size = i_size_read(inode); unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + u64 nr_delalloc; + u64 delalloc_end; + WARN_ON(!PageLocked(page)); if (page->index > end_index) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); unlock_page(page); @@ -1400,11 +1453,34 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } - end = page_end; lock_extent(tree, start, page_end, GFP_NOFS); + nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1, + &delalloc_end, + 
128 * 1024 * 1024); + if (nr_delalloc) { + tree->fill_delalloc(inode, start, delalloc_end); + if (delalloc_end >= page_end + 1) { + clear_extent_bit(tree, page_end + 1, delalloc_end, + EXTENT_LOCKED | EXTENT_DELALLOC, + 1, 0, GFP_NOFS); + } + clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC, + 0, 0, GFP_NOFS); + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after clear extent_bit\n"); + } + } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after find_delalloc_range returns 0\n"); + } + + end = page_end; + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after lock_extent\n"); + } if (last_byte <= start) { clear_extent_dirty(tree, start, page_end, GFP_NOFS); @@ -1419,7 +1495,7 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, clear_extent_dirty(tree, cur, page_end, GFP_NOFS); break; } - em = get_extent(inode, page, page_offset, cur, end, 1); + em = get_extent(inode, page, page_offset, cur, end, 0); if (IS_ERR(em) || !em) { SetPageError(page); break; @@ -1507,6 +1583,7 @@ int extent_commit_write(struct extent_map_tree *tree, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } @@ -1543,6 +1620,7 @@ int extent_prepare_write(struct extent_map_tree *tree, if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } block_start = (page_start + from) & ~((u64)blocksize - 1); @@ -1628,29 +1706,28 @@ int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) u64 start = page->index << PAGE_CACHE_SHIFT; u64 end = start + PAGE_CACHE_SIZE - 1; u64 orig_start = start; + int ret = 1; while (start <= end) { em = lookup_extent_mapping(tree, start, end); if (!em || IS_ERR(em)) break; - if 
(test_range_bit(tree, em->start, em->end, - EXTENT_LOCKED, 0)) { + if (!test_range_bit(tree, em->start, em->end, + EXTENT_LOCKED, 0)) { + remove_extent_mapping(tree, em); + /* once for the rb tree */ free_extent_map(em); - start = em->end + 1; -printk("range still locked %Lu %Lu\n", em->start, em->end); - break; } - remove_extent_mapping(tree, em); start = em->end + 1; - /* once for the rb tree */ - free_extent_map(em); /* once for us */ free_extent_map(em); } - WARN_ON(test_range_bit(tree, orig_start, end, EXTENT_WRITEBACK, 0)); - clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, GFP_NOFS); - return 1; + if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) + ret = 0; + else + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return ret; } EXPORT_SYMBOL(try_release_extent_mapping); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 108944aab4bd..e91a2e9619ee 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -11,6 +11,7 @@ struct extent_map_tree { struct rb_root state; struct address_space *mapping; rwlock_t lock; + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); }; /* note, this must start with the same fields as fs/extent_map.c:tree_entry */ @@ -74,6 +75,8 @@ int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_map_tree *tree, struct page *page, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 71a481894ab6..d3d39e4a2797 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -143,7 +143,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; struct extent_map *em; 
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct btrfs_key ins; u64 hint_block; u64 num_blocks; u64 start_pos; @@ -162,6 +161,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_blkbits; end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -179,16 +179,6 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* FIXME...EIEIO, ENOSPC and more */ - /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ - if (start_pos < inode->i_size) { - err = btrfs_drop_extents(trans, root, inode, - start_pos, (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); - if (err) - goto failed; - } - /* insert any holes we need to create */ if (inode->i_size < start_pos) { u64 last_pos_in_file; @@ -213,29 +203,28 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, */ if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { - err = btrfs_alloc_extent(trans, root, inode->i_ino, - num_blocks, 0, hint_block, (u64)-1, - &ins, 1); - BUG_ON(err); - err = btrfs_insert_file_extent(trans, root, inode->i_ino, - start_pos, ins.objectid, ins.offset, - ins.offset); - BUG_ON(err); - em->start = start_pos; - em->end = end_of_last_block; - em->block_start = ins.objectid << inode->i_blkbits; - em->block_end = em->block_start + - (ins.offset << inode->i_blkbits) - 1; - set_extent_dirty(em_tree, start_pos, end_of_last_block, - GFP_NOFS); - err = add_extent_mapping(em_tree, em); + u64 last_end; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); - __set_page_dirty_nobuffers(p); + set_page_dirty(p); } + last_end = pages[num_pages -1]->index << PAGE_CACHE_SHIFT; + 
last_end += PAGE_CACHE_SIZE - 1; + set_extent_delalloc(em_tree, start_pos, end_of_last_block, + GFP_NOFS); } else { struct page *p = pages[0]; + /* step one, delete the existing extents in this range */ + /* FIXME blocksize != pagesize */ + if (start_pos < inode->i_size) { + err = btrfs_drop_extents(trans, root, inode, start_pos, + (pos + write_bytes + root->blocksize -1) & + ~((u64)root->blocksize - 1), &hint_block); + if (err) + goto failed; + } + err = insert_inline_extent(trans, root, inode, start_pos, end_pos - start_pos, p, 0); BUG_ON(err); @@ -253,6 +242,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); + unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); free_extent_map(em); return err; } @@ -531,7 +521,7 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1); + start_pos = pos & ~((u64)root->blocksize - 1); num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> inode->i_blkbits; @@ -548,8 +538,10 @@ static int prepare_pages(struct btrfs_root *root, if (!PagePrivate(pages[i])) { SetPagePrivate(pages[i]); set_page_private(pages[i], 1); + WARN_ON(!pages[i]->mapping->a_ops->invalidatepage); page_cache_get(pages[i]); } + WARN_ON(!PageLocked(pages[i])); } return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fd4cb65fcf1f..dc181089aa74 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -70,6 +70,40 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; +static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + struct btrfs_key ins; + u64 alloc_hint = 0; + u64 num_blocks; + int ret; + u64 blocksize = 1 << inode->i_blkbits; + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + 
btrfs_set_trans_block_group(trans, inode); + BUG_ON(!trans); + num_blocks = (end - start + blocksize) & ~(blocksize - 1); + ret = btrfs_drop_extents(trans, root, inode, + start, start + num_blocks, &alloc_hint); + num_blocks = num_blocks >> inode->i_blkbits; + ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, 0, + alloc_hint, (u64)-1, &ins, 1); + if (ret) { + WARN_ON(1); + goto out; + } + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start, ins.objectid, ins.offset, + ins.offset); +out: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + + void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -121,8 +155,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, - inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -450,11 +483,17 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(ret); } if (item_end < inode->i_size) { - if (found_type) { - btrfs_set_key_type(&key, found_type - 1); - continue; + if (found_type == BTRFS_DIR_ITEM_KEY) { + found_type = BTRFS_INODE_ITEM_KEY; + } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { + found_type = BTRFS_CSUM_ITEM_KEY; + } else if (found_type) { + found_type--; + } else { + break; } - break; + btrfs_set_key_type(&key, found_type - 1); + continue; } if (btrfs_disk_key_offset(found_key) >= inode->i_size) del_item = 1; @@ -514,47 +553,34 @@ error: return ret; } -static int btrfs_cow_one_page(struct btrfs_trans_handle *trans, - struct inode *inode, struct page *page, +static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; int ret = 0; - struct btrfs_root *root = BTRFS_I(inode)->root; - 
u64 alloc_hint = 0; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 page_start = page->index << PAGE_CACHE_SHIFT; - struct btrfs_key ins; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; if (!PagePrivate(page)) { SetPagePrivate(page); set_page_private(page, 1); + WARN_ON(!page->mapping->a_ops->invalidatepage); page_cache_get(page); } - btrfs_set_trans_block_group(trans, inode); - - ret = btrfs_drop_extents(trans, root, inode, - page_start, page_start + PAGE_CACHE_SIZE, - &alloc_hint); - if (ret) - goto out; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0, - alloc_hint, (u64)-1, &ins, 1); - if (ret) - goto out; - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - page_start, ins.objectid, 1, 1); - if (ret) - goto out; - SetPageChecked(page); - kaddr = kmap(page); + lock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, + page_end, GFP_NOFS); if (zero_start != PAGE_CACHE_SIZE) { + kaddr = kmap(page); memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start); flush_dcache_page(page); + kunmap(page); } - kunmap(page); + set_page_dirty(page); + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); -out: return ret; } @@ -565,8 +591,6 @@ out: static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; unsigned blocksize = 1 << inode->i_blkbits; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -591,21 +615,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) } page_start = page->index << PAGE_CACHE_SHIFT; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_cow_one_page(trans, inode, page, offset); - if (!ret) { - char *kaddr = kmap(page); - ret = btrfs_csum_file_block(trans, root, inode->i_ino, - page_start, 
kaddr, PAGE_CACHE_SIZE); - kunmap(page); - } - set_extent_dirty(&BTRFS_I(inode)->extent_tree, - page_start, page_start + PAGE_CACHE_SIZE - 1, - GFP_NOFS); - set_page_dirty(page); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); + ret = btrfs_cow_one_page(inode, page, offset); unlock_page(page); page_cache_release(page); @@ -757,6 +767,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); return 0; } @@ -968,7 +980,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->root = root; + if (mode & S_IFDIR) owner = 0; else @@ -1128,6 +1143,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1344,9 +1360,11 @@ again: extent_end = extent_start + (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); err = 0; - if (start < extent_start || start > extent_end) { + if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { + if (end < extent_start) + goto not_found; em->end = extent_end - 1; } else { em->end = end; @@ -1375,9 +1393,11 @@ again: size = btrfs_file_extent_inline_len(leaf->items + path->slots[0]); extent_end = extent_start + size; - if (start < extent_start || start > extent_end) { + if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { + if (end < extent_start) + goto not_found; em->end = extent_end - 1; } else { em->end 
= end; @@ -1412,8 +1432,7 @@ not_found_em: insert: btrfs_release_path(root, path); if (em->start > start || em->end < start) { - printk("bad extent! %Lu %Lu start %Lu end %Lu\n", em->start, em->end, start, end); - WARN_ON(1); + printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end); err = -EIO; goto out; } @@ -1648,6 +1667,13 @@ int btrfs_readpage(struct file *file, struct page *page) static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; + + + if (current->flags & PF_MEMALLOC) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } tree = &BTRFS_I(page->mapping->host)->extent_tree; return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } @@ -1698,8 +1724,6 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { struct inode *inode = vma->vm_file->f_path.dentry->d_inode; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; unsigned long end; loff_t size; int ret = -EINVAL; @@ -1722,15 +1746,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) else end = PAGE_CACHE_SIZE; - mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_cow_one_page(trans, inode, page, end); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); - set_extent_dirty(&BTRFS_I(inode)->extent_tree, - page_start, page_start + PAGE_CACHE_SIZE - 1, - GFP_NOFS); - set_page_dirty(page); + ret = btrfs_cow_one_page(inode, page, end); out_unlock: unlock_page(page); @@ -2264,6 +2280,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_file_inode_operations; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, inode->i_mapping, GFP_NOFS); + BTRFS_I(inode)->extent_tree.fill_delalloc = run_delalloc_range; } dir->i_sb->s_dirt = 1; 
btrfs_update_inode_block_group(trans, inode); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7b7c32c73a2c..7da95538745f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -83,6 +83,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) bi->location.offset = 0; bi->location.flags = 0; bi->root = tree_root; + btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); if (!inode) { -- cgit v1.2.3 From 58176a9604c5db1784d2c979aea472b3be40b6f0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 29 Aug 2007 15:47:34 -0400 Subject: Btrfs: Add per-root block accounting and sysfs entries Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/ctree.h | 54 +++++++++++ fs/btrfs/disk-io.c | 31 ++++++- fs/btrfs/disk-io.h | 3 +- fs/btrfs/extent-tree.c | 17 +++- fs/btrfs/inode.c | 13 ++- fs/btrfs/super.c | 14 +++ fs/btrfs/sysfs.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/transaction.c | 34 ++++++- 9 files changed, 393 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index ea7a22870350..ee505c87ecf3 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,7 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o + extent_map.o sysfs.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 947f061ed118..ffeee546a0d7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "bit-radix.h" struct btrfs_trans_handle; @@ -313,6 +314,8 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head dead_roots; struct delayed_work trans_work; + struct kobject super_kobj; + struct 
completion kobj_unregister; int do_barriers; int closing; }; @@ -328,6 +331,8 @@ struct btrfs_root { struct btrfs_key root_key; struct btrfs_fs_info *fs_info; struct inode *inode; + struct kobject root_kobj; + struct completion kobj_unregister; u64 objectid; u64 last_trans; u32 blocksize; @@ -338,6 +343,7 @@ struct btrfs_root { struct btrfs_key defrag_progress; int defrag_running; int defrag_level; + char *name; }; /* the lower bits in the key flags defines the item type */ @@ -814,6 +820,28 @@ static inline void btrfs_set_root_flags(struct btrfs_root_item *item, u32 val) item->flags = cpu_to_le32(val); } +static inline void btrfs_set_root_blocks_used(struct btrfs_root_item *item, + u64 val) +{ + item->blocks_used = cpu_to_le64(val); +} + +static inline u64 btrfs_root_blocks_used(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->blocks_used); +} + +static inline void btrfs_set_root_block_limit(struct btrfs_root_item *item, + u64 val) +{ + item->block_limit = cpu_to_le64(val); +} + +static inline u64 btrfs_root_block_limit(struct btrfs_root_item *item) +{ + return le64_to_cpu(item->block_limit); +} + static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) { return le64_to_cpu(s->blocknr); @@ -1014,6 +1042,23 @@ static inline void btrfs_memmove(struct btrfs_root *root, memmove(dst, src, nr); } +static inline int btrfs_set_root_name(struct btrfs_root *root, + const char *name, int len) +{ + /* if we already have a name just free it */ + if (root->name) + kfree(root->name); + + root->name = kmalloc(len+1, GFP_KERNEL); + if (!root->name) + return -ENOMEM; + + memcpy(root->name, name, len); + root->name[len] ='\0'; + + return 0; +} + /* helper function to cast into the data area of the leaf. 
*/ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -1191,4 +1236,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); + +/* sysfs.c */ +int btrfs_init_sysfs(void); +void btrfs_exit_sysfs(void); +int btrfs_sysfs_add_super(struct btrfs_fs_info *fs); +int btrfs_sysfs_add_root(struct btrfs_root *root); +void btrfs_sysfs_del_root(struct btrfs_root *root); +void btrfs_sysfs_del_super(struct btrfs_fs_info *root); + #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b2f79878d51a..c25ef0a68f18 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -294,9 +294,12 @@ static int __setup_root(int blocksize, root->last_trans = 0; root->highest_inode = 0; root->last_inode_alloc = 0; + root->name = NULL; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + memset(&root->root_kobj, 0, sizeof(root->root_kobj)); + init_completion(&root->kobj_unregister); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; @@ -384,7 +387,8 @@ insert: } struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) + struct btrfs_key *location, + const char *name, int namelen) { struct btrfs_root *root; int ret; @@ -405,6 +409,22 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + + ret = btrfs_set_root_name(root, name, namelen); + if (ret) { + brelse(root->node); + kfree(root); + return ERR_PTR(ret); + } + + ret = btrfs_sysfs_add_root(root); + if (ret) { + brelse(root->node); + kfree(root->name); + kfree(root); + return ERR_PTR(ret); + } + return root; } @@ -433,6 +453,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->block_group_data_radix, 
GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); + init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->last_trans_committed = 0; @@ -500,8 +522,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->generation = btrfs_super_generation(disk_super) + 1; ret = btrfs_find_dead_roots(tree_root); - if (ret) + if (ret) { + mutex_unlock(&fs_info->fs_mutex); goto fail_tree_root; + } mutex_unlock(&fs_info->fs_mutex); return tree_root; @@ -553,12 +577,15 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); + btrfs_sysfs_del_root(root); if (root->inode) iput(root->inode); if (root->node) brelse(root->node); if (root->commit_root) brelse(root->commit_root); + if (root->name) + kfree(root->name); kfree(root); return 0; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 5261733b8735..da6bb72750f2 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -66,7 +66,8 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location); + struct btrfs_key *location, + const char *name, int namelen); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); u64 bh_blocknr(struct buffer_head *bh); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c31e84d42653..ff3f7c2be605 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -858,16 +858,23 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_extent_refs(ei, refs); btrfs_mark_buffer_dirty(path->nodes[0]); if (refs == 
0) { - u64 super_blocks_used; + u64 super_blocks_used, root_blocks_used; if (pin) { ret = pin_down_block(root, blocknr, 0); BUG_ON(ret); } + /* block accounting for super block */ super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used - num_blocks); + + /* block accounting for root item */ + root_blocks_used = btrfs_root_blocks_used(&root->root_item); + btrfs_set_root_blocks_used(&root->root_item, + root_blocks_used - num_blocks); + ret = btrfs_del_item(trans, extent_root, path); if (ret) { return ret; @@ -1175,7 +1182,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, { int ret; int pending_ret; - u64 super_blocks_used; + u64 super_blocks_used, root_blocks_used; u64 search_start = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1193,10 +1200,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, if (ret) return ret; + /* block accounting for super block */ super_blocks_used = btrfs_super_blocks_used(&info->super_copy); btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + num_blocks); + /* block accounting for root item */ + root_blocks_used = btrfs_root_blocks_used(&root->root_item); + btrfs_set_root_blocks_used(&root->root_item, root_blocks_used + + num_blocks); + if (root == extent_root) { BUG_ON(num_blocks != 1); set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dc181089aa74..2e3918e6049e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -733,7 +733,8 @@ out: */ static int fixup_tree_root_location(struct btrfs_root *root, struct btrfs_key *location, - struct btrfs_root **sub_root) + struct btrfs_root **sub_root, + struct dentry *dentry) { struct btrfs_path *path; struct btrfs_root_item *ri; @@ -747,7 +748,9 @@ static int fixup_tree_root_location(struct btrfs_root *root, BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); - 
*sub_root = btrfs_read_fs_root(root->fs_info, location); + *sub_root = btrfs_read_fs_root(root->fs_info, location, + dentry->d_name.name, + dentry->d_name.len); if (IS_ERR(*sub_root)) return PTR_ERR(*sub_root); @@ -812,7 +815,8 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(ret); inode = NULL; if (location.objectid) { - ret = fixup_tree_root_location(root, &location, &sub_root); + ret = fixup_tree_root_location(root, &location, &sub_root, + dentry); if (ret < 0) return ERR_PTR(ret); if (ret > 0) @@ -1829,6 +1833,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); btrfs_set_root_refs(&root_item, 1); + btrfs_set_root_blocks_used(&root_item, 0); memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; brelse(subvol); @@ -1865,7 +1870,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail_commit; - new_root = btrfs_read_fs_root(root->fs_info, &key); + new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen); BUG_ON(!new_root); trans = btrfs_start_transaction(new_root, 1); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7da95538745f..a68101ae1a3b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -45,12 +45,14 @@ static struct super_operations btrfs_super_ops; static void btrfs_put_super (struct super_block * sb) { struct btrfs_root *root = btrfs_sb(sb); + struct btrfs_fs_info *fs = root->fs_info; int ret; ret = close_ctree(root); if (ret) { printk("close ctree returns %d\n", ret); } + btrfs_sysfs_del_super(fs); sb->s_fs_info = NULL; } @@ -101,6 +103,12 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) err = -ENOMEM; goto fail_close; } + + /* this does the super kobj at the same time */ + err = btrfs_sysfs_add_super(tree_root->fs_info); + if (err) + goto fail_close; + sb->s_root = root_dentry; 
btrfs_transaction_queue_work(tree_root, HZ * 30); return 0; @@ -182,6 +190,11 @@ static struct super_operations btrfs_super_ops = { static int __init init_btrfs_fs(void) { int err; + + err = btrfs_init_sysfs(); + if (err) + return err; + btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) @@ -196,6 +209,7 @@ static void __exit exit_btrfs_fs(void) btrfs_destroy_cachep(); extent_map_exit(); unregister_filesystem(&btrfs_fs_type); + btrfs_exit_sysfs(); } module_init(init_btrfs_fs) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index db8917e5b256..2058783373eb 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -16,6 +16,242 @@ * Boston, MA 021110-1307, USA. */ +#include +#include +#include +#include +#include +#include +#include + #include "ctree.h" #include "disk-io.h" #include "transaction.h" + +static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_root_blocks_used(&root->root_item)); +} + +static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_root_block_limit(&root->root_item)); +} + +static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_blocks_used(fs->disk_super)); +} + +static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_total_blocks(fs->disk_super)); +} + +static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)btrfs_super_blocksize(fs->disk_super)); +} + +/* this is for root attrs (subvols/snapshots) */ +struct btrfs_root_attr { + struct attribute attr; + ssize_t (*show)(struct btrfs_root *, char *); + ssize_t (*store)(struct btrfs_root *, const char *, size_t); +}; + 
+#define ROOT_ATTR(name, mode, show, store) \ +static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, show, store) + +ROOT_ATTR(blocks_used, 0444, root_blocks_used_show, NULL); +ROOT_ATTR(block_limit, 0644, root_block_limit_show, NULL); + +static struct attribute *btrfs_root_attrs[] = { + &btrfs_root_attr_blocks_used.attr, + &btrfs_root_attr_block_limit.attr, + NULL, +}; + +/* this is for super attrs (actual full fs) */ +struct btrfs_super_attr { + struct attribute attr; + ssize_t (*show)(struct btrfs_fs_info *, char *); + ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t); +}; + +#define SUPER_ATTR(name, mode, show, store) \ +static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, show, store) + +SUPER_ATTR(blocks_used, 0444, super_blocks_used_show, NULL); +SUPER_ATTR(total_blocks, 0444, super_total_blocks_show, NULL); +SUPER_ATTR(blocksize, 0444, super_blocksize_show, NULL); + +static struct attribute *btrfs_super_attrs[] = { + &btrfs_super_attr_blocks_used.attr, + &btrfs_super_attr_total_blocks.attr, + &btrfs_super_attr_blocksize.attr, + NULL, +}; + +static ssize_t btrfs_super_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + struct btrfs_super_attr *a = container_of(attr, + struct btrfs_super_attr, + attr); + + return a->show ? a->show(fs, buf) : 0; +} + +static ssize_t btrfs_super_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + struct btrfs_super_attr *a = container_of(attr, + struct btrfs_super_attr, + attr); + + return a->store ? 
a->store(fs, buf, len) : 0; +} + +static ssize_t btrfs_root_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + struct btrfs_root_attr *a = container_of(attr, + struct btrfs_root_attr, + attr); + + return a->show ? a->show(root, buf) : 0; +} + +static ssize_t btrfs_root_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + struct btrfs_root_attr *a = container_of(attr, + struct btrfs_root_attr, + attr); + return a->store ? a->store(root, buf, len) : 0; +} + +static void btrfs_super_release(struct kobject *kobj) +{ + struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info, + super_kobj); + complete(&fs->kobj_unregister); +} + +static void btrfs_root_release(struct kobject *kobj) +{ + struct btrfs_root *root = container_of(kobj, struct btrfs_root, + root_kobj); + complete(&root->kobj_unregister); +} + +static struct sysfs_ops btrfs_super_attr_ops = { + .show = btrfs_super_attr_show, + .store = btrfs_super_attr_store, +}; + +static struct sysfs_ops btrfs_root_attr_ops = { + .show = btrfs_root_attr_show, + .store = btrfs_root_attr_store, +}; + +static struct kobj_type btrfs_root_ktype = { + .default_attrs = btrfs_root_attrs, + .sysfs_ops = &btrfs_root_attr_ops, + .release = btrfs_root_release, +}; + +static struct kobj_type btrfs_super_ktype = { + .default_attrs = btrfs_super_attrs, + .sysfs_ops = &btrfs_super_attr_ops, + .release = btrfs_super_release, +}; + +static struct kset btrfs_kset = { + .kobj = {.name = "btrfs"}, +}; + +int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) +{ + int error; + + fs->super_kobj.kset = &btrfs_kset; + fs->super_kobj.ktype = &btrfs_super_ktype; + + error = kobject_set_name(&fs->super_kobj, "%s", + fs->sb->s_id); + if (error) + goto fail; + + error = kobject_register(&fs->super_kobj); + if (error) + goto 
fail; + + return 0; + +fail: + printk(KERN_ERR "btrfs: sysfs creation for super failed\n"); + return error; +} + +int btrfs_sysfs_add_root(struct btrfs_root *root) +{ + int error; + + root->root_kobj.ktype = &btrfs_root_ktype; + root->root_kobj.parent = &root->fs_info->super_kobj; + + error = kobject_set_name(&root->root_kobj, "%s", root->name); + if (error) { + goto fail; + } + + error = kobject_register(&root->root_kobj); + if (error) + goto fail; + + return 0; + +fail: + printk(KERN_ERR "btrfs: sysfs creation for root failed\n"); + return error; +} + +void btrfs_sysfs_del_root(struct btrfs_root *root) +{ + kobject_unregister(&root->root_kobj); + wait_for_completion(&root->kobj_unregister); +} + +void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) +{ + kobject_unregister(&fs->super_kobj); + wait_for_completion(&fs->kobj_unregister); +} + +int btrfs_init_sysfs() +{ + kobj_set_kset_s(&btrfs_kset, fs_subsys); + return kset_register(&btrfs_kset); +} + +void btrfs_exit_sysfs() +{ + kset_unregister(&btrfs_kset); +} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 18abea802794..2b15daa3a9f2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -236,6 +236,7 @@ static int wait_for_commit(struct btrfs_root *root, struct dirty_root { struct list_head list; struct btrfs_root *root; + struct btrfs_root *latest_root; }; int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list) @@ -278,6 +279,15 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, btrfs_root_blocknr(&root->root_item)); brelse(root->commit_root); root->commit_root = NULL; + + /* make sure to update the root on disk + * so we get any updates to the block used + * counts + */ + err = btrfs_update_root(trans, + root->fs_info->tree_root, + &root->root_key, + &root->root_item); continue; } dirty = kmalloc(sizeof(*dirty), GFP_NOFS); @@ -291,6 +301,7 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, memcpy(dirty->root, root, sizeof(*root)); 
dirty->root->node = root->commit_root; + dirty->latest_root = root; root->commit_root = NULL; root->root_key.offset = root->fs_info->generation; @@ -384,20 +395,29 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, { struct dirty_root *dirty; struct btrfs_trans_handle *trans; + u64 num_blocks; + u64 blocks_used; int ret = 0; int err; while(!list_empty(list)) { + struct btrfs_root *root; + mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); + num_blocks = btrfs_root_blocks_used(&dirty->root->root_item); + root = dirty->latest_root; + while(1) { trans = btrfs_start_transaction(tree_root, 1); + ret = btrfs_drop_snapshot(trans, dirty->root); if (ret != -EAGAIN) { break; } + err = btrfs_update_root(trans, tree_root, &dirty->root->root_key, @@ -414,9 +434,19 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, mutex_lock(&tree_root->fs_info->fs_mutex); } BUG_ON(ret); + + num_blocks -= btrfs_root_blocks_used(&dirty->root->root_item); + blocks_used = btrfs_root_blocks_used(&root->root_item); + if (num_blocks) { + record_root_in_trans(root); + btrfs_set_root_blocks_used(&root->root_item, + blocks_used - num_blocks); + } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); - if (ret) + if (ret) { + BUG(); break; + } ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); @@ -534,10 +564,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wake_up(&cur_trans->commit_wait); put_transaction(cur_trans); put_transaction(cur_trans); + if (root->fs_info->closing) list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots); else list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots); + mutex_unlock(&root->fs_info->trans_mutex); kmem_cache_free(btrfs_trans_handle_cachep, trans); -- cgit v1.2.3 From 4b82d6e4a592016ac75e756ca519923e0fc18f00 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 29 Aug 2007 09:11:44 -0400 Subject: Btrfs: Add mount into directory 
support Modified form of original patch from Christoph Hellwig to make btrfs mount into the default subvolume by default. mount /dev/somedevice:subvolumename to get other subvolumes or mount /dev/somedevice:. to get the root Signed-off-by: Chris Mason --- fs/btrfs/super.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 120 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a68101ae1a3b..8a94be1a773b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -145,11 +147,126 @@ static void btrfs_write_super(struct super_block *sb) sb->s_dirt = 0; } +/* + * This is almost a copy of get_sb_bdev in fs/super.c. + * We need the local copy to allow direct mounting of + * subvolumes, but this could be easily integrated back + * into the generic version. 
--hch + */ + +/* start copy & paste */ +static int set_bdev_super(struct super_block *s, void *data) +{ + s->s_bdev = data; + s->s_dev = s->s_bdev->bd_dev; + return 0; +} + +static int test_bdev_super(struct super_block *s, void *data) +{ + return (void *)s->s_bdev == data; +} + +int btrfs_get_sb_bdev(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt, const char *subvol) +{ + struct block_device *bdev = NULL; + struct super_block *s; + struct dentry *root; + int error = 0; + + bdev = open_bdev_excl(dev_name, flags, fs_type); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + + /* + * once the super is inserted into the list by sget, s_umount + * will protect the lockfs code from trying to start a snapshot + * while we are mounting + */ + down(&bdev->bd_mount_sem); + s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); + up(&bdev->bd_mount_sem); + if (IS_ERR(s)) + goto error_s; + + if (s->s_root) { + if ((flags ^ s->s_flags) & MS_RDONLY) { + up_write(&s->s_umount); + deactivate_super(s); + error = -EBUSY; + goto error_bdev; + } + + close_bdev_excl(bdev); + } else { + char b[BDEVNAME_SIZE]; + + s->s_flags = flags; + strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); + sb_set_blocksize(s, block_size(bdev)); + error = fill_super(s, data, flags & MS_SILENT ? 
1 : 0); + if (error) { + up_write(&s->s_umount); + deactivate_super(s); + goto error; + } + + s->s_flags |= MS_ACTIVE; + } + + if (subvol) { + root = lookup_one_len(subvol, s->s_root, strlen(subvol)); + if (IS_ERR(root)) { + up_write(&s->s_umount); + deactivate_super(s); + error = PTR_ERR(root); + goto error; + } + if (!root->d_inode) { + dput(root); + up_write(&s->s_umount); + deactivate_super(s); + error = -ENXIO; + goto error; + } + } else { + root = dget(s->s_root); + } + + mnt->mnt_sb = s; + mnt->mnt_root = root; + return 0; + +error_s: + error = PTR_ERR(s); +error_bdev: + close_bdev_excl(bdev); +error: + return error; +} +/* end copy & paste */ + static int btrfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) + int flags, const char *identifier, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, - btrfs_fill_super, mnt); + int ret; + char *_identifier = kstrdup(identifier, GFP_KERNEL); + char *subvol_name; + const char *dev_name; + + subvol_name = _identifier; + dev_name = strsep(&subvol_name, ":"); + if (!dev_name) + return -ENOMEM; + + ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, + btrfs_fill_super, mnt, + subvol_name ? 
subvol_name : "default"); + kfree(_identifier); + return ret; } static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) -- cgit v1.2.3 From 95e0528919cb0c7635b37496e0ad1c41e4c0dd7c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 29 Aug 2007 09:11:44 -0400 Subject: Btrfs: Use mount -o subvol to select the subvol directory instead of dev: Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 ++++++ fs/btrfs/super.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 45 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ffeee546a0d7..cd75c906048f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1064,6 +1064,12 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, ((type *)(btrfs_leaf_data(leaf) + \ btrfs_item_offset((leaf)->items + (slot)))) +/* mount option defines and helpers */ +#define BTRFS_MOUNT_SUBVOL 0x000001 +#define btrfs_clear_opt(o, opt) o &= ~BTRFS_MOUNT_##opt +#define btrfs_set_opt(o, opt) o |= BTRFS_MOUNT_##opt +#define btrfs_test_opt(sb, opt) (BTRFS_SB(sb)->s_mount_opt & \ + BTRFS_MOUNT_##opt) /* extent-tree.c */ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8a94be1a773b..66a01cbbbea1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -58,6 +59,41 @@ static void btrfs_put_super (struct super_block * sb) sb->s_fs_info = NULL; } +enum { + Opt_subvol, Opt_err, +}; + +static match_table_t tokens = { + {Opt_subvol, "subvol=%s"}, + {Opt_err, NULL} +}; + +static int parse_options (char * options, + struct btrfs_root *root, + char **subvol_name) +{ + char * p; + substring_t args[MAX_OPT_ARGS]; + if (!options) + return 1; + + while ((p = strsep (&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = 
match_token(p, tokens, args); + switch (token) { + case Opt_subvol: + *subvol_name = match_strdup(&args[0]); + break; + default: + return 0; + } + } + return 1; +} + static int btrfs_fill_super(struct super_block * sb, void * data, int silent) { struct inode * inode; @@ -250,22 +286,15 @@ error: /* end copy & paste */ static int btrfs_get_sb(struct file_system_type *fs_type, - int flags, const char *identifier, void *data, struct vfsmount *mnt) + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { int ret; - char *_identifier = kstrdup(identifier, GFP_KERNEL); - char *subvol_name; - const char *dev_name; - - subvol_name = _identifier; - dev_name = strsep(&subvol_name, ":"); - if (!dev_name) - return -ENOMEM; + char *subvol_name = NULL; + parse_options((char *)data, NULL, &subvol_name); ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, mnt, subvol_name ? subvol_name : "default"); - kfree(_identifier); return ret; } -- cgit v1.2.3 From 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:19 -0400 Subject: Btrfs: Create extent_buffer interface for large blocksizes Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 + fs/btrfs/ctree.c | 1484 ++++++++++++++++++++++++++---------------------- fs/btrfs/ctree.h | 849 ++++++++++----------------- fs/btrfs/dir-item.c | 102 ++-- fs/btrfs/disk-io.c | 391 ++++++------- fs/btrfs/disk-io.h | 55 +- fs/btrfs/extent-tree.c | 294 +++++----- fs/btrfs/extent_map.c | 672 +++++++++++++++++++++- fs/btrfs/extent_map.h | 75 ++- fs/btrfs/file-item.c | 63 +- fs/btrfs/file.c | 129 +++-- fs/btrfs/inode-item.c | 22 +- fs/btrfs/inode-map.c | 17 +- fs/btrfs/inode.c | 435 ++++++++------ fs/btrfs/print-tree.c | 96 ++-- fs/btrfs/print-tree.h | 4 +- fs/btrfs/root-tree.c | 51 +- fs/btrfs/super.c | 7 +- fs/btrfs/sysfs.c | 10 +- fs/btrfs/transaction.c | 61 +- fs/btrfs/transaction.h | 5 +- fs/btrfs/tree-defrag.c | 58 +- 22 files changed, 2794 insertions(+), 
2088 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 6341cf7a9f91..a3b51085d7f0 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -23,4 +23,6 @@ modules_install: clean: $(MAKE) -C $(KERNELDIR) M=`pwd` clean +tester: + $(MAKE) -C $(KERNELDIR) M=`pwd` tree-defrag.o transaction.o sysfs.o super.o root-tree.o inode-map.o inode-item.o inode.o file-item.o file.o extent_map.o disk-io.o ctree.o dir-item.o extent-tree.o endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b41f48ade419..f60920e8a0e0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -16,21 +16,24 @@ * Boston, MA 021110-1307, USA. */ +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "print-tree.h" static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level); static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size); -static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *dst, struct buffer_head - *src); -static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct buffer_head *dst_buf, - struct buffer_head *src_buf); +static int push_node_left(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *dst, + struct extent_buffer *src); +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst_buf, + struct extent_buffer *src_buf); static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot); @@ -62,40 +65,38 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) break; - btrfs_block_release(root, p->nodes[i]); + free_extent_buffer(p->nodes[i]); } 
memset(p, 0, sizeof(*p)); } -static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret, u64 search_start, u64 empty_size) +static int __btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret, + u64 search_start, u64 empty_size) { - struct buffer_head *cow; - struct btrfs_node *cow_node; + struct extent_buffer *cow; int ret = 0; int different_trans = 0; WARN_ON(root->ref_cows && trans->transid != root->last_trans); - WARN_ON(!buffer_uptodate(buf)); + cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); - cow_node = btrfs_buffer_node(cow); - if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) + if (buf->len != root->sectorsize || cow->len != root->sectorsize) WARN_ON(1); - memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); - btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); - btrfs_set_header_generation(&cow_node->header, trans->transid); - btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); + copy_extent_buffer(cow, buf, 0, 0, cow->len); + btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow)); + btrfs_set_header_generation(cow, trans->transid); + btrfs_set_header_owner(cow, root->root_key.objectid); - WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > - trans->transid); - if (btrfs_header_generation(btrfs_buffer_header(buf)) != - trans->transid) { + WARN_ON(btrfs_header_generation(buf) > trans->transid); + if (btrfs_header_generation(buf) != trans->transid) { different_trans = 1; ret = btrfs_inc_ref(trans, root, buf); if (ret) @@ -106,29 +107,29 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root if (buf == root->node) { root->node = cow; - get_bh(cow); 
+ extent_buffer_get(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + btrfs_free_extent(trans, root, + extent_buffer_blocknr(buf), 1, 1); } - btrfs_block_release(root, buf); + free_extent_buffer(buf); } else { - btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, - bh_blocknr(cow)); + btrfs_set_node_blockptr(parent, parent_slot, + extent_buffer_blocknr(cow)); btrfs_mark_buffer_dirty(parent); - WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != - trans->transid); - btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); + WARN_ON(btrfs_header_generation(parent) != trans->transid); + btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1); } - btrfs_block_release(root, buf); + free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); *cow_ret = cow; return 0; } -int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret) +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret) { u64 search_start; if (trans->transaction != root->fs_info->running_transaction) { @@ -141,13 +142,12 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root root->fs_info->generation); WARN_ON(1); } - if (btrfs_header_generation(btrfs_buffer_header(buf)) == - trans->transid) { + if (btrfs_header_generation(buf) == trans->transid) { *cow_ret = buf; return 0; } - search_start = bh_blocknr(buf) & ~((u64)65535); + search_start = extent_buffer_blocknr(buf) & ~((u64)65535); return __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); } @@ -161,9 +161,11 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } -static int should_defrag_leaf(struct buffer_head *bh) +#if 0 +static int should_defrag_leaf(struct 
extent_buffer *eb) { - struct btrfs_leaf *leaf = btrfs_buffer_leaf(bh); + return 0; + struct btrfs_leaf *leaf = btrfs_buffer_leaf(eb); struct btrfs_disk_key *key; u32 nritems; @@ -188,14 +190,17 @@ static int should_defrag_leaf(struct buffer_head *bh) } return 0; } +#endif int btrfs_realloc_node(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *parent, + struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret) { + return 0; +#if 0 struct btrfs_node *parent_node; - struct buffer_head *cur_bh; - struct buffer_head *tmp_bh; + struct extent_buffer *cur_eb; + struct extent_buffer *tmp_eb; u64 blocknr; u64 search_start = *last_ret; u64 last_block = 0; @@ -281,6 +286,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, brelse(tmp_bh); } return err; +#endif } /* @@ -289,12 +295,12 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, * which is the stop of the leaf data stack */ static inline unsigned int leaf_data_end(struct btrfs_root *root, - struct btrfs_leaf *leaf) + struct extent_buffer *leaf) { - u32 nr = btrfs_header_nritems(&leaf->header); + u32 nr = btrfs_header_nritems(leaf); if (nr == 0) return BTRFS_LEAF_DATA_SIZE(root); - return btrfs_item_offset(leaf->items + nr - 1); + return btrfs_item_offset_nr(leaf, nr - 1); } /* @@ -310,9 +316,9 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) return 1; if (k1.objectid < k2->objectid) return -1; - if (k1.flags > k2->flags) + if (k1.type > k2->type) return 1; - if (k1.flags < k2->flags) + if (k1.type < k2->type) return -1; if (k1.offset > k2->offset) return 1; @@ -324,37 +330,39 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_node *parent = NULL; - struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); + struct extent_buffer *parent = NULL; + struct extent_buffer *node = 
path->nodes[level]; + struct btrfs_disk_key parent_key; + struct btrfs_disk_key node_key; int parent_slot; int slot; struct btrfs_key cpukey; - u32 nritems = btrfs_header_nritems(&node->header); + u32 nritems = btrfs_header_nritems(node); if (path->nodes[level + 1]) - parent = btrfs_buffer_node(path->nodes[level + 1]); + parent = path->nodes[level + 1]; slot = path->slots[level]; - BUG_ON(!buffer_uptodate(path->nodes[level])); BUG_ON(nritems == 0); if (parent) { - struct btrfs_disk_key *parent_key; - parent_slot = path->slots[level + 1]; - parent_key = &parent->ptrs[parent_slot].key; - BUG_ON(memcmp(parent_key, &node->ptrs[0].key, + btrfs_node_key(parent, &parent_key, parent_slot); + btrfs_node_key(node, &node_key, 0); + BUG_ON(memcmp(&parent_key, &node_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(&node->header)); + btrfs_header_blocknr(node)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != 0) { - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key); - BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0); + btrfs_node_key_to_cpu(node, &cpukey, slot - 1); + btrfs_node_key(node, &node_key, slot); + BUG_ON(comp_keys(&node_key, &cpukey) <= 0); } if (slot < nritems - 1) { - btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key); - BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0); + btrfs_node_key_to_cpu(node, &cpukey, slot + 1); + btrfs_node_key(node, &node_key, slot); + BUG_ON(comp_keys(&node_key, &cpukey) >= 0); } return 0; } @@ -362,83 +370,172 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); - struct btrfs_node *parent = NULL; + struct extent_buffer *leaf = path->nodes[level]; + struct extent_buffer *parent = NULL; int parent_slot; - int slot = path->slots[0]; struct btrfs_key cpukey; + struct 
btrfs_disk_key parent_key; + struct btrfs_disk_key leaf_key; + int slot = path->slots[0]; - u32 nritems = btrfs_header_nritems(&leaf->header); + u32 nritems = btrfs_header_nritems(leaf); if (path->nodes[level + 1]) - parent = btrfs_buffer_node(path->nodes[level + 1]); - - BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); + parent = path->nodes[level + 1]; if (nritems == 0) return 0; if (parent) { - struct btrfs_disk_key *parent_key; - parent_slot = path->slots[level + 1]; - parent_key = &parent->ptrs[parent_slot].key; + btrfs_node_key(parent, &parent_key, parent_slot); + btrfs_item_key(leaf, &leaf_key, 0); - BUG_ON(memcmp(parent_key, &leaf->items[0].key, + BUG_ON(memcmp(&parent_key, &leaf_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(&leaf->header)); + btrfs_header_blocknr(leaf)); + } +#if 0 + for (i = 0; nritems > 1 && i < nritems - 2; i++) { + btrfs_item_key_to_cpu(leaf, &cpukey, i + 1); + btrfs_item_key(leaf, &leaf_key, i); + if (comp_keys(&leaf_key, &cpukey) >= 0) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad key\n", i); + BUG_ON(1); + } + if (btrfs_item_offset_nr(leaf, i) != + btrfs_item_end_nr(leaf, i + 1)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", i); + BUG_ON(1); + } + if (i == 0) { + if (btrfs_item_offset_nr(leaf, i) + + btrfs_item_size_nr(leaf, i) != + BTRFS_LEAF_DATA_SIZE(root)) { + btrfs_print_leaf(root, leaf); + printk("slot %d first offset bad\n", i); + BUG_ON(1); + } + } } - if (slot != 0) { - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key); - BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0); - BUG_ON(btrfs_item_offset(leaf->items + slot - 1) != - btrfs_item_end(leaf->items + slot)); + if (nritems > 0) { + if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) { + btrfs_print_leaf(root, leaf); + printk("slot %d bad size \n", nritems - 1); + BUG_ON(1); + } + } +#endif + if (slot != 0 && slot < nritems - 1) { + btrfs_item_key(leaf, 
&leaf_key, slot); + btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); + if (comp_keys(&leaf_key, &cpukey) <= 0) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad key\n", slot); + BUG_ON(1); + } + if (btrfs_item_offset_nr(leaf, slot - 1) != + btrfs_item_end_nr(leaf, slot)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", slot); + BUG_ON(1); + } } if (slot < nritems - 1) { - btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key); - BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0); - BUG_ON(btrfs_item_offset(leaf->items + slot) != - btrfs_item_end(leaf->items + slot + 1)); + btrfs_item_key(leaf, &leaf_key, slot); + btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); + BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); + if (btrfs_item_offset_nr(leaf, slot) != + btrfs_item_end_nr(leaf, slot + 1)) { + btrfs_print_leaf(root, leaf); + printk("slot %d offset bad\n", slot); + BUG_ON(1); + } } - BUG_ON(btrfs_item_offset(leaf->items) + - btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root)); + BUG_ON(btrfs_item_offset_nr(leaf, 0) + + btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); return 0; } static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { - struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); - if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid, - sizeof(node->header.fsid))) - BUG(); + struct extent_buffer *buf = path->nodes[level]; + char fsid[BTRFS_FSID_SIZE]; + + read_extent_buffer(buf, fsid, (unsigned long)btrfs_header_fsid(buf), + BTRFS_FSID_SIZE); + + if (memcmp(fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { + int i = 0; + printk("warning bad block %Lu\n", buf->start); + if (!btrfs_buffer_uptodate(buf)) { + WARN_ON(1); + } + for (i = 0; i < BTRFS_FSID_SIZE; i++) { + printk("%x:%x ", root->fs_info->fsid[i], fsid[i]); + } + printk("\n"); + // BUG(); + } if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); } /* - * search for 
key in the array p. items p are item_size apart - * and there are 'max' items in p + * search for key in the extent_buffer. The items start at offset p, + * and they are item_size apart. There are 'max' items in p. + * * the slot in the array is returned via slot, and it points to * the place where you would insert key if it is not found in * the array. * * slot may point to max if the key is bigger than all of the keys */ -static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, - int max, int *slot) +static int generic_bin_search(struct extent_buffer *eb, unsigned long p, + int item_size, struct btrfs_key *key, + int max, int *slot) { int low = 0; int high = max; int mid; int ret; struct btrfs_disk_key *tmp; + struct btrfs_disk_key unaligned; + unsigned long offset; + char *map_token = NULL; + char *kaddr = NULL; + unsigned long map_start = 0; + unsigned long map_len = 0; while(low < high) { mid = (low + high) / 2; - tmp = (struct btrfs_disk_key *)(p + mid * item_size); + offset = p + mid * item_size; + + if (!map_token || offset < map_start || + (offset + sizeof(struct btrfs_disk_key)) > + map_start + map_len) { + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); + map_extent_buffer(eb, offset, &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + + } + if (offset + sizeof(struct btrfs_disk_key) > + map_start + map_len) { + unmap_extent_buffer(eb, map_token, KM_USER0); + read_extent_buffer(eb, &unaligned, + offset, sizeof(unaligned)); + map_token = NULL; + tmp = &unaligned; + } else { + tmp = (struct btrfs_disk_key *)(kaddr + offset - + map_start); + } ret = comp_keys(tmp, key); if (ret < 0) @@ -447,10 +544,13 @@ static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, high = mid; else { *slot = mid; + unmap_extent_buffer(eb, map_token, KM_USER0); return 0; } } *slot = low; + if (map_token) + unmap_extent_buffer(eb, map_token, KM_USER0); return 1; } @@ -458,46 +558,42 @@ static int generic_bin_search(char 
*p, int item_size, struct btrfs_key *key, * simple bin_search frontend that does the right thing for * leaves vs nodes */ -static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) +static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, + int level, int *slot) { - if (btrfs_is_leaf(c)) { - struct btrfs_leaf *l = (struct btrfs_leaf *)c; - return generic_bin_search((void *)l->items, + if (level == 0) { + return generic_bin_search(eb, + offsetof(struct btrfs_leaf, items), sizeof(struct btrfs_item), - key, btrfs_header_nritems(&c->header), + key, btrfs_header_nritems(eb), slot); } else { - return generic_bin_search((void *)c->ptrs, + return generic_bin_search(eb, + offsetof(struct btrfs_node, ptrs), sizeof(struct btrfs_key_ptr), - key, btrfs_header_nritems(&c->header), + key, btrfs_header_nritems(eb), slot); } return -1; } -static struct buffer_head *read_node_slot(struct btrfs_root *root, - struct buffer_head *parent_buf, - int slot) +static struct extent_buffer *read_node_slot(struct btrfs_root *root, + struct extent_buffer *parent, int slot) { - struct btrfs_node *node = btrfs_buffer_node(parent_buf); if (slot < 0) return NULL; - if (slot >= btrfs_header_nritems(&node->header)) + if (slot >= btrfs_header_nritems(parent)) return NULL; - return read_tree_block(root, btrfs_node_blockptr(node, slot)); + return read_tree_block(root, btrfs_node_blockptr(parent, slot)); } static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *right_buf; - struct buffer_head *mid_buf; - struct buffer_head *left_buf; - struct buffer_head *parent_buf = NULL; - struct btrfs_node *right = NULL; - struct btrfs_node *mid; - struct btrfs_node *left = NULL; - struct btrfs_node *parent = NULL; + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; @@ 
-508,60 +604,57 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (level == 0) return 0; - mid_buf = path->nodes[level]; - mid = btrfs_buffer_node(mid_buf); + mid = path->nodes[level]; orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) - parent_buf = path->nodes[level + 1]; + parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; /* * deal with the case where there is only one pointer in the root * by promoting the node below to a root */ - if (!parent_buf) { - struct buffer_head *child; - u64 blocknr = bh_blocknr(mid_buf); + if (!parent) { + struct extent_buffer *child; + u64 blocknr = extent_buffer_blocknr(mid); - if (btrfs_header_nritems(&mid->header) != 1) + if (btrfs_header_nritems(mid) != 1) return 0; /* promote the child to a root */ - child = read_node_slot(root, mid_buf, 0); + child = read_node_slot(root, mid, 0); BUG_ON(!child); root->node = child; path->nodes[level] = NULL; - clean_tree_block(trans, root, mid_buf); - wait_on_buffer(mid_buf); + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); /* once for the path */ - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); /* once for the root ptr */ - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); return btrfs_free_extent(trans, root, blocknr, 1, 1); } - parent = btrfs_buffer_node(parent_buf); - - if (btrfs_header_nritems(&mid->header) > + if (btrfs_header_nritems(mid) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - if (btrfs_header_nritems(&mid->header) < 2) + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; - left_buf = read_node_slot(root, parent_buf, pslot - 1); - if (left_buf) { - wret = btrfs_cow_block(trans, root, left_buf, - parent_buf, pslot - 1, &left_buf); + left = read_node_slot(root, parent, pslot - 1); + if (left) { + wret = btrfs_cow_block(trans, root, left, + parent, pslot - 1, &left); if (wret) { ret = wret; goto enospc; } } - right_buf = 
read_node_slot(root, parent_buf, pslot + 1); - if (right_buf) { - wret = btrfs_cow_block(trans, root, right_buf, - parent_buf, pslot + 1, &right_buf); + right = read_node_slot(root, parent, pslot + 1); + if (right) { + wret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, &right); if (wret) { ret = wret; goto enospc; @@ -569,30 +662,27 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } /* first, try to make some room in the middle buffer */ - if (left_buf) { - left = btrfs_buffer_node(left_buf); - orig_slot += btrfs_header_nritems(&left->header); - wret = push_node_left(trans, root, left_buf, mid_buf); + if (left) { + orig_slot += btrfs_header_nritems(left); + wret = push_node_left(trans, root, left, mid); if (wret < 0) ret = wret; - if (btrfs_header_nritems(&mid->header) < 2) + if (btrfs_header_nritems(mid) < 2) err_on_enospc = 1; } /* * then try to empty the right most buffer into the middle */ - if (right_buf) { - right = btrfs_buffer_node(right_buf); - wret = push_node_left(trans, root, mid_buf, right_buf); + if (right) { + wret = push_node_left(trans, root, mid, right); if (wret < 0 && wret != -ENOSPC) ret = wret; - if (btrfs_header_nritems(&right->header) == 0) { - u64 blocknr = bh_blocknr(right_buf); - clean_tree_block(trans, root, right_buf); - wait_on_buffer(right_buf); - btrfs_block_release(root, right_buf); - right_buf = NULL; + if (btrfs_header_nritems(right) == 0) { + u64 blocknr = extent_buffer_blocknr(right); + clean_tree_block(trans, root, right); + wait_on_tree_block_writeback(root, right); + free_extent_buffer(right); right = NULL; wret = del_ptr(trans, root, path, level + 1, pslot + 1); @@ -602,14 +692,13 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } else { - btrfs_memcpy(root, parent, - &parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); + struct btrfs_disk_key right_key; + 
btrfs_node_key(right, &right_key, 0); + btrfs_set_node_key(parent, &right_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); } } - if (btrfs_header_nritems(&mid->header) == 1) { + if (btrfs_header_nritems(mid) == 1) { /* * we're not allowed to leave a node with one item in the * tree during a delete. A deletion from lower in the tree @@ -619,21 +708,20 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root * otherwise we would have pulled some pointers from the * right */ - BUG_ON(!left_buf); - wret = balance_node_right(trans, root, mid_buf, left_buf); + BUG_ON(!left); + wret = balance_node_right(trans, root, mid, left); if (wret < 0) { ret = wret; goto enospc; } BUG_ON(wret == 1); } - if (btrfs_header_nritems(&mid->header) == 0) { + if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = bh_blocknr(mid_buf); - clean_tree_block(trans, root, mid_buf); - wait_on_buffer(mid_buf); - btrfs_block_release(root, mid_buf); - mid_buf = NULL; + u64 blocknr = extent_buffer_blocknr(mid); + clean_tree_block(trans, root, mid); + wait_on_tree_block_writeback(root, mid); + free_extent_buffer(mid); mid = NULL; wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) @@ -643,37 +731,36 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; } else { /* update the parent key to reflect our changes */ - btrfs_memcpy(root, parent, - &parent->ptrs[pslot].key, &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); + struct btrfs_disk_key mid_key; + btrfs_node_key(mid, &mid_key, 0); + btrfs_set_node_key(parent, &mid_key, pslot); + btrfs_mark_buffer_dirty(parent); } /* update the path */ - if (left_buf) { - if (btrfs_header_nritems(&left->header) > orig_slot) { - get_bh(left_buf); - path->nodes[level] = left_buf; + if (left) { + if (btrfs_header_nritems(left) > orig_slot) { + extent_buffer_get(left); + path->nodes[level] = left; 
path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - if (mid_buf) - btrfs_block_release(root, mid_buf); + if (mid) + free_extent_buffer(mid); } else { - orig_slot -= btrfs_header_nritems(&left->header); + orig_slot -= btrfs_header_nritems(left); path->slots[level] = orig_slot; } } /* double check we haven't messed things up */ check_block(root, path, level); if (orig_ptr != - btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), - path->slots[level])) + btrfs_node_blockptr(path->nodes[level], path->slots[level])) BUG(); enospc: - if (right_buf) - btrfs_block_release(root, right_buf); - if (left_buf) - btrfs_block_release(root, left_buf); + if (right) + free_extent_buffer(right); + if (left) + free_extent_buffer(left); return ret; } @@ -682,14 +769,10 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *right_buf; - struct buffer_head *mid_buf; - struct buffer_head *left_buf; - struct buffer_head *parent_buf = NULL; - struct btrfs_node *right = NULL; - struct btrfs_node *mid; - struct btrfs_node *left = NULL; - struct btrfs_node *parent = NULL; + struct extent_buffer *right = NULL; + struct extent_buffer *mid; + struct extent_buffer *left = NULL; + struct extent_buffer *parent = NULL; int ret = 0; int wret; int pslot; @@ -699,107 +782,101 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, if (level == 0) return 1; - mid_buf = path->nodes[level]; - mid = btrfs_buffer_node(mid_buf); + mid = path->nodes[level]; orig_ptr = btrfs_node_blockptr(mid, orig_slot); if (level < BTRFS_MAX_LEVEL - 1) - parent_buf = path->nodes[level + 1]; + parent = path->nodes[level + 1]; pslot = path->slots[level + 1]; - if (!parent_buf) + if (!parent) return 1; - parent = btrfs_buffer_node(parent_buf); - left_buf = read_node_slot(root, parent_buf, pslot - 1); + left = read_node_slot(root, parent, pslot - 1); /* first, try to make some room in the middle 
buffer */ - if (left_buf) { + if (left) { u32 left_nr; - left = btrfs_buffer_node(left_buf); - left_nr = btrfs_header_nritems(&left->header); + left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - ret = btrfs_cow_block(trans, root, left_buf, parent_buf, - pslot - 1, &left_buf); + ret = btrfs_cow_block(trans, root, left, parent, + pslot - 1, &left); if (ret) wret = 1; else { - left = btrfs_buffer_node(left_buf); wret = push_node_left(trans, root, - left_buf, mid_buf); + left, mid); } } if (wret < 0) ret = wret; if (wret == 0) { + struct btrfs_disk_key disk_key; orig_slot += left_nr; - btrfs_memcpy(root, parent, - &parent->ptrs[pslot].key, - &mid->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); - if (btrfs_header_nritems(&left->header) > orig_slot) { - path->nodes[level] = left_buf; + btrfs_node_key(mid, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot); + btrfs_mark_buffer_dirty(parent); + if (btrfs_header_nritems(left) > orig_slot) { + path->nodes[level] = left; path->slots[level + 1] -= 1; path->slots[level] = orig_slot; - btrfs_block_release(root, mid_buf); + free_extent_buffer(mid); } else { orig_slot -= - btrfs_header_nritems(&left->header); + btrfs_header_nritems(left); path->slots[level] = orig_slot; - btrfs_block_release(root, left_buf); + free_extent_buffer(left); } check_node(root, path, level); return 0; } - btrfs_block_release(root, left_buf); + free_extent_buffer(left); } - right_buf = read_node_slot(root, parent_buf, pslot + 1); + right= read_node_slot(root, parent, pslot + 1); /* * then try to empty the right most buffer into the middle */ - if (right_buf) { + if (right) { u32 right_nr; - right = btrfs_buffer_node(right_buf); - right_nr = btrfs_header_nritems(&right->header); + right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; } else { - ret = btrfs_cow_block(trans, root, right_buf, - 
parent_buf, pslot + 1, - &right_buf); + ret = btrfs_cow_block(trans, root, right, + parent, pslot + 1, + &right); if (ret) wret = 1; else { - right = btrfs_buffer_node(right_buf); wret = balance_node_right(trans, root, - right_buf, mid_buf); + right, mid); } } if (wret < 0) ret = wret; if (wret == 0) { - btrfs_memcpy(root, parent, - &parent->ptrs[pslot + 1].key, - &right->ptrs[0].key, - sizeof(struct btrfs_disk_key)); - btrfs_mark_buffer_dirty(parent_buf); - if (btrfs_header_nritems(&mid->header) <= orig_slot) { - path->nodes[level] = right_buf; + struct btrfs_disk_key disk_key; + + btrfs_node_key(right, &disk_key, 0); + btrfs_set_node_key(parent, &disk_key, pslot + 1); + btrfs_mark_buffer_dirty(parent); + + if (btrfs_header_nritems(mid) <= orig_slot) { + path->nodes[level] = right; path->slots[level + 1] += 1; path->slots[level] = orig_slot - - btrfs_header_nritems(&mid->header); - btrfs_block_release(root, mid_buf); + btrfs_header_nritems(mid); + free_extent_buffer(mid); } else { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } check_node(root, path, level); return 0; } - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } check_node(root, path, level); return 1; @@ -811,10 +888,9 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_node *node; + struct extent_buffer *node; int i; u32 nritems; - u64 item_objectid; u64 blocknr; u64 search; u64 cluster_start; @@ -823,7 +899,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int direction = path->reada; struct radix_tree_root found; unsigned long gang[8]; - struct buffer_head *bh; + struct extent_buffer *eb; if (level == 0) return; @@ -831,18 +907,17 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, if (!path->nodes[level]) return; - node = btrfs_buffer_node(path->nodes[level]); + 
node = path->nodes[level]; search = btrfs_node_blockptr(node, slot); - bh = btrfs_find_tree_block(root, search); - if (bh) { - brelse(bh); + eb = btrfs_find_tree_block(root, search); + if (eb) { + free_extent_buffer(eb); return; } init_bit_radix(&found); - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = slot; i < nritems; i++) { - item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); blocknr = btrfs_node_blockptr(node, i); set_radix_bit(&found, blocknr); } @@ -886,8 +961,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow) { - struct buffer_head *b; - struct btrfs_node *c; + struct extent_buffer *b; u64 blocknr; int slot; int ret; @@ -901,10 +975,9 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); again: b = root->node; - get_bh(b); + extent_buffer_get(b); while (b) { - c = btrfs_buffer_node(b); - level = btrfs_header_level(&c->header); + level = btrfs_header_level(b); if (cow) { int wret; wret = btrfs_cow_block(trans, root, b, @@ -912,32 +985,30 @@ again: p->slots[level + 1], &b); if (wret) { - btrfs_block_release(root, b); + free_extent_buffer(b); return wret; } - c = btrfs_buffer_node(b); } BUG_ON(!cow && ins_len); - if (level != btrfs_header_level(&c->header)) + if (level != btrfs_header_level(b)) WARN_ON(1); - level = btrfs_header_level(&c->header); + level = btrfs_header_level(b); p->nodes[level] = b; ret = check_block(root, p, level); if (ret) return -1; - ret = bin_search(c, key, &slot); - if (!btrfs_is_leaf(c)) { + ret = bin_search(b, key, level, &slot); + if (level != 0) { if (ret && slot > 0) slot -= 1; p->slots[level] = slot; - if (ins_len > 0 && btrfs_header_nritems(&c->header) >= + if (ins_len > 0 && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { int sret = split_node(trans, root, p, level); BUG_ON(sret 
> 0); if (sret) return sret; b = p->nodes[level]; - c = btrfs_buffer_node(b); slot = p->slots[level]; } else if (ins_len < 0) { int sret = balance_level(trans, root, p, @@ -947,22 +1018,19 @@ again: b = p->nodes[level]; if (!b) goto again; - c = btrfs_buffer_node(b); slot = p->slots[level]; - BUG_ON(btrfs_header_nritems(&c->header) == 1); + BUG_ON(btrfs_header_nritems(b) == 1); } /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - blocknr = btrfs_node_blockptr(c, slot); + blocknr = btrfs_node_blockptr(b, slot); if (should_reada) reada_for_search(root, p, level, slot); - b = read_tree_block(root, btrfs_node_blockptr(c, slot)); - + b = read_tree_block(root, btrfs_node_blockptr(b, slot)); } else { - struct btrfs_leaf *l = (struct btrfs_leaf *)c; p->slots[level] = slot; - if (ins_len > 0 && btrfs_leaf_free_space(root, l) < + if (ins_len > 0 && btrfs_leaf_free_space(root, b) < sizeof(struct btrfs_item) + ins_len) { int sret = split_leaf(trans, root, key, p, ins_len); @@ -986,19 +1054,20 @@ again: * If this fails to write a tree block, it returns -1, but continues * fixing up the blocks in ram so the tree is consistent. 
*/ -static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_disk_key - *key, int level) +static int fixup_low_keys(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + struct btrfs_disk_key *key, int level) { int i; int ret = 0; + struct extent_buffer *t; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { - struct btrfs_node *t; int tslot = path->slots[i]; if (!path->nodes[i]) break; - t = btrfs_buffer_node(path->nodes[i]); - btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key)); + t = path->nodes[i]; + btrfs_set_node_key(t, key, tslot); btrfs_mark_buffer_dirty(path->nodes[i]); if (tslot != 0) break; @@ -1014,18 +1083,16 @@ static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root * error, and > 0 if there was no room in the left hand block. */ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *dst_buf, struct - buffer_head *src_buf) + *root, struct extent_buffer *dst, + struct extent_buffer *src) { - struct btrfs_node *src = btrfs_buffer_node(src_buf); - struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int src_nritems; int dst_nritems; int ret = 0; - src_nritems = btrfs_header_nritems(&src->header); - dst_nritems = btrfs_header_nritems(&dst->header); + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; if (push_items <= 0) { @@ -1035,17 +1102,21 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root if (src_nritems < push_items) push_items = src_nritems; - btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs, - push_items * sizeof(struct btrfs_key_ptr)); + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(dst_nritems), + btrfs_node_key_ptr_offset(0), + push_items * sizeof(struct btrfs_key_ptr)); + if (push_items < src_nritems) { - 
btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items, - (src_nritems - push_items) * - sizeof(struct btrfs_key_ptr)); - } - btrfs_set_header_nritems(&src->header, src_nritems - push_items); - btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src_buf); - btrfs_mark_buffer_dirty(dst_buf); + memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(push_items), + (src_nritems - push_items) * + sizeof(struct btrfs_key_ptr)); + } + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); return ret; } @@ -1058,24 +1129,22 @@ static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root * * this will only push up to 1/2 the contents of the left node over */ -static int balance_node_right(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct buffer_head *dst_buf, - struct buffer_head *src_buf) +static int balance_node_right(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *dst, + struct extent_buffer *src) { - struct btrfs_node *src = btrfs_buffer_node(src_buf); - struct btrfs_node *dst = btrfs_buffer_node(dst_buf); int push_items = 0; int max_push; int src_nritems; int dst_nritems; int ret = 0; - src_nritems = btrfs_header_nritems(&src->header); - dst_nritems = btrfs_header_nritems(&dst->header); + src_nritems = btrfs_header_nritems(src); + dst_nritems = btrfs_header_nritems(dst); push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; - if (push_items <= 0) { + if (push_items <= 0) return 1; - } max_push = src_nritems / 2 + 1; /* don't try to empty the node */ @@ -1085,18 +1154,21 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct if (max_push < push_items) push_items = max_push; - btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs, - dst_nritems * sizeof(struct btrfs_key_ptr)); 
+ memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), + btrfs_node_key_ptr_offset(0), + (dst_nritems) * + sizeof(struct btrfs_key_ptr)); - btrfs_memcpy(root, dst, dst->ptrs, - src->ptrs + src_nritems - push_items, - push_items * sizeof(struct btrfs_key_ptr)); + copy_extent_buffer(dst, src, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(src_nritems - push_items), + push_items * sizeof(struct btrfs_key_ptr)); - btrfs_set_header_nritems(&src->header, src_nritems - push_items); - btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); + btrfs_set_header_nritems(src, src_nritems - push_items); + btrfs_set_header_nritems(dst, dst_nritems + push_items); - btrfs_mark_buffer_dirty(src_buf); - btrfs_mark_buffer_dirty(dst_buf); + btrfs_mark_buffer_dirty(src); + btrfs_mark_buffer_dirty(dst); return ret; } @@ -1107,45 +1179,46 @@ static int balance_node_right(struct btrfs_trans_handle *trans, struct * * returns zero on success or < 0 on failure. */ -static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, int level) +static int insert_new_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, int level) { - struct buffer_head *t; - struct btrfs_node *lower; - struct btrfs_node *c; - struct btrfs_disk_key *lower_key; + struct extent_buffer *lower; + struct extent_buffer *c; + struct btrfs_disk_key lower_key; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); - if (IS_ERR(t)) - return PTR_ERR(t); - c = btrfs_buffer_node(t); - memset(c, 0, root->blocksize); - btrfs_set_header_nritems(&c->header, 1); - btrfs_set_header_level(&c->header, level); - btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); - btrfs_set_header_generation(&c->header, trans->transid); - btrfs_set_header_owner(&c->header, root->root_key.objectid); - lower = 
btrfs_buffer_node(path->nodes[level-1]); - memcpy(c->header.fsid, root->fs_info->disk_super->fsid, - sizeof(c->header.fsid)); - if (btrfs_is_leaf(lower)) - lower_key = &((struct btrfs_leaf *)lower)->items[0].key; + c = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(root->node), 0); + if (IS_ERR(c)) + return PTR_ERR(c); + memset_extent_buffer(c, 0, 0, root->nodesize); + btrfs_set_header_nritems(c, 1); + btrfs_set_header_level(c, level); + btrfs_set_header_blocknr(c, extent_buffer_blocknr(c)); + btrfs_set_header_generation(c, trans->transid); + btrfs_set_header_owner(c, root->root_key.objectid); + lower = path->nodes[level-1]; + + write_extent_buffer(c, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(c), + BTRFS_FSID_SIZE); + if (level == 1) + btrfs_item_key(lower, &lower_key, 0); else - lower_key = &lower->ptrs[0].key; - btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, - sizeof(struct btrfs_disk_key)); - btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1])); + btrfs_node_key(lower, &lower_key, 0); + btrfs_set_node_key(c, &lower_key, 0); + btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower)); - btrfs_mark_buffer_dirty(t); + btrfs_mark_buffer_dirty(c); /* the super has an extra ref to root->node */ - btrfs_block_release(root, root->node); - root->node = t; - get_bh(t); - path->nodes[level] = t; + free_extent_buffer(root->node); + root->node = c; + extent_buffer_get(c); + path->nodes[level] = c; path->slots[level] = 0; return 0; } @@ -1163,26 +1236,26 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 blocknr, int slot, int level) { - struct btrfs_node *lower; + struct extent_buffer *lower; int nritems; BUG_ON(!path->nodes[level]); - lower = btrfs_buffer_node(path->nodes[level]); - nritems = btrfs_header_nritems(&lower->header); + lower = path->nodes[level]; + nritems = btrfs_header_nritems(lower); if (slot > nritems) BUG(); if (nritems == 
BTRFS_NODEPTRS_PER_BLOCK(root)) BUG(); if (slot != nritems) { - btrfs_memmove(root, lower, lower->ptrs + slot + 1, - lower->ptrs + slot, + memmove_extent_buffer(lower, + btrfs_node_key_ptr_offset(slot + 1), + btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - btrfs_memcpy(root, lower, &lower->ptrs[slot].key, - key, sizeof(struct btrfs_disk_key)); + btrfs_set_node_key(lower, key, slot); btrfs_set_node_blockptr(lower, slot, blocknr); - btrfs_set_header_nritems(&lower->header, nritems + 1); - btrfs_mark_buffer_dirty(path->nodes[level]); + btrfs_set_header_nritems(lower, nritems + 1); + btrfs_mark_buffer_dirty(lower); check_node(root, path, level); return 0; } @@ -1199,69 +1272,73 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) { - struct buffer_head *t; - struct btrfs_node *c; - struct buffer_head *split_buffer; - struct btrfs_node *split; + struct extent_buffer *c; + struct extent_buffer *split; + struct btrfs_disk_key disk_key; int mid; int ret; int wret; u32 c_nritems; - t = path->nodes[level]; - c = btrfs_buffer_node(t); - if (t == root->node) { + c = path->nodes[level]; + if (c == root->node) { /* trying to split the root, lets make a new one */ ret = insert_new_root(trans, root, path, level + 1); if (ret) return ret; } else { ret = push_nodes_for_insert(trans, root, path, level); - t = path->nodes[level]; - c = btrfs_buffer_node(t); - if (!ret && - btrfs_header_nritems(&c->header) < + c = path->nodes[level]; + if (!ret && btrfs_header_nritems(c) < BTRFS_NODEPTRS_PER_BLOCK(root) - 1) return 0; if (ret < 0) return ret; } - c_nritems = btrfs_header_nritems(&c->header); - split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); - if (IS_ERR(split_buffer)) - return PTR_ERR(split_buffer); + c_nritems = btrfs_header_nritems(c); + split = btrfs_alloc_free_block(trans, root, + 
extent_buffer_blocknr(c), 0); + if (IS_ERR(split)) + return PTR_ERR(split); + + btrfs_set_header_flags(split, btrfs_header_flags(c)); + btrfs_set_header_level(split, btrfs_header_level(c)); + btrfs_set_header_blocknr(split, extent_buffer_blocknr(split)); + btrfs_set_header_generation(split, trans->transid); + btrfs_set_header_owner(split, root->root_key.objectid); + write_extent_buffer(split, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(split), + BTRFS_FSID_SIZE); - split = btrfs_buffer_node(split_buffer); - btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); - btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); - btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); - btrfs_set_header_generation(&split->header, trans->transid); - btrfs_set_header_owner(&split->header, root->root_key.objectid); - memcpy(split->header.fsid, root->fs_info->disk_super->fsid, - sizeof(split->header.fsid)); mid = (c_nritems + 1) / 2; - btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, - (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); - btrfs_set_header_nritems(&split->header, c_nritems - mid); - btrfs_set_header_nritems(&c->header, mid); + + copy_extent_buffer(split, c, + btrfs_node_key_ptr_offset(0), + btrfs_node_key_ptr_offset(mid), + (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); + btrfs_set_header_nritems(split, c_nritems - mid); + btrfs_set_header_nritems(c, mid); ret = 0; - btrfs_mark_buffer_dirty(t); - btrfs_mark_buffer_dirty(split_buffer); - wret = insert_ptr(trans, root, path, &split->ptrs[0].key, - bh_blocknr(split_buffer), path->slots[level + 1] + 1, + btrfs_mark_buffer_dirty(c); + btrfs_mark_buffer_dirty(split); + + btrfs_node_key(split, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, + extent_buffer_blocknr(split), + path->slots[level + 1] + 1, level + 1); if (wret) ret = wret; if (path->slots[level] >= mid) { path->slots[level] -= mid; - btrfs_block_release(root, t); - 
path->nodes[level] = split_buffer; + free_extent_buffer(c); + path->nodes[level] = split; path->slots[level + 1] += 1; } else { - btrfs_block_release(root, split_buffer); + free_extent_buffer(split); } return ret; } @@ -1271,16 +1348,16 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root * and nr indicate which items in the leaf to check. This totals up the * space used both by the item structs and the item data */ -static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) +static int leaf_space_used(struct extent_buffer *l, int start, int nr) { int data_len; - int nritems = btrfs_header_nritems(&l->header); + int nritems = btrfs_header_nritems(l); int end = min(nritems, start + nr) - 1; if (!nr) return 0; - data_len = btrfs_item_end(l->items + start); - data_len = data_len - btrfs_item_offset(l->items + end); + data_len = btrfs_item_end_nr(l, start); + data_len = data_len - btrfs_item_offset_nr(l, end); data_len += sizeof(struct btrfs_item) * nr; WARN_ON(data_len < 0); return data_len; @@ -1291,10 +1368,17 @@ static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) * the start of the leaf data. 
IOW, how much room * the leaf has left for both items and data */ -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf) { - int nritems = btrfs_header_nritems(&leaf->header); - return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + int nritems = btrfs_header_nritems(leaf); + int ret; + ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); + if (ret < 0) { + printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n", + ret, BTRFS_LEAF_DATA_SIZE(root), + leaf_space_used(leaf, 0, nritems), nritems); + } + return ret; } /* @@ -1307,12 +1391,10 @@ int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct buffer_head *left_buf = path->nodes[0]; - struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); - struct btrfs_leaf *right; - struct buffer_head *right_buf; - struct buffer_head *upper; - struct btrfs_node *upper_node; + struct extent_buffer *left = path->nodes[0]; + struct extent_buffer *right; + struct extent_buffer *upper; + struct btrfs_disk_key disk_key; int slot; int i; int free_space; @@ -1321,6 +1403,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_item *item; u32 left_nritems; u32 right_nritems; + u32 data_end; int ret; slot = path->slots[1]; @@ -1328,102 +1411,109 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } upper = path->nodes[1]; - upper_node = btrfs_buffer_node(upper); - if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { + if (slot >= btrfs_header_nritems(upper) - 1) return 1; - } - right_buf = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); - right = btrfs_buffer_leaf(right_buf); + + right = read_tree_block(root, 
btrfs_node_blockptr(upper, slot + 1)); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + /* cow and double check */ - ret = btrfs_cow_block(trans, root, right_buf, upper, - slot + 1, &right_buf); + ret = btrfs_cow_block(trans, root, right, upper, + slot + 1, &right); if (ret) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } - right = btrfs_buffer_leaf(right_buf); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } - left_nritems = btrfs_header_nritems(&left->header); + left_nritems = btrfs_header_nritems(left); if (left_nritems == 0) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + for (i = left_nritems - 1; i >= 1; i--) { - item = left->items + i; + item = btrfs_item_nr(left, i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(item) + sizeof(*item) + push_space > + if (btrfs_item_size(left, item) + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(item) + sizeof(*item); + push_space += btrfs_item_size(left, item) + sizeof(*item); } + if (push_items == 0) { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); return 1; } + if (push_items == left_nritems) WARN_ON(1); - right_nritems = btrfs_header_nritems(&right->header); + /* push left to right */ - push_space = btrfs_item_end(left->items + left_nritems - push_items); + right_nritems = btrfs_header_nritems(right); + push_space = btrfs_item_end_nr(left, left_nritems - push_items); push_space -= leaf_data_end(root, left); + /* make room in the right data area */ - btrfs_memmove(root, right, btrfs_leaf_data(right) + - leaf_data_end(root, right) - push_space, - 
btrfs_leaf_data(right) + - leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) - - leaf_data_end(root, right)); + data_end = leaf_data_end(root, right); + memmove_extent_buffer(right, + btrfs_leaf_data(right) + data_end - push_space, + btrfs_leaf_data(right) + data_end, + BTRFS_LEAF_DATA_SIZE(root) - data_end); + /* copy from the left data area */ - btrfs_memcpy(root, right, btrfs_leaf_data(right) + + copy_extent_buffer(right, left, btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - push_space, btrfs_leaf_data(left) + leaf_data_end(root, left), push_space); - btrfs_memmove(root, right, right->items + push_items, right->items, - right_nritems * sizeof(struct btrfs_item)); + + memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), + btrfs_item_nr_offset(0), + right_nritems * sizeof(struct btrfs_item)); + /* copy the items from left to right */ - btrfs_memcpy(root, right, right->items, left->items + - left_nritems - push_items, - push_items * sizeof(struct btrfs_item)); + copy_extent_buffer(right, left, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(left_nritems - push_items), + push_items * sizeof(struct btrfs_item)); /* update the item pointers */ right_nritems += push_items; - btrfs_set_header_nritems(&right->header, right_nritems); + btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { - btrfs_set_item_offset(right->items + i, push_space - - btrfs_item_size(right->items + i)); - push_space = btrfs_item_offset(right->items + i); + item = btrfs_item_nr(right, i); + btrfs_set_item_offset(right, item, push_space - + btrfs_item_size(right, item)); + push_space = btrfs_item_offset(right, item); } left_nritems -= push_items; - btrfs_set_header_nritems(&left->header, left_nritems); + btrfs_set_header_nritems(left, left_nritems); - btrfs_mark_buffer_dirty(left_buf); - btrfs_mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); - 
btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, - &right->items[0].key, sizeof(struct btrfs_disk_key)); + btrfs_item_key(right, &disk_key, 0); + btrfs_set_node_key(upper, &disk_key, slot + 1); btrfs_mark_buffer_dirty(upper); /* then fixup the leaf pointer in the path */ if (path->slots[0] >= left_nritems) { path->slots[0] -= left_nritems; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buf; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[1] += 1; } else { - btrfs_block_release(root, right_buf); + free_extent_buffer(right); } if (path->nodes[1]) check_node(root, path, 1); @@ -1436,10 +1526,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int data_size) { - struct buffer_head *right_buf = path->nodes[0]; - struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); - struct buffer_head *t; - struct btrfs_leaf *left; + struct btrfs_disk_key disk_key; + struct extent_buffer *right = path->nodes[0]; + struct extent_buffer *left; int slot; int i; int free_space; @@ -1447,119 +1536,128 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root int push_items = 0; struct btrfs_item *item; u32 old_left_nritems; + u32 right_nritems; int ret = 0; int wret; slot = path->slots[1]; - if (slot == 0) { + if (slot == 0) return 1; - } - if (!path->nodes[1]) { + if (!path->nodes[1]) return 1; - } - t = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); - left = btrfs_buffer_leaf(t); + + left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], + slot - 1)); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } /* cow and double check */ - ret = btrfs_cow_block(trans, root, t, 
path->nodes[1], slot - 1, &t); + ret = btrfs_cow_block(trans, root, left, + path->nodes[1], slot - 1, &left); if (ret) { /* we hit -ENOSPC, but it isn't fatal here */ - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - left = btrfs_buffer_leaf(t); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - if (btrfs_header_nritems(&right->header) == 0) { - btrfs_block_release(root, t); + right_nritems = btrfs_header_nritems(right); + if (right_nritems == 0) { + free_extent_buffer(left); return 1; } - for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) { - item = right->items + i; + for (i = 0; i < right_nritems - 1; i++) { + item = btrfs_item_nr(right, i); if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(item) + sizeof(*item) + push_space > + if (btrfs_item_size(right, item) + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(item) + sizeof(*item); + push_space += btrfs_item_size(right, item) + sizeof(*item); } if (push_items == 0) { - btrfs_block_release(root, t); + free_extent_buffer(left); return 1; } - if (push_items == btrfs_header_nritems(&right->header)) + if (push_items == btrfs_header_nritems(right)) WARN_ON(1); + /* push data from right to left */ - btrfs_memcpy(root, left, left->items + - btrfs_header_nritems(&left->header), - right->items, push_items * sizeof(struct btrfs_item)); + copy_extent_buffer(left, right, + btrfs_item_nr_offset(btrfs_header_nritems(left)), + btrfs_item_nr_offset(0), + push_items * sizeof(struct btrfs_item)); + push_space = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset(right->items + push_items -1); - btrfs_memcpy(root, left, btrfs_leaf_data(left) + + btrfs_item_offset_nr(right, push_items -1); + + copy_extent_buffer(left, right, btrfs_leaf_data(left) + leaf_data_end(root, left) - push_space, 
btrfs_leaf_data(right) + - btrfs_item_offset(right->items + push_items - 1), + btrfs_item_offset_nr(right, push_items - 1), push_space); - old_left_nritems = btrfs_header_nritems(&left->header); + old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems < 0); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { - u32 ioff = btrfs_item_offset(left->items + i); - btrfs_set_item_offset(left->items + i, ioff - - (BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset(left->items + - old_left_nritems - 1))); + u32 ioff; + item = btrfs_item_nr(left, i); + ioff = btrfs_item_offset(left, item); + btrfs_set_item_offset(left, item, + ioff - (BTRFS_LEAF_DATA_SIZE(root) - + btrfs_item_offset_nr(left, old_left_nritems - 1))); } - btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); + btrfs_set_header_nritems(left, old_left_nritems + push_items); /* fixup right node */ - push_space = btrfs_item_offset(right->items + push_items - 1) - - leaf_data_end(root, right); - btrfs_memmove(root, right, btrfs_leaf_data(right) + - BTRFS_LEAF_DATA_SIZE(root) - push_space, - btrfs_leaf_data(right) + - leaf_data_end(root, right), push_space); - btrfs_memmove(root, right, right->items, right->items + push_items, - (btrfs_header_nritems(&right->header) - push_items) * - sizeof(struct btrfs_item)); - btrfs_set_header_nritems(&right->header, - btrfs_header_nritems(&right->header) - - push_items); + push_space = btrfs_item_offset_nr(right, push_items - 1) - + leaf_data_end(root, right); + memmove_extent_buffer(right, btrfs_leaf_data(right) + + BTRFS_LEAF_DATA_SIZE(root) - push_space, + btrfs_leaf_data(right) + + leaf_data_end(root, right), push_space); + + memmove_extent_buffer(right, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(push_items), + (btrfs_header_nritems(right) - push_items) * + sizeof(struct btrfs_item)); + + right_nritems = btrfs_header_nritems(right) - push_items; + btrfs_set_header_nritems(right, right_nritems); push_space = 
BTRFS_LEAF_DATA_SIZE(root); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - btrfs_set_item_offset(right->items + i, push_space - - btrfs_item_size(right->items + i)); - push_space = btrfs_item_offset(right->items + i); + for (i = 0; i < right_nritems; i++) { + item = btrfs_item_nr(right, i); + btrfs_set_item_offset(right, item, push_space - + btrfs_item_size(right, item)); + push_space = btrfs_item_offset(right, item); } - btrfs_mark_buffer_dirty(t); - btrfs_mark_buffer_dirty(right_buf); + btrfs_mark_buffer_dirty(left); + btrfs_mark_buffer_dirty(right); - wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); + btrfs_item_key(right, &disk_key, 0); + wret = fixup_low_keys(trans, root, path, &disk_key, 1); if (wret) ret = wret; /* then fixup the leaf pointer in the path */ if (path->slots[0] < push_items) { path->slots[0] += old_left_nritems; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = t; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = left; path->slots[1] -= 1; } else { - btrfs_block_release(root, t); + free_extent_buffer(left); path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); @@ -1578,13 +1676,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *ins_key, struct btrfs_path *path, int data_size) { - struct buffer_head *l_buf; - struct btrfs_leaf *l; + struct extent_buffer *l; u32 nritems; int mid; int slot; - struct btrfs_leaf *right; - struct buffer_head *right_buffer; + struct extent_buffer *right; int space_needed = data_size + sizeof(struct btrfs_item); int data_copy_size; int rt_data_off; @@ -1603,8 +1699,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) return wret; } - l_buf = path->nodes[0]; - l = btrfs_buffer_leaf(l_buf); + l = path->nodes[0]; /* did the pushes work? 
*/ if (btrfs_leaf_free_space(root, l) >= @@ -1617,36 +1712,38 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } slot = path->slots[0]; - nritems = btrfs_header_nritems(&l->header); + nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); - if (IS_ERR(right_buffer)) - return PTR_ERR(right_buffer); - - right = btrfs_buffer_leaf(right_buffer); - memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); - btrfs_set_header_generation(&right->header, trans->transid); - btrfs_set_header_owner(&right->header, root->root_key.objectid); - btrfs_set_header_level(&right->header, 0); - memcpy(right->header.fsid, root->fs_info->disk_super->fsid, - sizeof(right->header.fsid)); + right = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(l), 0); + if (IS_ERR(right)) + return PTR_ERR(right); + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(right), + BTRFS_FSID_SIZE); + if (mid <= slot) { if (nritems == 1 || leaf_space_used(l, mid, nritems - mid) + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { if (slot >= nritems) { btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1] + 1, 1); if (wret) ret = wret; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; path->slots[1] += 1; return ret; 
@@ -1659,15 +1756,15 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BTRFS_LEAF_DATA_SIZE(root)) { if (slot == 0) { btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1], 1); if (wret) ret = wret; - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; if (path->slots[1] == 0) { wret = fixup_low_keys(trans, root, @@ -1681,61 +1778,74 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root double_split = 1; } } - btrfs_set_header_nritems(&right->header, nritems - mid); - data_copy_size = btrfs_item_end(l->items + mid) - - leaf_data_end(root, l); - btrfs_memcpy(root, right, right->items, l->items + mid, - (nritems - mid) * sizeof(struct btrfs_item)); - btrfs_memcpy(root, right, + nritems = nritems - mid; + btrfs_set_header_nritems(right, nritems); + data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); + + copy_extent_buffer(right, l, btrfs_item_nr_offset(0), + btrfs_item_nr_offset(mid), + nritems * sizeof(struct btrfs_item)); + + copy_extent_buffer(right, l, btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - data_copy_size, btrfs_leaf_data(l) + leaf_data_end(root, l), data_copy_size); + rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_end(l->items + mid); + btrfs_item_end_nr(l, mid); - for (i = 0; i < btrfs_header_nritems(&right->header); i++) { - u32 ioff = btrfs_item_offset(right->items + i); - btrfs_set_item_offset(right->items + i, ioff + rt_data_off); + for (i = 0; i < nritems; i++) { + struct btrfs_item *item = btrfs_item_nr(right, i); + u32 ioff = btrfs_item_offset(right, item); + btrfs_set_item_offset(right, item, ioff + rt_data_off); } - btrfs_set_header_nritems(&l->header, mid); + 
btrfs_set_header_nritems(l, mid); ret = 0; - wret = insert_ptr(trans, root, path, &right->items[0].key, - bh_blocknr(right_buffer), path->slots[1] + 1, 1); + btrfs_item_key(right, &disk_key, 0); + wret = insert_ptr(trans, root, path, &disk_key, + extent_buffer_blocknr(right), path->slots[1] + 1, 1); if (wret) ret = wret; - btrfs_mark_buffer_dirty(right_buffer); - btrfs_mark_buffer_dirty(l_buf); + + btrfs_mark_buffer_dirty(right); + btrfs_mark_buffer_dirty(l); BUG_ON(path->slots[0] != slot); + if (mid <= slot) { - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] -= mid; path->slots[1] += 1; } else - btrfs_block_release(root, right_buffer); + free_extent_buffer(right); + BUG_ON(path->slots[0] < 0); check_node(root, path, 1); + check_leaf(root, path, 0); if (!double_split) return ret; - right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); - if (IS_ERR(right_buffer)) - return PTR_ERR(right_buffer); - - right = btrfs_buffer_leaf(right_buffer); - memset(&right->header, 0, sizeof(right->header)); - btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); - btrfs_set_header_generation(&right->header, trans->transid); - btrfs_set_header_owner(&right->header, root->root_key.objectid); - btrfs_set_header_level(&right->header, 0); - memcpy(right->header.fsid, root->fs_info->disk_super->fsid, - sizeof(right->header.fsid)); + + right = btrfs_alloc_free_block(trans, root, + extent_buffer_blocknr(l), 0); + if (IS_ERR(right)) + return PTR_ERR(right); + + memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); + btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_generation(right, trans->transid); + btrfs_set_header_owner(right, root->root_key.objectid); + btrfs_set_header_level(right, 0); + write_extent_buffer(right, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(right), + BTRFS_FSID_SIZE); + 
btrfs_cpu_key_to_disk(&disk_key, ins_key); - btrfs_set_header_nritems(&right->header, 0); + btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - bh_blocknr(right_buffer), + extent_buffer_blocknr(right), path->slots[1], 1); if (wret) ret = wret; @@ -1744,8 +1854,8 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root if (wret) ret = wret; } - btrfs_block_release(root, path->nodes[0]); - path->nodes[0] = right_buffer; + free_extent_buffer(path->nodes[0]); + path->nodes[0] = right; path->slots[0] = 0; check_node(root, path, 1); check_leaf(root, path, 0); @@ -1760,8 +1870,8 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data_start; @@ -1770,15 +1880,14 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, int i; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); slot = path->slots[0]; - old_data_start = btrfs_item_offset(leaf->items + slot); - old_size = btrfs_item_size(leaf->items + slot); + old_data_start = btrfs_item_offset_nr(leaf, slot); + old_size = btrfs_item_size_nr(leaf, slot); BUG_ON(old_size <= new_size); size_diff = old_size - new_size; @@ -1790,32 +1899,38 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff + size_diff); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + size_diff); } /* shift the data */ - btrfs_memmove(root, 
leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + data_end, old_data_start + new_size - data_end); - btrfs_set_item_size(leaf->items + slot, new_size); - btrfs_mark_buffer_dirty(leaf_buf); + + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_size(leaf, item, new_size); + btrfs_mark_buffer_dirty(leaf); ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); return ret; } -int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, u32 data_size) +int btrfs_extend_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *path, + u32 data_size) { int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; u32 nritems; unsigned int data_end; unsigned int old_data; @@ -1823,16 +1938,17 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root int i; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); - if (btrfs_leaf_free_space(root, leaf) < data_size) + if (btrfs_leaf_free_space(root, leaf) < data_size) { + btrfs_print_leaf(root, leaf); BUG(); + } slot = path->slots[0]; - old_data = btrfs_item_end(leaf->items + slot); + old_data = btrfs_item_end_nr(leaf, slot); BUG_ON(slot < 0); BUG_ON(slot >= nritems); @@ -1842,22 +1958,28 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff - data_size); + u32 ioff; + 
item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); } + /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + data_end, old_data - data_end); + data_end = old_data; - old_size = btrfs_item_size(leaf->items + slot); - btrfs_set_item_size(leaf->items + slot, old_size + data_size); - btrfs_mark_buffer_dirty(leaf_buf); + old_size = btrfs_item_size_nr(leaf, slot); + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_size(leaf, item, old_size + data_size); + btrfs_mark_buffer_dirty(leaf); ret = 0; - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); return ret; } @@ -1866,15 +1988,16 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root * Given a key and some data, insert an item into the tree. * This does all the path init required, making room in the tree if needed. 
*/ -int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_path *path, struct btrfs_key - *cpu_key, u32 data_size) +int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *cpu_key, u32 data_size) { + struct extent_buffer *leaf; + struct btrfs_item *item; int ret = 0; int slot; int slot_orig; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; u32 nritems; unsigned int data_end; struct btrfs_disk_key disk_key; @@ -1884,6 +2007,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root /* create a root if there isn't one */ if (!root->node) BUG(); + ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); if (ret == 0) { return -EEXIST; @@ -1892,57 +2016,68 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root goto out; slot_orig = path->slots[0]; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; - nritems = btrfs_header_nritems(&leaf->header); + nritems = btrfs_header_nritems(leaf); data_end = leaf_data_end(root, leaf); if (btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item) + data_size) { BUG(); } + slot = path->slots[0]; BUG_ON(slot < 0); + if (slot != nritems) { int i; - unsigned int old_data = btrfs_item_end(leaf->items + slot); + unsigned int old_data = btrfs_item_end_nr(leaf, slot); + if (old_data < data_end) { + btrfs_print_leaf(root, leaf); + printk("slot %d old_data %d data_end %d\n", + slot, old_data, data_end); + BUG_ON(1); + } /* * item0..itemN ... dataN.offset..dataN.size .. 
data0.size */ /* first correct the data pointers */ for (i = slot; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, - ioff - data_size); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff - data_size); } /* shift the items */ - btrfs_memmove(root, leaf, leaf->items + slot + 1, - leaf->items + slot, + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), + btrfs_item_nr_offset(slot), (nritems - slot) * sizeof(struct btrfs_item)); /* shift the data */ - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + data_end, old_data - data_end); data_end = old_data; } + /* setup the item for the new data */ - btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key, - sizeof(struct btrfs_disk_key)); - btrfs_set_item_offset(leaf->items + slot, data_end - data_size); - btrfs_set_item_size(leaf->items + slot, data_size); - btrfs_set_header_nritems(&leaf->header, nritems + 1); - btrfs_mark_buffer_dirty(leaf_buf); + btrfs_set_item_key(leaf, &disk_key, slot); + item = btrfs_item_nr(leaf, slot); + btrfs_set_item_offset(leaf, item, data_end - data_size); + btrfs_set_item_size(leaf, item, data_size); + btrfs_set_header_nritems(leaf, nritems + 1); + btrfs_mark_buffer_dirty(leaf); ret = 0; if (slot == 0) ret = fixup_low_keys(trans, root, path, &disk_key, 1); - if (btrfs_leaf_free_space(root, leaf) < 0) + if (btrfs_leaf_free_space(root, leaf) < 0) { + btrfs_print_leaf(root, leaf); BUG(); + } check_leaf(root, path, 0); out: return ret; @@ -1958,17 +2093,17 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root { int ret = 0; struct btrfs_path *path; - u8 *ptr; + struct extent_buffer *leaf; + unsigned long ptr; path = btrfs_alloc_path(); BUG_ON(!path); ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (!ret) 
{ - ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], u8); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, data, data_size); - btrfs_mark_buffer_dirty(path->nodes[0]); + leaf = path->nodes[0]; + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, data, ptr, data_size); + btrfs_mark_buffer_dirty(leaf); } btrfs_free_path(path); return ret; @@ -1984,30 +2119,30 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { - struct btrfs_node *node; - struct buffer_head *parent = path->nodes[level]; + struct extent_buffer *parent = path->nodes[level]; u32 nritems; int ret = 0; int wret; - node = btrfs_buffer_node(parent); - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(parent); if (slot != nritems -1) { - btrfs_memmove(root, node, node->ptrs + slot, - node->ptrs + slot + 1, + memmove_extent_buffer(parent, + btrfs_node_key_ptr_offset(slot), + btrfs_node_key_ptr_offset(slot + 1), sizeof(struct btrfs_key_ptr) * (nritems - slot - 1)); } nritems--; - btrfs_set_header_nritems(&node->header, nritems); + btrfs_set_header_nritems(parent, nritems); if (nritems == 0 && parent == root->node) { - struct btrfs_header *header = btrfs_buffer_header(root->node); - BUG_ON(btrfs_header_level(header) != 1); + BUG_ON(btrfs_header_level(root->node) != 1); /* just turn the root into a leaf and break */ - btrfs_set_header_level(header, 0); + btrfs_set_header_level(root->node, 0); } else if (slot == 0) { - wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, - level + 1); + struct btrfs_disk_key disk_key; + + btrfs_node_key(parent, &disk_key, 0); + wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); if (wret) ret = wret; } @@ -2023,59 +2158,67 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path) 
{ int slot; - struct btrfs_leaf *leaf; - struct buffer_head *leaf_buf; + struct extent_buffer *leaf; + struct btrfs_item *item; int doff; int dsize; int ret = 0; int wret; u32 nritems; - leaf_buf = path->nodes[0]; - leaf = btrfs_buffer_leaf(leaf_buf); + leaf = path->nodes[0]; slot = path->slots[0]; - doff = btrfs_item_offset(leaf->items + slot); - dsize = btrfs_item_size(leaf->items + slot); - nritems = btrfs_header_nritems(&leaf->header); + doff = btrfs_item_offset_nr(leaf, slot); + dsize = btrfs_item_size_nr(leaf, slot); + nritems = btrfs_header_nritems(leaf); if (slot != nritems - 1) { int i; int data_end = leaf_data_end(root, leaf); - btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + + + memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + dsize, btrfs_leaf_data(leaf) + data_end, doff - data_end); + for (i = slot + 1; i < nritems; i++) { - u32 ioff = btrfs_item_offset(leaf->items + i); - btrfs_set_item_offset(leaf->items + i, ioff + dsize); + u32 ioff; + item = btrfs_item_nr(leaf, i); + ioff = btrfs_item_offset(leaf, item); + btrfs_set_item_offset(leaf, item, ioff + dsize); } - btrfs_memmove(root, leaf, leaf->items + slot, - leaf->items + slot + 1, + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), + btrfs_item_nr_offset(slot + 1), sizeof(struct btrfs_item) * (nritems - slot - 1)); } - btrfs_set_header_nritems(&leaf->header, nritems - 1); + btrfs_set_header_nritems(leaf, nritems - 1); nritems--; + /* delete the leaf if we've emptied it */ if (nritems == 0) { - if (leaf_buf == root->node) { - btrfs_set_header_level(&leaf->header, 0); + if (leaf == root->node) { + btrfs_set_header_level(leaf, 0); } else { - clean_tree_block(trans, root, leaf_buf); - wait_on_buffer(leaf_buf); + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); wret = del_ptr(trans, root, path, 1, path->slots[1]); if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - bh_blocknr(leaf_buf), 1, 1); + extent_buffer_blocknr(leaf), + 1, 1); if 
(wret) ret = wret; } } else { int used = leaf_space_used(leaf, 0, nritems); if (slot == 0) { + struct btrfs_disk_key disk_key; + + btrfs_item_key(leaf, &disk_key, 0); wret = fixup_low_keys(trans, root, path, - &leaf->items[0].key, 1); + &disk_key, 1); if (wret) ret = wret; } @@ -2087,34 +2230,40 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to del_ptr below */ slot = path->slots[1]; - get_bh(leaf_buf); + extent_buffer_get(leaf); + wret = push_leaf_left(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; - if (path->nodes[0] == leaf_buf && - btrfs_header_nritems(&leaf->header)) { + + if (path->nodes[0] == leaf && + btrfs_header_nritems(leaf)) { wret = push_leaf_right(trans, root, path, 1); if (wret < 0 && wret != -ENOSPC) ret = wret; } - if (btrfs_header_nritems(&leaf->header) == 0) { - u64 blocknr = bh_blocknr(leaf_buf); - clean_tree_block(trans, root, leaf_buf); - wait_on_buffer(leaf_buf); + + if (btrfs_header_nritems(leaf) == 0) { + u64 blocknr = extent_buffer_blocknr(leaf); + + clean_tree_block(trans, root, leaf); + wait_on_tree_block_writeback(root, leaf); + wret = del_ptr(trans, root, path, 1, slot); if (wret) ret = wret; - btrfs_block_release(root, leaf_buf); + + free_extent_buffer(leaf); wret = btrfs_free_extent(trans, root, blocknr, 1, 1); if (wret) ret = wret; } else { - btrfs_mark_buffer_dirty(leaf_buf); - btrfs_block_release(root, leaf_buf); + btrfs_mark_buffer_dirty(leaf); + free_extent_buffer(leaf); } } else { - btrfs_mark_buffer_dirty(leaf_buf); + btrfs_mark_buffer_dirty(leaf); } } return ret; @@ -2130,25 +2279,27 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) int slot; int level = 1; u64 blocknr; - struct buffer_head *c; - struct btrfs_node *c_node; - struct buffer_head *next = NULL; + struct extent_buffer *c; + struct extent_buffer *next = NULL; while(level < BTRFS_MAX_LEVEL) { if (!path->nodes[level]) return 1; + slot = path->slots[level] + 1; c = 
path->nodes[level]; - c_node = btrfs_buffer_node(c); - if (slot >= btrfs_header_nritems(&c_node->header)) { + if (slot >= btrfs_header_nritems(c)) { level++; continue; } - blocknr = btrfs_node_blockptr(c_node, slot); + + blocknr = btrfs_node_blockptr(c, slot); if (next) - btrfs_block_release(root, next); + free_extent_buffer(next); + if (path->reada) reada_for_search(root, path, level, slot); + next = read_tree_block(root, blocknr); break; } @@ -2156,15 +2307,14 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) while(1) { level--; c = path->nodes[level]; - btrfs_block_release(root, c); + free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; if (!level) break; if (path->reada) reada_for_search(root, path, level, 0); - next = read_tree_block(root, - btrfs_node_blockptr(btrfs_buffer_node(next), 0)); + next = read_tree_block(root, btrfs_node_blockptr(next, 0)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 800a3499cc3f..c4b829806855 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -20,10 +20,10 @@ #define __BTRFS__ #include -#include #include #include #include "bit-radix.h" +#include "extent_map.h" struct btrfs_trans_handle; struct btrfs_transaction; @@ -32,7 +32,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; -#define BTRFS_MAGIC "_BtRfS_M" +#define BTRFS_MAGIC "_B2RfS_M" #define BTRFS_ROOT_TREE_OBJECTID 1ULL #define BTRFS_EXTENT_TREE_OBJECTID 2ULL @@ -78,41 +78,41 @@ extern struct kmem_cache *btrfs_path_cachep; */ struct btrfs_disk_key { __le64 objectid; - __le32 flags; + u8 type; __le64 offset; } __attribute__ ((__packed__)); struct btrfs_key { u64 objectid; - u32 flags; + u8 type; u64 offset; } __attribute__ ((__packed__)); +#define BTRFS_FSID_SIZE 16 /* * every tree block (leaf or node) starts with this header. 
*/ struct btrfs_header { u8 csum[BTRFS_CSUM_SIZE]; - u8 fsid[16]; /* FS specific uuid */ + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ __le64 blocknr; /* which block this node is supposed to live in */ __le64 generation; __le64 owner; - __le16 nritems; + __le32 nritems; __le16 flags; u8 level; } __attribute__ ((__packed__)); #define BTRFS_MAX_LEVEL 8 -#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->blocksize - \ +#define BTRFS_NODEPTRS_PER_BLOCK(r) (((r)->nodesize - \ sizeof(struct btrfs_header)) / \ (sizeof(struct btrfs_disk_key) + sizeof(u64))) #define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header)) -#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->blocksize)) +#define BTRFS_LEAF_DATA_SIZE(r) (__BTRFS_LEAF_DATA_SIZE(r->leafsize)) #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) - \ sizeof(struct btrfs_file_extent_item)) -struct buffer_head; /* * the super block basically lists the main trees of the FS * it currently lacks any block count etc etc @@ -128,7 +128,9 @@ struct btrfs_super_block { __le64 total_blocks; __le64 blocks_used; __le64 root_dir_objectid; - __le32 blocksize; + __le32 sectorsize; + __le32 nodesize; + __le32 leafsize; } __attribute__ ((__packed__)); /* @@ -138,7 +140,7 @@ struct btrfs_super_block { struct btrfs_item { struct btrfs_disk_key key; __le32 offset; - __le16 size; + __le32 size; } __attribute__ ((__packed__)); /* @@ -176,7 +178,7 @@ struct btrfs_node { * used while walking the tree. 
*/ struct btrfs_path { - struct buffer_head *nodes[BTRFS_MAX_LEVEL]; + struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; int reada; int lowest_level; @@ -292,6 +294,7 @@ struct btrfs_block_group_cache { }; struct btrfs_fs_info { + u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; @@ -304,9 +307,8 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; struct btrfs_transaction *running_transaction; - struct btrfs_super_block *disk_super; struct btrfs_super_block super_copy; - struct buffer_head *sb_buffer; + struct extent_buffer *sb_buffer; struct super_block *sb; struct inode *btree_inode; struct mutex trans_mutex; @@ -325,8 +327,8 @@ struct btrfs_fs_info { * and for the extent tree extent_root root. */ struct btrfs_root { - struct buffer_head *node; - struct buffer_head *commit_root; + struct extent_buffer *node; + struct extent_buffer *commit_root; struct btrfs_root_item root_item; struct btrfs_key root_key; struct btrfs_fs_info *fs_info; @@ -336,7 +338,16 @@ struct btrfs_root { struct rw_semaphore snap_sem; u64 objectid; u64 last_trans; - u32 blocksize; + + /* data allocations are done in sectorsize units */ + u32 sectorsize; + + /* node allocations are done in nodesize units */ + u32 nodesize; + + /* leaf allocations are done in leafsize units */ + u32 leafsize; + u32 type; u64 highest_inode; u64 last_inode_alloc; @@ -347,12 +358,6 @@ struct btrfs_root { char *name; }; -/* the lower bits in the key flags defines the item type */ -#define BTRFS_KEY_TYPE_MAX 256 -#define BTRFS_KEY_TYPE_SHIFT 24 -#define BTRFS_KEY_TYPE_MASK (((u32)BTRFS_KEY_TYPE_MAX - 1) << \ - BTRFS_KEY_TYPE_SHIFT) - /* * inode items have the data typically returned from stat and store other * info about object characteristics. 
There is one for every file and dir in @@ -402,246 +407,253 @@ struct btrfs_root { */ #define BTRFS_STRING_ITEM_KEY 253 +/* some macros to generate set/get funcs for the struct fields. This + * assumes there is a lefoo_to_cpu for every type, so lets make a simple + * one for u8: + */ +#define le8_to_cpu(v) (v) +#define cpu_to_le8(v) (v) +#define __le8 u8 + +#define read_eb_member(eb, ptr, type, member, result) ( \ + read_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define write_eb_member(eb, ptr, type, member, result) ( \ + write_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(struct extent_buffer *eb, \ + type *s) \ +{ \ + __le##bits res; \ + read_eb_member(eb, s, type, member, &res); \ + return le##bits##_to_cpu(res); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + type *s, u##bits val) \ +{ \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val); \ +} + +#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(struct extent_buffer *eb) \ +{ \ + __le##bits res; \ + read_eb_member(eb, NULL, type, member, &res); \ + return le##bits##_to_cpu(res); \ +} \ +static inline void btrfs_set_##name(struct extent_buffer *eb, \ + u##bits val) \ +{ \ + val = cpu_to_le##bits(val); \ + write_eb_member(eb, NULL, type, member, &val); \ +} -static inline u64 btrfs_block_group_used(struct btrfs_block_group_item *bi) -{ - return le64_to_cpu(bi->used); -} - -static inline void btrfs_set_block_group_used(struct - btrfs_block_group_item *bi, - u64 val) -{ - bi->used = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->generation); -} - -static inline void 
btrfs_set_inode_generation(struct btrfs_inode_item *i, - u64 val) -{ - i->generation = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_size(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->size); -} - -static inline void btrfs_set_inode_size(struct btrfs_inode_item *i, u64 val) -{ - i->size = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_nblocks(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->nblocks); -} - -static inline void btrfs_set_inode_nblocks(struct btrfs_inode_item *i, u64 val) -{ - i->nblocks = cpu_to_le64(val); -} - -static inline u64 btrfs_inode_block_group(struct btrfs_inode_item *i) -{ - return le64_to_cpu(i->block_group); -} - -static inline void btrfs_set_inode_block_group(struct btrfs_inode_item *i, - u64 val) -{ - i->block_group = cpu_to_le64(val); -} - -static inline u32 btrfs_inode_nlink(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->nlink); -} - -static inline void btrfs_set_inode_nlink(struct btrfs_inode_item *i, u32 val) -{ - i->nlink = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_uid(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->uid); -} - -static inline void btrfs_set_inode_uid(struct btrfs_inode_item *i, u32 val) -{ - i->uid = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_gid(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->gid); -} - -static inline void btrfs_set_inode_gid(struct btrfs_inode_item *i, u32 val) -{ - i->gid = cpu_to_le32(val); -} - -static inline u32 btrfs_inode_mode(struct btrfs_inode_item *i) -{ - return le32_to_cpu(i->mode); -} - -static inline void btrfs_set_inode_mode(struct btrfs_inode_item *i, u32 val) -{ - i->mode = cpu_to_le32(val); +#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(type *s) \ +{ \ + return le##bits##_to_cpu(s->member); \ +} \ +static inline void btrfs_set_##name(type *s, u##bits val) \ +{ \ + s->member = cpu_to_le##bits(val); \ } -static inline u32 btrfs_inode_rdev(struct 
btrfs_inode_item *i) -{ - return le32_to_cpu(i->rdev); -} +/* struct btrfs_block_group_item */ +BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, + used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, + used, 64); -static inline void btrfs_set_inode_rdev(struct btrfs_inode_item *i, u32 val) -{ - i->rdev = cpu_to_le32(val); -} +/* struct btrfs_inode_item */ +BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_FUNCS(inode_nblocks, struct btrfs_inode_item, nblocks, 64); +BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); +BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 32); +BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 16); +BTRFS_SETGET_FUNCS(inode_compat_flags, struct btrfs_inode_item, + compat_flags, 16); -static inline u16 btrfs_inode_flags(struct btrfs_inode_item *i) +static inline struct btrfs_inode_timespec * +btrfs_inode_atime(struct btrfs_inode_item *inode_item) { - return le16_to_cpu(i->flags); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, atime); + return (struct btrfs_inode_timespec *)ptr; } -static inline void btrfs_set_inode_flags(struct btrfs_inode_item *i, u16 val) +static inline struct btrfs_inode_timespec * +btrfs_inode_mtime(struct btrfs_inode_item *inode_item) { - i->flags = cpu_to_le16(val); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, mtime); + return (struct btrfs_inode_timespec *)ptr; } -static inline u16 btrfs_inode_compat_flags(struct btrfs_inode_item *i) +static 
inline struct btrfs_inode_timespec * +btrfs_inode_ctime(struct btrfs_inode_item *inode_item) { - return le16_to_cpu(i->compat_flags); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, ctime); + return (struct btrfs_inode_timespec *)ptr; } -static inline void btrfs_set_inode_compat_flags(struct btrfs_inode_item *i, - u16 val) +static inline struct btrfs_inode_timespec * +btrfs_inode_otime(struct btrfs_inode_item *inode_item) { - i->compat_flags = cpu_to_le16(val); + unsigned long ptr = (unsigned long)inode_item; + ptr += offsetof(struct btrfs_inode_item, otime); + return (struct btrfs_inode_timespec *)ptr; } -static inline u64 btrfs_timespec_sec(struct btrfs_inode_timespec *ts) -{ - return le64_to_cpu(ts->sec); -} +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_inode_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_inode_timespec, nsec, 32); -static inline void btrfs_set_timespec_sec(struct btrfs_inode_timespec *ts, - u64 val) -{ - ts->sec = cpu_to_le64(val); -} +/* struct btrfs_extent_item */ +BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 32); +BTRFS_SETGET_FUNCS(extent_owner, struct btrfs_extent_item, owner, 32); -static inline u32 btrfs_timespec_nsec(struct btrfs_inode_timespec *ts) -{ - return le32_to_cpu(ts->nsec); -} +BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, + refs, 32); +BTRFS_SETGET_STACK_FUNCS(stack_extent_owner, struct btrfs_extent_item, + owner, 32); -static inline void btrfs_set_timespec_nsec(struct btrfs_inode_timespec *ts, - u32 val) -{ - ts->nsec = cpu_to_le32(val); -} +/* struct btrfs_node */ +BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); -static inline u32 btrfs_extent_refs(struct btrfs_extent_item *ei) +static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr) { - return le32_to_cpu(ei->refs); + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return 
btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); } -static inline void btrfs_set_extent_refs(struct btrfs_extent_item *ei, u32 val) +static inline void btrfs_set_node_blockptr(struct extent_buffer *eb, + int nr, u64 val) { - ei->refs = cpu_to_le32(val); + unsigned long ptr; + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); } -static inline u64 btrfs_extent_owner(struct btrfs_extent_item *ei) +static unsigned long btrfs_node_key_ptr_offset(int nr) { - return le64_to_cpu(ei->owner); + return offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; } -static inline void btrfs_set_extent_owner(struct btrfs_extent_item *ei, u64 val) +static void btrfs_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - ei->owner = cpu_to_le64(val); + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + read_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); } - -static inline u64 btrfs_node_blockptr(struct btrfs_node *n, int nr) +static inline void btrfs_set_node_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - return le64_to_cpu(n->ptrs[nr].blockptr); + unsigned long ptr; + ptr = btrfs_node_key_ptr_offset(nr); + write_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); } +/* struct btrfs_item */ +BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32); -static inline void btrfs_set_node_blockptr(struct btrfs_node *n, int nr, - u64 val) +static inline unsigned long btrfs_item_nr_offset(int nr) { - n->ptrs[nr].blockptr = cpu_to_le64(val); + return offsetof(struct btrfs_leaf, items) + + sizeof(struct btrfs_item) * nr; } -static inline u32 btrfs_item_offset(struct btrfs_item *item) +static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb, + int nr) { - return 
le32_to_cpu(item->offset); + return (struct btrfs_item *)btrfs_item_nr_offset(nr); } -static inline void btrfs_set_item_offset(struct btrfs_item *item, u32 val) +static inline u32 btrfs_item_end(struct extent_buffer *eb, + struct btrfs_item *item) { - item->offset = cpu_to_le32(val); + return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item); } -static inline u32 btrfs_item_end(struct btrfs_item *item) +static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr) { - return le32_to_cpu(item->offset) + le16_to_cpu(item->size); + return btrfs_item_end(eb, btrfs_item_nr(eb, nr)); } -static inline u16 btrfs_item_size(struct btrfs_item *item) +static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr) { - return le16_to_cpu(item->size); + return btrfs_item_offset(eb, btrfs_item_nr(eb, nr)); } -static inline void btrfs_set_item_size(struct btrfs_item *item, u16 val) +static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr) { - item->size = cpu_to_le16(val); + return btrfs_item_size(eb, btrfs_item_nr(eb, nr)); } -static inline u16 btrfs_dir_flags(struct btrfs_dir_item *d) +static inline void btrfs_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - return le16_to_cpu(d->flags); + struct btrfs_item *item = btrfs_item_nr(eb, nr); + read_eb_member(eb, item, struct btrfs_item, key, disk_key); } -static inline void btrfs_set_dir_flags(struct btrfs_dir_item *d, u16 val) +static inline void btrfs_set_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) { - d->flags = cpu_to_le16(val); + struct btrfs_item *item = btrfs_item_nr(eb, nr); + write_eb_member(eb, item, struct btrfs_item, key, disk_key); } -static inline u8 btrfs_dir_type(struct btrfs_dir_item *d) -{ - return d->type; -} +/* struct btrfs_dir_item */ +BTRFS_SETGET_FUNCS(dir_flags, struct btrfs_dir_item, flags, 16); +BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_FUNCS(dir_name_len, struct 
btrfs_dir_item, name_len, 16); -static inline void btrfs_set_dir_type(struct btrfs_dir_item *d, u8 val) +static inline void btrfs_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) { - d->type = val; + read_eb_member(eb, item, struct btrfs_dir_item, location, key); } -static inline u16 btrfs_dir_name_len(struct btrfs_dir_item *d) +static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_disk_key *key) { - return le16_to_cpu(d->name_len); + write_eb_member(eb, item, struct btrfs_dir_item, location, key); } -static inline void btrfs_set_dir_name_len(struct btrfs_dir_item *d, u16 val) -{ - d->name_len = cpu_to_le16(val); -} +/* struct btrfs_disk_key */ +BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, + objectid, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, struct btrfs_disk_key *disk) { cpu->offset = le64_to_cpu(disk->offset); - cpu->flags = le32_to_cpu(disk->flags); + cpu->type = disk->type; cpu->objectid = le64_to_cpu(disk->objectid); } @@ -649,400 +661,167 @@ static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, struct btrfs_key *cpu) { disk->offset = cpu_to_le64(cpu->offset); - disk->flags = cpu_to_le32(cpu->flags); + disk->type = cpu->type; disk->objectid = cpu_to_le64(cpu->objectid); } -static inline u64 btrfs_disk_key_objectid(struct btrfs_disk_key *disk) -{ - return le64_to_cpu(disk->objectid); -} - -static inline void btrfs_set_disk_key_objectid(struct btrfs_disk_key *disk, - u64 val) -{ - disk->objectid = cpu_to_le64(val); -} - -static inline u64 btrfs_disk_key_offset(struct btrfs_disk_key *disk) -{ - return le64_to_cpu(disk->offset); -} - -static inline void btrfs_set_disk_key_offset(struct btrfs_disk_key *disk, - u64 val) -{ - disk->offset = 
cpu_to_le64(val); -} - -static inline u32 btrfs_disk_key_flags(struct btrfs_disk_key *disk) -{ - return le32_to_cpu(disk->flags); -} - -static inline void btrfs_set_disk_key_flags(struct btrfs_disk_key *disk, - u32 val) -{ - disk->flags = cpu_to_le32(val); -} - -static inline u32 btrfs_disk_key_type(struct btrfs_disk_key *key) -{ - return le32_to_cpu(key->flags) >> BTRFS_KEY_TYPE_SHIFT; -} - -static inline void btrfs_set_disk_key_type(struct btrfs_disk_key *key, - u32 val) -{ - u32 flags = btrfs_disk_key_flags(key); - BUG_ON(val >= BTRFS_KEY_TYPE_MAX); - val = val << BTRFS_KEY_TYPE_SHIFT; - flags = (flags & ~BTRFS_KEY_TYPE_MASK) | val; - btrfs_set_disk_key_flags(key, flags); -} - -static inline u32 btrfs_key_type(struct btrfs_key *key) -{ - return key->flags >> BTRFS_KEY_TYPE_SHIFT; -} - -static inline void btrfs_set_key_type(struct btrfs_key *key, u32 val) -{ - BUG_ON(val >= BTRFS_KEY_TYPE_MAX); - val = val << BTRFS_KEY_TYPE_SHIFT; - key->flags = (key->flags & ~(BTRFS_KEY_TYPE_MASK)) | val; -} - -static inline u64 btrfs_header_blocknr(struct btrfs_header *h) -{ - return le64_to_cpu(h->blocknr); -} - -static inline void btrfs_set_header_blocknr(struct btrfs_header *h, u64 blocknr) -{ - h->blocknr = cpu_to_le64(blocknr); -} - -static inline u64 btrfs_header_generation(struct btrfs_header *h) +static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) { - return le64_to_cpu(h->generation); + struct btrfs_disk_key disk_key; + btrfs_node_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_set_header_generation(struct btrfs_header *h, - u64 val) +static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_key *key, int nr) { - h->generation = cpu_to_le64(val); + struct btrfs_disk_key disk_key; + btrfs_item_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline u64 btrfs_header_owner(struct btrfs_header *h) +static inline void 
btrfs_dir_item_key_to_cpu(struct extent_buffer *eb, + struct btrfs_dir_item *item, + struct btrfs_key *key) { - return le64_to_cpu(h->owner); + struct btrfs_disk_key disk_key; + btrfs_dir_item_key(eb, item, &disk_key); + btrfs_disk_key_to_cpu(key, &disk_key); } -static inline void btrfs_set_header_owner(struct btrfs_header *h, - u64 val) -{ - h->owner = cpu_to_le64(val); -} - -static inline u16 btrfs_header_nritems(struct btrfs_header *h) -{ - return le16_to_cpu(h->nritems); -} - -static inline void btrfs_set_header_nritems(struct btrfs_header *h, u16 val) -{ - h->nritems = cpu_to_le16(val); -} - -static inline u16 btrfs_header_flags(struct btrfs_header *h) -{ - return le16_to_cpu(h->flags); -} - -static inline void btrfs_set_header_flags(struct btrfs_header *h, u16 val) -{ - h->flags = cpu_to_le16(val); -} - -static inline int btrfs_header_level(struct btrfs_header *h) -{ - return h->level; -} - -static inline void btrfs_set_header_level(struct btrfs_header *h, int level) -{ - BUG_ON(level > BTRFS_MAX_LEVEL); - h->level = level; -} - -static inline int btrfs_is_leaf(struct btrfs_node *n) -{ - return (btrfs_header_level(&n->header) == 0); -} - -static inline u64 btrfs_root_blocknr(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->blocknr); -} - -static inline void btrfs_set_root_blocknr(struct btrfs_root_item *item, u64 val) -{ - item->blocknr = cpu_to_le64(val); -} - -static inline u64 btrfs_root_dirid(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->root_dirid); -} - -static inline void btrfs_set_root_dirid(struct btrfs_root_item *item, u64 val) -{ - item->root_dirid = cpu_to_le64(val); -} - -static inline u32 btrfs_root_refs(struct btrfs_root_item *item) -{ - return le32_to_cpu(item->refs); -} - -static inline void btrfs_set_root_refs(struct btrfs_root_item *item, u32 val) -{ - item->refs = cpu_to_le32(val); -} - -static inline u32 btrfs_root_flags(struct btrfs_root_item *item) -{ - return le32_to_cpu(item->flags); -} - -static inline void 
btrfs_set_root_flags(struct btrfs_root_item *item, u32 val) -{ - item->flags = cpu_to_le32(val); -} - -static inline void btrfs_set_root_blocks_used(struct btrfs_root_item *item, - u64 val) -{ - item->blocks_used = cpu_to_le64(val); -} - -static inline u64 btrfs_root_blocks_used(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->blocks_used); -} - -static inline void btrfs_set_root_block_limit(struct btrfs_root_item *item, - u64 val) -{ - item->block_limit = cpu_to_le64(val); -} - -static inline u64 btrfs_root_block_limit(struct btrfs_root_item *item) -{ - return le64_to_cpu(item->block_limit); -} -static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s) +static inline u8 btrfs_key_type(struct btrfs_key *key) { - return le64_to_cpu(s->blocknr); + return key->type; } -static inline void btrfs_set_super_blocknr(struct btrfs_super_block *s, u64 val) +static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) { - s->blocknr = cpu_to_le64(val); + key->type = val; } -static inline u64 btrfs_super_generation(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->generation); -} +/* struct btrfs_header */ +BTRFS_SETGET_HEADER_FUNCS(header_blocknr, struct btrfs_header, blocknr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); +BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 16); +BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); -static inline void btrfs_set_super_generation(struct btrfs_super_block *s, - u64 val) +static inline u8 *btrfs_header_fsid(struct extent_buffer *eb) { - s->generation = cpu_to_le64(val); + unsigned long ptr = offsetof(struct btrfs_header, fsid); + return (u8 *)ptr; } -static inline u64 btrfs_super_root(struct btrfs_super_block *s) +static inline u8 *btrfs_super_fsid(struct extent_buffer 
*eb) { - return le64_to_cpu(s->root); + unsigned long ptr = offsetof(struct btrfs_super_block, fsid); + return (u8 *)ptr; } -static inline void btrfs_set_super_root(struct btrfs_super_block *s, u64 val) +static inline u8 *btrfs_header_csum(struct extent_buffer *eb) { - s->root = cpu_to_le64(val); + unsigned long ptr = offsetof(struct btrfs_header, csum); + return (u8 *)ptr; } -static inline u64 btrfs_super_total_blocks(struct btrfs_super_block *s) +static inline struct btrfs_node *btrfs_buffer_node(struct extent_buffer *eb) { - return le64_to_cpu(s->total_blocks); + return NULL; } -static inline void btrfs_set_super_total_blocks(struct btrfs_super_block *s, - u64 val) +static inline struct btrfs_leaf *btrfs_buffer_leaf(struct extent_buffer *eb) { - s->total_blocks = cpu_to_le64(val); + return NULL; } -static inline u64 btrfs_super_blocks_used(struct btrfs_super_block *s) +static inline struct btrfs_header *btrfs_buffer_header(struct extent_buffer *eb) { - return le64_to_cpu(s->blocks_used); + return NULL; } -static inline void btrfs_set_super_blocks_used(struct btrfs_super_block *s, - u64 val) +static inline int btrfs_is_leaf(struct extent_buffer *eb) { - s->blocks_used = cpu_to_le64(val); + return (btrfs_header_level(eb) == 0); } -static inline u32 btrfs_super_blocksize(struct btrfs_super_block *s) -{ - return le32_to_cpu(s->blocksize); -} +/* struct btrfs_root_item */ +BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_FUNCS(disk_root_blocknr, struct btrfs_root_item, blocknr, 64); -static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s, - u32 val) -{ - s->blocksize = cpu_to_le32(val); -} +BTRFS_SETGET_STACK_FUNCS(root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); +BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); 
+BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, block_limit, 64); -static inline u64 btrfs_super_root_dir(struct btrfs_super_block *s) -{ - return le64_to_cpu(s->root_dir_objectid); -} +/* struct btrfs_super_block */ +BTRFS_SETGET_STACK_FUNCS(super_blocknr, struct btrfs_super_block, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_total_blocks, struct btrfs_super_block, + total_blocks, 64); +BTRFS_SETGET_STACK_FUNCS(super_blocks_used, struct btrfs_super_block, + blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, + sectorsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, + nodesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_leafsize, struct btrfs_super_block, + leafsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, + root_dir_objectid, 64); -static inline void btrfs_set_super_root_dir(struct btrfs_super_block *s, u64 - val) +static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) { - s->root_dir_objectid = cpu_to_le64(val); + return offsetof(struct btrfs_leaf, items); } -static inline u8 *btrfs_leaf_data(struct btrfs_leaf *l) -{ - return (u8 *)l->items; -} +/* struct btrfs_file_extent_item */ +BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); -static inline int btrfs_file_extent_type(struct btrfs_file_extent_item *e) -{ - return e->type; -} -static inline void btrfs_set_file_extent_type(struct btrfs_file_extent_item *e, - u8 val) -{ - e->type = val; -} - -static inline char *btrfs_file_extent_inline_start(struct +static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { - return (char *)(&e->disk_blocknr); + unsigned long offset = (unsigned long)e; + 
offset += offsetof(struct btrfs_file_extent_item, disk_blocknr); + return offset; } static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) { - return (unsigned long)(&((struct - btrfs_file_extent_item *)NULL)->disk_blocknr) + datasize; -} - -static inline u32 btrfs_file_extent_inline_len(struct btrfs_item *e) -{ - struct btrfs_file_extent_item *fe = NULL; - return btrfs_item_size(e) - (unsigned long)(&fe->disk_blocknr); -} - -static inline u64 btrfs_file_extent_disk_blocknr(struct btrfs_file_extent_item - *e) -{ - return le64_to_cpu(e->disk_blocknr); + return offsetof(struct btrfs_file_extent_item, disk_blocknr) + datasize; } -static inline void btrfs_set_file_extent_disk_blocknr(struct - btrfs_file_extent_item - *e, u64 val) +static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, + struct btrfs_item *e) { - e->disk_blocknr = cpu_to_le64(val); + unsigned long offset; + offset = offsetof(struct btrfs_file_extent_item, disk_blocknr); + return btrfs_item_size(eb, e) - offset; } -static inline u64 btrfs_file_extent_generation(struct btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->generation); -} - -static inline void btrfs_set_file_extent_generation(struct - btrfs_file_extent_item *e, - u64 val) -{ - e->generation = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_disk_num_blocks(struct - btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->disk_num_blocks); -} - -static inline void btrfs_set_file_extent_disk_num_blocks(struct - btrfs_file_extent_item - *e, u64 val) -{ - e->disk_num_blocks = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_offset(struct btrfs_file_extent_item *e) -{ - return le64_to_cpu(e->offset); -} - -static inline void btrfs_set_file_extent_offset(struct btrfs_file_extent_item - *e, u64 val) -{ - e->offset = cpu_to_le64(val); -} - -static inline u64 btrfs_file_extent_num_blocks(struct btrfs_file_extent_item - *e) -{ - return le64_to_cpu(e->num_blocks); -} - -static inline void 
btrfs_set_file_extent_num_blocks(struct - btrfs_file_extent_item *e, - u64 val) -{ - e->num_blocks = cpu_to_le64(val); -} +BTRFS_SETGET_FUNCS(file_extent_disk_blocknr, struct btrfs_file_extent_item, + disk_blocknr, 64); +BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, + generation, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_blocks, struct btrfs_file_extent_item, + disk_num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, + offset, 64); +BTRFS_SETGET_FUNCS(file_extent_num_blocks, struct btrfs_file_extent_item, + num_blocks, 64); static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { return sb->s_fs_info; } -static inline void btrfs_check_bounds(void *vptr, size_t len, - void *vcontainer, size_t container_len) -{ - char *ptr = vptr; - char *container = vcontainer; - WARN_ON(ptr < container); - WARN_ON(ptr + len > container + container_len); -} - -static inline void btrfs_memcpy(struct btrfs_root *root, - void *dst_block, - void *dst, const void *src, size_t nr) -{ - btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); - memcpy(dst, src, nr); -} - -static inline void btrfs_memmove(struct btrfs_root *root, - void *dst_block, - void *dst, void *src, size_t nr) -{ - btrfs_check_bounds(dst, nr, dst_block, root->fs_info->sb->s_blocksize); - memmove(dst, src, nr); -} - static inline int btrfs_set_root_name(struct btrfs_root *root, const char *name, int len) { @@ -1063,7 +842,11 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, /* helper function to cast into the data area of the leaf. 
*/ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ - btrfs_item_offset((leaf)->items + (slot)))) + btrfs_item_offset_nr(leaf, slot))) + +#define btrfs_item_ptr_offset(leaf, slot) \ + ((unsigned long)(btrfs_leaf_data(leaf) + \ + btrfs_item_offset_nr(leaf, slot))) /* mount option defines and helpers */ #define BTRFS_MOUNT_SUBVOL 0x000001 @@ -1084,7 +867,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner); int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 hint, u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, @@ -1092,7 +875,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, u64 num_blocks, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); + struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 blocknr, u64 num_blocks, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, @@ -1106,10 +889,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); /* ctree.c */ -int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct buffer_head *buf, struct buffer_head - *parent, int parent_slot, struct buffer_head - **cow_ret); +int btrfs_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *parent, int parent_slot, + struct extent_buffer **cow_ret); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct 
btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, @@ -1120,7 +903,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_path *p, int ins_len, int cow); int btrfs_realloc_node(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *parent, + struct btrfs_root *root, struct extent_buffer *parent, int cache_only, u64 *last_ret); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); @@ -1134,7 +917,7 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *cpu_key, u32 data_size); int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); -int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf); +int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root); /* root-item.c */ @@ -1179,9 +962,9 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int btrfs_find_highest_inode(struct btrfs_root *fs_root, u64 *objectid); /* inode-item.c */ -int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 objectid, struct btrfs_inode_item - *inode_item); +int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid); int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *location, int mod); @@ -1224,8 +1007,6 @@ struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); -int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *result, 
int create); struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 end, int create); diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 49db5fa7ced3..6f19de41b878 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -33,7 +33,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle int ret; char *ptr; struct btrfs_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); if (ret == -EEXIST) { @@ -49,11 +49,11 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle if (ret < 0) return ERR_PTR(ret); WARN_ON(ret > 0); - leaf = btrfs_buffer_leaf(path->nodes[0]); - item = leaf->items + path->slots[0]; + leaf = path->nodes[0]; + item = btrfs_item_nr(leaf, path->slots[0]); ptr = btrfs_item_ptr(leaf, path->slots[0], char); - BUG_ON(data_size > btrfs_item_size(item)); - ptr += btrfs_item_size(item) - data_size; + BUG_ON(data_size > btrfs_item_size(leaf, item)); + ptr += btrfs_item_size(leaf, item) - data_size; return (struct btrfs_dir_item *)ptr; } @@ -65,12 +65,13 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root int ret2 = 0; struct btrfs_path *path; struct btrfs_dir_item *dir_item; - char *name_ptr; + struct extent_buffer *leaf; + unsigned long name_ptr; struct btrfs_key key; + struct btrfs_disk_key disk_key; u32 data_size; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); @@ -85,14 +86,16 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root goto out; } - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); + leaf = path->nodes[0]; + 
btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); second_insert: /* FIXME, use some real flag for selecting the extra index */ @@ -110,13 +113,15 @@ second_insert: ret2 = PTR_ERR(dir_item); goto out; } - btrfs_cpu_key_to_disk(&dir_item->location, location); - btrfs_set_dir_type(dir_item, type); - btrfs_set_dir_flags(dir_item, 0); - btrfs_set_dir_name_len(dir_item, name_len); - name_ptr = (char *)(dir_item + 1); - btrfs_memcpy(root, path->nodes[0]->b_data, name_ptr, name, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, type); + btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + name_ptr = (unsigned long)(dir_item + 1); + write_extent_buffer(leaf, name, name_ptr, name_len); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); if (ret) @@ -136,14 +141,15 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_key key; int ins_len = mod < 0 ? 
-1 : 0; int cow = mod != 0; - struct btrfs_disk_key *found_key; - struct btrfs_leaf *leaf; + struct btrfs_key found_key; + struct extent_buffer *leaf; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + ret = btrfs_name_hash(name, name_len, &key.offset); BUG_ON(ret); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); if (ret < 0) return ERR_PTR(ret); @@ -152,12 +158,13 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, return NULL; path->slots[0]--; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - if (btrfs_disk_key_objectid(found_key) != dir || - btrfs_disk_key_type(found_key) != BTRFS_DIR_ITEM_KEY || - btrfs_disk_key_offset(found_key) != key.offset) + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != dir || + btrfs_key_type(&found_key) != BTRFS_DIR_ITEM_KEY || + found_key.offset != key.offset) return NULL; return btrfs_match_dir_item_name(root, path, name, name_len); @@ -176,7 +183,6 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, int cow = mod != 0; key.objectid = dir; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY); key.offset = objectid; @@ -193,21 +199,22 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, const char *name, int name_len) { struct btrfs_dir_item *dir_item; - char *name_ptr; + unsigned long name_ptr; u32 total_len; u32 cur = 0; u32 this_len; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); - total_len = btrfs_item_size(leaf->items + path->slots[0]); + total_len = btrfs_item_size_nr(leaf, path->slots[0]); while(cur < total_len) { - this_len = sizeof(*dir_item) + btrfs_dir_name_len(dir_item); - name_ptr = (char *)(dir_item + 1); + this_len = sizeof(*dir_item) 
+ + btrfs_dir_name_len(leaf, dir_item); + name_ptr = (unsigned long)(dir_item + 1); - if (btrfs_dir_name_len(dir_item) == name_len && - memcmp(name_ptr, name, name_len) == 0) + if (btrfs_dir_name_len(leaf, dir_item) == name_len && + memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0) return dir_item; cur += this_len; @@ -223,20 +230,23 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di) { - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u32 sub_item_len; u32 item_len; int ret = 0; - leaf = btrfs_buffer_leaf(path->nodes[0]); - sub_item_len = sizeof(*di) + btrfs_dir_name_len(di); - item_len = btrfs_item_size(leaf->items + path->slots[0]); - if (sub_item_len == btrfs_item_size(leaf->items + path->slots[0])) { + leaf = path->nodes[0]; + sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di); + item_len = btrfs_item_size_nr(leaf, path->slots[0]); + if (sub_item_len == item_len) { ret = btrfs_del_item(trans, root, path); } else { - char *ptr = (char *)di; - char *start = btrfs_item_ptr(leaf, path->slots[0], char); - btrfs_memmove(root, leaf, ptr, ptr + sub_item_len, + /* MARKER */ + unsigned long ptr = (unsigned long)di; + unsigned long start; + + start = btrfs_item_ptr_offset(leaf, path->slots[0]); + memmove_extent_buffer(leaf, ptr, ptr + sub_item_len, item_len - (ptr + sub_item_len - start)); ret = btrfs_truncate_item(trans, root, path, item_len - sub_item_len); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ef6d76f2ec20..0c1f90cbedb0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,138 +23,132 @@ #include #include #include +#include // for block_sync_page #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -u64 bh_blocknr(struct buffer_head *bh) +#if 0 +static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) { - return bh->b_blocknr; -} - -static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) -{ - 
struct btrfs_node *node = btrfs_buffer_node(buf); - if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { - printk(KERN_CRIT "bh_blocknr(buf) is %llu, header is %llu\n", - (unsigned long long)bh_blocknr(buf), - (unsigned long long)btrfs_header_blocknr(&node->header)); + if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) { + printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n", + (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)btrfs_header_blocknr(buf)); return 1; } return 0; } +#endif -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - - - page = find_lock_page(mapping, index); - if (!page) - return NULL; - - if (!page_has_buffers(page)) - goto out_unlock; + struct inode *btree_inode = root->fs_info->btree_inode; + return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); +} - head = page_buffers(page); - bh = head; - do { - if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - } while (bh != head); -out_unlock: - unlock_page(page); - page_cache_release(page); - return ret; +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); } -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical) +struct extent_map 
*btree_get_extent(struct inode *inode, struct page *page, + size_t page_offset, u64 start, u64 end, + int create) { - if (logical == 0) { - bh->b_bdev = NULL; - bh->b_blocknr = 0; - set_buffer_mapped(bh); - } else { - map_bh(bh, root->fs_info->sb, logical); + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; + int ret; + +again: + em = lookup_extent_mapping(em_tree, start, end); + if (em) { + goto out; } - return 0; + em = alloc_extent_map(GFP_NOFS); + if (!em) { + em = ERR_PTR(-ENOMEM); + goto out; + } + em->start = 0; + em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->block_start = 0; + em->block_end = em->end; + em->bdev = inode->i_sb->s_bdev; + ret = add_extent_mapping(em_tree, em); + if (ret == -EEXIST) { + free_extent_map(em); + em = NULL; + goto again; + } else if (ret) { + em = ERR_PTR(ret); + } +out: + return em; } -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr) +static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - int err; - u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_write_full_page(tree, page, btree_get_extent, wbc); +} +int btree_readpage(struct file *file, struct page *page) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_read_full_page(tree, page, btree_get_extent); +} - page = find_or_create_page(mapping, index, GFP_NOFS); - if (!page) - return NULL; +static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) +{ + struct extent_map_tree 
*tree; + int ret; - if (!page_has_buffers(page)) - create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); - head = page_buffers(page); - bh = head; - do { - if (!buffer_mapped(bh)) { - err = btrfs_map_bh_to_logical(root, bh, first_block); - BUG_ON(err); - } - if (bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - first_block++; - } while (bh != head); -out_unlock: - unlock_page(page); - if (ret) - touch_buffer(ret); - page_cache_release(page); + BUG_ON(page->private != 1); + tree = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(tree, page); + if (ret == 1) { + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } return ret; } -static int btree_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) +static void btree_invalidatepage(struct page *page, unsigned long offset) { - int err; - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - err = btrfs_map_bh_to_logical(root, bh, iblock); - return err; + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + extent_invalidatepage(tree, page, offset); + btree_releasepage(page, GFP_NOFS); } int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result) { + return 0; +#if 0 u32 crc; crc = crc32c(0, data, len); memcpy(result, &crc, BTRFS_CRC32_SIZE); return 0; +#endif } -static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, +#if 0 +static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { + return 0; char result[BTRFS_CRC32_SIZE]; int ret; struct btrfs_node *node; @@ -176,7 +170,9 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, } return 0; } +#endif +#if 0 static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct buffer_head *bh; @@ -195,87 +191,65 @@ static int btree_writepage(struct 
page *page, struct writeback_control *wbc) } while (bh != head); return block_write_full_page(page, btree_get_block, wbc); } - -static int btree_readpage(struct file * file, struct page * page) -{ - return block_read_full_page(page, btree_get_block); -} +#endif static struct address_space_operations btree_aops = { .readpage = btree_readpage, .writepage = btree_writepage, + .releasepage = btree_releasepage, + .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, }; int readahead_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) return 0; - if (buffer_uptodate(bh)) { - ret = 1; - goto done; - } - if (test_set_buffer_locked(bh)) { - ret = 1; - goto done; - } - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - } else { - unlock_buffer(bh); - ret = 1; - } -done: - brelse(bh); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 0); + free_extent_buffer(buf); return ret; } -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; - - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) - return bh; - if (buffer_uptodate(bh)) - goto uptodate; - lock_buffer(bh); - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - goto fail; - } else { - unlock_buffer(bh); - } -uptodate: - if (!buffer_checked(bh)) { - csum_tree_block(root, bh, 1); - set_buffer_checked(bh); - } - if (check_tree_block(root, bh)) - goto fail; - return bh; -fail: - brelse(bh); - return NULL; + struct extent_buffer *buf = 
NULL; + struct inode *btree_inode = root->fs_info->btree_inode; + + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) + return NULL; + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 1); + return buf; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { - WARN_ON(atomic_read(&buf->b_count) == 0); - lock_buffer(buf); - clear_buffer_dirty(buf); - unlock_buffer(buf); + struct inode *btree_inode = root->fs_info->btree_inode; + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + return 0; +} + +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + buf); + return 0; +} + +int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); return 0; } @@ -287,7 +261,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->blocksize = blocksize; + root->sectorsize = blocksize; + root->nodesize = blocksize; + root->leafsize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -332,7 +308,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; u64 highest_inode; int ret = 0; @@ -361,11 +337,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, ret = -ENOENT; goto out; } - l = btrfs_buffer_leaf(path->nodes[0]); - memcpy(&root->root_item, - btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item), + l = path->nodes[0]; + read_extent_buffer(l, 
&root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), sizeof(root->root_item)); - memcpy(&root->root_key, location, sizeof(*location)); ret = 0; out: btrfs_release_path(root, path); @@ -406,21 +381,21 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, (unsigned long)root->root_key.objectid, root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_set_root_name(root, name, namelen); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_sysfs_add_root(root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root->name); kfree(root); return ERR_PTR(ret); @@ -471,6 +446,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + fs_info->btree_inode->i_mapping, + GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -493,10 +471,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!fs_info->sb_buffer) goto fail_iput; - disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; - fs_info->disk_super = disk_super; - memcpy(&fs_info->super_copy, disk_super, sizeof(fs_info->super_copy)); + read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, + sizeof(fs_info->super_copy)); + + read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, + (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), + BTRFS_FSID_SIZE); + disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -530,9 +512,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) return tree_root; fail_tree_root: - btrfs_block_release(tree_root, tree_root->node); + free_extent_buffer(tree_root->node); fail_sb_buffer: - btrfs_block_release(tree_root, 
fs_info->sb_buffer); + free_extent_buffer(fs_info->sb_buffer); fail_iput: iput(fs_info->btree_inode); fail: @@ -546,31 +528,13 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - struct buffer_head *bh = root->fs_info->sb_buffer; - - lock_buffer(bh); - WARN_ON(atomic_read(&bh->b_count) < 1); - clear_buffer_dirty(bh); - csum_tree_block(root, bh, 0); - bh->b_end_io = end_buffer_write_sync; - get_bh(bh); - if (root->fs_info->do_barriers) - ret = submit_bh(WRITE_BARRIER, bh); - else - ret = submit_bh(WRITE, bh); - if (ret == -EOPNOTSUPP) { - get_bh(bh); - lock_buffer(bh); - set_buffer_uptodate(bh); - root->fs_info->do_barriers = 0; - ret = submit_bh(WRITE, bh); - } - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - WARN_ON(1); - return -EIO; - } - return 0; + struct extent_buffer *super = root->fs_info->sb_buffer; + struct inode *btree_inode = root->fs_info->btree_inode; + + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, + super->start, super->len); + return ret; } int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) @@ -581,9 +545,9 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) if (root->inode) iput(root->inode); if (root->node) - brelse(root->node); + free_extent_buffer(root->node); if (root->commit_root) - brelse(root->commit_root); + free_extent_buffer(root->commit_root); if (root->name) kfree(root->name); kfree(root); @@ -629,12 +593,10 @@ int close_ctree(struct btrfs_root *root) mutex_unlock(&fs_info->fs_mutex); if (fs_info->extent_root->node) - btrfs_block_release(fs_info->extent_root, - fs_info->extent_root->node); + free_extent_buffer(fs_info->extent_root->node); if (fs_info->tree_root->node) - btrfs_block_release(fs_info->tree_root, - fs_info->tree_root->node); - btrfs_block_release(root, fs_info->sb_buffer); + free_extent_buffer(fs_info->tree_root->node); + 
free_extent_buffer(fs_info->sb_buffer); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); @@ -645,25 +607,32 @@ int close_ctree(struct btrfs_root *root) return 0; } -void btrfs_mark_buffer_dirty(struct buffer_head *bh) +int btrfs_buffer_uptodate(struct extent_buffer *buf) +{ + struct inode *btree_inode = buf->pages[0]->mapping->host; + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); +} + +int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + struct inode *btree_inode = buf->pages[0]->mapping->host; + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + buf); +} - WARN_ON(!atomic_read(&bh->b_count)); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + u64 transid = btrfs_header_generation(buf); + struct inode *btree_inode = root->fs_info->btree_inode; if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)bh->b_blocknr, + (unsigned long long)extent_buffer_blocknr(buf), transid, root->fs_info->generation); WARN_ON(1); } - mark_buffer_dirty(bh); -} - -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) -{ - brelse(buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index fc6b520c6e0b..70d9413c599f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -19,68 +19,35 @@ #ifndef __DISKIO__ #define __DISKIO__ -#include - #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) -enum btrfs_bh_state_bits { - BH_Checked = BH_PrivateStart, - BH_Defrag, - BH_DefragDone, -}; -BUFFER_FNS(Checked, checked); -BUFFER_FNS(Defrag, 
defrag); -BUFFER_FNS(DefragDone, defrag_done); - -static inline struct btrfs_node *btrfs_buffer_node(struct buffer_head *bh) -{ - return (struct btrfs_node *)bh->b_data; -} - -static inline struct btrfs_leaf *btrfs_buffer_leaf(struct buffer_head *bh) -{ - return (struct btrfs_leaf *)bh->b_data; -} - -static inline struct btrfs_header *btrfs_buffer_header(struct buffer_head *bh) -{ - return &((struct btrfs_node *)bh->b_data)->header; -} - -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr); +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); int readahead_tree_block(struct btrfs_root *root, u64 blocknr); -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr); -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); -int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf); +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr); int clean_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *buf); -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, - struct btrfs_root *root); + struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb); int close_ctree(struct btrfs_root *root); -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr); -int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, - char *result); +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); struct btrfs_root 
*btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); -u64 bh_blocknr(struct buffer_head *bh); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, u64 device_id, u64 block_start, u64 num_blocks); -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical); void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -void btrfs_mark_buffer_dirty(struct buffer_head *bh); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf); +int btrfs_buffer_uptodate(struct extent_buffer *buf); +int btrfs_set_buffer_uptodate(struct extent_buffer *buf); +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f261a8326cdf..089c41cbca74 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,7 +33,7 @@ static int cache_block_group(struct btrfs_root *root, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct radix_tree_root *extent_radix; int slot; u64 i; @@ -56,7 +56,6 @@ static int cache_block_group(struct btrfs_root *root, path->reada = 2; first_free = block_group->key.objectid; key.objectid = block_group->key.objectid; - key.flags = 0; key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); @@ -69,9 +68,9 @@ static int cache_block_group(struct btrfs_root *root, path->slots[0]--; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&leaf->header)) { + if (slot >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); if (ret < 0) goto err; @@ -82,7 +81,7 @@ static int cache_block_group(struct btrfs_root *root, } } - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, 
slot); if (key.objectid < block_group->key.objectid) { if (key.objectid + key.offset > first_free) first_free = key.objectid + key.offset; @@ -116,8 +115,7 @@ next: hole_size = block_group->key.objectid + block_group->key.offset - last; for (i = 0; i < hole_size; i++) { - set_radix_bit(extent_radix, - last + i); + set_radix_bit(extent_radix, last + i); } } block_group->cached = 1; @@ -366,7 +364,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_extent_item *item; u32 refs; @@ -375,7 +373,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, return -ENOMEM; key.objectid = blocknr; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, @@ -386,10 +383,10 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, BUG(); } BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - refs = btrfs_extent_refs(item); - btrfs_set_extent_refs(item, refs + 1); + refs = btrfs_extent_refs(l, item); + btrfs_set_extent_refs(l, item, refs + 1); btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_release_path(root->fs_info->extent_root, path); @@ -414,23 +411,25 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_path *path; int ret; struct btrfs_key key; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_extent_item *item; path = btrfs_alloc_path(); key.objectid = blocknr; key.offset = num_blocks; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; - if (ret != 0) + if (ret != 0) { + btrfs_print_leaf(root, path->nodes[0]); + printk("failed to find block number %Lu\n", blocknr); BUG(); - l = 
btrfs_buffer_leaf(path->nodes[0]); + } + l = path->nodes[0]; item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); - *refs = btrfs_extent_refs(item); + *refs = btrfs_extent_refs(l, item); out: btrfs_free_path(path); return 0; @@ -439,16 +438,16 @@ out: int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, + extent_buffer_blocknr(root->node), 1); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { u64 blocknr; - struct btrfs_node *buf_node; - struct btrfs_leaf *buf_leaf; - struct btrfs_disk_key *key; + u32 nritems; + struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int leaf; @@ -458,31 +457,31 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - buf_node = btrfs_buffer_node(buf); - leaf = btrfs_is_leaf(buf_node); - buf_leaf = btrfs_buffer_leaf(buf); - for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) { + + leaf = btrfs_is_leaf(buf); + nritems = btrfs_header_nritems(buf); + for (i = 0; i < nritems; i++) { if (leaf) { u64 disk_blocknr; - key = &buf_leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(buf_leaf, i, + fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); if (disk_blocknr == 0) continue; ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi)); + btrfs_file_extent_disk_num_blocks(buf, fi)); if (ret) { faili = i; goto fail; 
} } else { - blocknr = btrfs_node_blockptr(buf_node, i); + blocknr = btrfs_node_blockptr(buf, i); ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); if (ret) { faili = i; @@ -496,22 +495,23 @@ fail: for (i =0; i < faili; i++) { if (leaf) { u64 disk_blocknr; - key = &buf_leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + btrfs_item_key_to_cpu(buf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; - fi = btrfs_item_ptr(buf_leaf, i, + fi = btrfs_item_ptr(buf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); if (disk_blocknr == 0) continue; err = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi), 0); + btrfs_file_extent_disk_num_blocks(buf, + fi), 0); BUG_ON(err); } else { - blocknr = btrfs_node_blockptr(buf_node, i); + blocknr = btrfs_node_blockptr(buf, i); err = btrfs_free_extent(trans, root, blocknr, 1, 0); BUG_ON(err); } @@ -527,16 +527,18 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, int ret; int pending_ret; struct btrfs_root *extent_root = root->fs_info->extent_root; - struct btrfs_block_group_item *bi; + unsigned long bi; + struct extent_buffer *leaf; ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); if (ret < 0) goto fail; BUG_ON(ret); - bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_block_group_item); - memcpy(bi, &cache->item, sizeof(*bi)); - btrfs_mark_buffer_dirty(path->nodes[0]); + + leaf = path->nodes[0]; + bi = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); + btrfs_mark_buffer_dirty(leaf); btrfs_release_path(extent_root, path); fail: finish_current_insert(trans, extent_root); @@ -768,11 +770,11 @@ static int 
finish_current_insert(struct btrfs_trans_handle *trans, struct unsigned long gang[8]; struct btrfs_fs_info *info = extent_root->fs_info; - btrfs_set_extent_refs(&extent_item, 1); + btrfs_set_stack_extent_refs(&extent_item, 1); ins.offset = 1; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY); - btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid); + btrfs_set_stack_extent_owner(&extent_item, + extent_root->root_key.objectid); while(1) { ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0, @@ -795,23 +797,20 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) { int err; - struct btrfs_header *header; - struct buffer_head *bh; + struct extent_buffer *buf; if (!pending) { - bh = btrfs_find_tree_block(root, blocknr); - if (bh) { - if (buffer_uptodate(bh)) { + buf = btrfs_find_tree_block(root, blocknr); + if (buf) { + if (btrfs_buffer_uptodate(buf)) { u64 transid = root->fs_info->running_transaction->transid; - header = btrfs_buffer_header(bh); - if (btrfs_header_generation(header) == - transid) { - btrfs_block_release(root, bh); + if (btrfs_header_generation(buf) == transid) { + free_extent_buffer(buf); return 0; } } - btrfs_block_release(root, bh); + free_extent_buffer(buf); } err = set_radix_bit(&root->fs_info->pinned_radix, blocknr); if (!err) { @@ -839,12 +838,12 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_key key; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; + struct extent_buffer *leaf; int ret; struct btrfs_extent_item *ei; u32 refs; key.objectid = blocknr; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); key.offset = num_blocks; @@ -856,12 +855,16 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) return ret; BUG_ON(ret); - ei = 
btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); - BUG_ON(ei->refs == 0); - refs = btrfs_extent_refs(ei) - 1; - btrfs_set_extent_refs(ei, refs); - btrfs_mark_buffer_dirty(path->nodes[0]); + refs = btrfs_extent_refs(leaf, ei); + BUG_ON(refs == 0); + refs -= 1; + btrfs_set_extent_refs(leaf, ei, refs); + btrfs_mark_buffer_dirty(leaf); + if (refs == 0) { u64 super_blocks_used, root_blocks_used; @@ -876,8 +879,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root super_blocks_used - num_blocks); /* block accounting for root item */ - root_blocks_used = btrfs_root_blocks_used(&root->root_item); - btrfs_set_root_blocks_used(&root->root_item, + root_blocks_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_blocks_used - num_blocks); ret = btrfs_del_item(trans, extent_root, path); @@ -984,7 +987,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 test_block; u64 orig_search_start = search_start; int start_found; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; int total_needed = num_blocks; @@ -994,10 +997,10 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int wrapped = 0; WARN_ON(num_blocks < 1); - ins->flags = 0; btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); + if (search_end == (u64)-1) search_end = btrfs_super_total_blocks(&info->super_copy); if (hint_block) { @@ -1034,8 +1037,9 @@ check_failed: path->slots[0]--; } - l = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key); + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &key, path->slots[0]); + /* * a rare case, go back 
one key if we hit a block group item * instead of an extent item @@ -1055,9 +1059,9 @@ check_failed: } while (1) { - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&l->header)) { + if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -1075,7 +1079,7 @@ check_failed: goto check_pending; } - btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + btrfs_item_key_to_cpu(l, &key, slot); if (key.objectid >= search_start && key.objectid > last_block && start_found) { if (last_block < search_start) @@ -1183,8 +1187,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; - btrfs_set_extent_refs(&extent_item, 1); - btrfs_set_extent_owner(&extent_item, owner); + btrfs_set_stack_extent_refs(&extent_item, 1); + btrfs_set_stack_extent_owner(&extent_item, owner); WARN_ON(num_blocks < 1); ret = find_free_extent(trans, root, num_blocks, empty_size, @@ -1201,8 +1205,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, num_blocks); /* block accounting for root item */ - root_blocks_used = btrfs_root_blocks_used(&root->root_item); - btrfs_set_root_blocks_used(&root->root_item, root_blocks_used + + root_blocks_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_blocks_used + num_blocks); if (root == extent_root) { @@ -1241,13 +1245,13 @@ update_block: * helper function to allocate a block for a given tree * returns the tree buffer or NULL. 
*/ -struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, - u64 empty_size) +struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 hint, + u64 empty_size) { struct btrfs_key ins; int ret; - struct buffer_head *buf; + struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, 1, empty_size, hint, (u64)-1, &ins, 0); @@ -1260,53 +1264,57 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, btrfs_free_extent(trans, root, ins.objectid, 1, 0); return ERR_PTR(-ENOMEM); } - WARN_ON(buffer_dirty(buf)); - set_buffer_uptodate(buf); + btrfs_set_buffer_uptodate(buf); + set_extent_dirty(&trans->transaction->dirty_pages, buf->start, + buf->start + buf->len - 1, GFP_NOFS); + /* set_buffer_checked(buf); set_buffer_defrag(buf); - set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); + */ + /* FIXME!!!!!!!!!!!!!!!! + set_radix_bit(&trans->transaction->dirty_pages, buf->pages[0]->index); + */ trans->blocks_used++; return buf; } static int drop_leaf_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct buffer_head *cur) + struct btrfs_root *root, struct extent_buffer *leaf) { - struct btrfs_disk_key *key; - struct btrfs_leaf *leaf; + struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int nritems; int ret; - BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur))); - leaf = btrfs_buffer_leaf(cur); - nritems = btrfs_header_nritems(&leaf->header); + BUG_ON(!btrfs_is_leaf(leaf)); + nritems = btrfs_header_nritems(leaf); for (i = 0; i < nritems; i++) { u64 disk_blocknr; - key = &leaf->items[i].key; - if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY) + + btrfs_item_key_to_cpu(leaf, &key, i); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE) + if 
(btrfs_file_extent_type(leaf, fi) == + BTRFS_FILE_EXTENT_INLINE) continue; /* * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - disk_blocknr = btrfs_file_extent_disk_blocknr(fi); + disk_blocknr = btrfs_file_extent_disk_blocknr(leaf, fi); if (disk_blocknr == 0) continue; ret = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(fi), - 0); + btrfs_file_extent_disk_num_blocks(leaf, fi), 0); BUG_ON(ret); } return 0; } static void reada_walk_down(struct btrfs_root *root, - struct btrfs_node *node) + struct extent_buffer *node) { int i; u32 nritems; @@ -1314,7 +1322,7 @@ static void reada_walk_down(struct btrfs_root *root, int ret; u32 refs; - nritems = btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { blocknr = btrfs_node_blockptr(node, i); ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); @@ -1337,16 +1345,17 @@ static void reada_walk_down(struct btrfs_root *root, static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int *level) { - struct buffer_head *next; - struct buffer_head *cur; + struct extent_buffer *next; + struct extent_buffer *cur; u64 blocknr; int ret; u32 refs; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]), - 1, &refs); + ret = lookup_extent_ref(trans, root, + extent_buffer_blocknr(path->nodes[*level]), + 1, &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -1360,21 +1369,20 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root cur = path->nodes[*level]; if (*level > 0 && path->slots[*level] == 0) - reada_walk_down(root, btrfs_buffer_node(cur)); + reada_walk_down(root, cur); - if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + if (btrfs_header_level(cur) != *level) WARN_ON(1); if (path->slots[*level] >= - btrfs_header_nritems(btrfs_buffer_header(cur))) + 
btrfs_header_nritems(cur)) break; if (*level == 0) { ret = drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } - blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), - path->slots[*level]); + blocknr = btrfs_node_blockptr(cur, path->slots[*level]); ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); BUG_ON(ret); if (refs != 1) { @@ -1384,8 +1392,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root continue; } next = btrfs_find_tree_block(root, blocknr); - if (!next || !buffer_uptodate(next)) { - brelse(next); + if (!next || !btrfs_buffer_uptodate(next)) { + free_extent_buffer(next); mutex_unlock(&root->fs_info->fs_mutex); next = read_tree_block(root, blocknr); mutex_lock(&root->fs_info->fs_mutex); @@ -1395,7 +1403,7 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); if (refs != 1) { path->slots[*level]++; - brelse(next); + free_extent_buffer(next); ret = btrfs_free_extent(trans, root, blocknr, 1, 1); BUG_ON(ret); @@ -1404,17 +1412,17 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root } WARN_ON(*level <= 0); if (path->nodes[*level-1]) - btrfs_block_release(root, path->nodes[*level-1]); + free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(btrfs_buffer_header(next)); + *level = btrfs_header_level(next); path->slots[*level] = 0; } out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = btrfs_free_extent(trans, root, - bh_blocknr(path->nodes[*level]), 1, 1); - btrfs_block_release(root, path->nodes[*level]); + extent_buffer_blocknr(path->nodes[*level]), 1, 1); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; BUG_ON(ret); @@ -1436,24 +1444,24 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < btrfs_header_nritems( - 
btrfs_buffer_header(path->nodes[i])) - 1) { - struct btrfs_node *node; - node = btrfs_buffer_node(path->nodes[i]); + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { + struct extent_buffer *node; + struct btrfs_disk_key disk_key; + node = path->nodes[i]; path->slots[i]++; *level = i; WARN_ON(*level == 0); + btrfs_node_key(node, &disk_key, path->slots[i]); memcpy(&root_item->drop_progress, - &node->ptrs[path->slots[i]].key, - sizeof(root_item->drop_progress)); + &disk_key, sizeof(disk_key)); root_item->drop_level = i; return 0; } else { ret = btrfs_free_extent(trans, root, - bh_blocknr(path->nodes[*level]), - 1, 1); + extent_buffer_blocknr(path->nodes[*level]), + 1, 1); BUG_ON(ret); - btrfs_block_release(root, path->nodes[*level]); + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; } @@ -1480,15 +1488,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); orig_level = level; if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { path->nodes[level] = root->node; path->slots[level] = 0; } else { struct btrfs_key key; - struct btrfs_disk_key *found_key; - struct btrfs_node *node; + struct btrfs_disk_key found_key; + struct extent_buffer *node; btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); level = root_item->drop_level; @@ -1498,10 +1506,10 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ret = wret; goto out; } - node = btrfs_buffer_node(path->nodes[level]); - found_key = &node->ptrs[path->slots[level]].key; - WARN_ON(memcmp(found_key, &root_item->drop_progress, - sizeof(*found_key))); + node = path->nodes[level]; + btrfs_node_key(node, &found_key, path->slots[level]); + WARN_ON(memcmp(&found_key, &root_item->drop_progress, + sizeof(found_key))); } while(1) { wret = walk_down_tree(trans, root, path, &level); 
@@ -1516,12 +1524,12 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root if (wret < 0) ret = wret; ret = -EAGAIN; - get_bh(root->node); + extent_buffer_get(root->node); break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { - btrfs_block_release(root, path->nodes[i]); + free_extent_buffer(path->nodes[i]); path->nodes[i] = 0; } } @@ -1581,13 +1589,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_path *path; int ret; int err = 0; - struct btrfs_block_group_item *bi; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; struct radix_tree_root *radix; struct btrfs_key key; struct btrfs_key found_key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 group_size_blocks; u64 used; @@ -1596,7 +1603,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) root = info->extent_root; key.objectid = 0; key.offset = group_size_blocks; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); path = btrfs_alloc_path(); @@ -1610,18 +1616,18 @@ int btrfs_read_block_groups(struct btrfs_root *root) err = ret; break; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); cache = kmalloc(sizeof(*cache), GFP_NOFS); if (!cache) { err = -1; break; } - bi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_block_group_item); - if (bi->flags & BTRFS_BLOCK_GROUP_DATA) { + read_extent_buffer(leaf, &cache->item, + btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(cache->item)); + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { radix = &info->block_group_data_radix; cache->data = 1; } else { @@ -1629,7 +1635,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) cache->data = 0; } - memcpy(&cache->item, bi, sizeof(*bi)); memcpy(&cache->key, &found_key, sizeof(found_key)); cache->last_alloc = cache->key.objectid; 
cache->first_free = cache->key.objectid; @@ -1640,11 +1645,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) key.objectid = found_key.objectid + found_key.offset; btrfs_release_path(root, path); + ret = radix_tree_insert(radix, found_key.objectid + found_key.offset - 1, (void *)cache); BUG_ON(ret); - used = btrfs_block_group_used(bi); + used = btrfs_block_group_used(&cache->item); if (used < div_factor(key.offset, 8)) { radix_tree_tag_set(radix, found_key.objectid + found_key.offset - 1, diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 0ab368e091f9..55fefdfab84c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -26,16 +26,6 @@ struct tree_entry { struct rb_node rb_node; }; -/* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) - -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) - void __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", @@ -223,7 +213,8 @@ int add_extent_mapping(struct extent_map_tree *tree, if (rb) prev = rb_entry(rb, struct extent_map, rb_node); if (prev && prev->end + 1 == em->start && - ((em->block_start == 0 && prev->block_start == 0) || + ((em->block_start == EXTENT_MAP_HOLE && + prev->block_start == EXTENT_MAP_HOLE) || (em->block_start == prev->block_end + 1))) { em->start = prev->start; em->block_start = prev->block_start; @@ -926,6 +917,40 @@ int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +int find_first_extent_bit(struct extent_map_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. 
+ */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->state & bits) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(find_first_extent_bit); + u64 find_lock_delalloc_range(struct extent_map_tree *tree, u64 start, u64 lock_start, u64 *end, u64 max_bytes) { @@ -1450,7 +1475,7 @@ int extent_read_full_page(struct extent_map_tree *tree, struct page *page, em = NULL; /* we've found a hole, just zero and go on */ - if (block_start == 0) { + if (block_start == EXTENT_MAP_HOLE) { zero_user_page(page, page_offset, iosize, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1593,7 +1618,8 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, free_extent_map(em); em = NULL; - if (block_start == 0 || block_start == EXTENT_MAP_INLINE) { + if (block_start == EXTENT_MAP_HOLE || + block_start == EXTENT_MAP_INLINE) { clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); cur = cur + iosize; @@ -1630,7 +1656,6 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, nr++; } done: - WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0)); unlock_extent(tree, start, page_end, GFP_NOFS); unlock_page(page); return 0; @@ -1827,8 +1852,623 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, // XXX(hch): block 0 is valid in some cases, e.g. 
XFS RT device if (em->block_start == EXTENT_MAP_INLINE || - em->block_start == 0) - return 0; + em->block_start == EXTENT_MAP_HOLE) + return 0; return (em->block_start + start - em->start) >> inode->i_blkbits; } + +struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 0; + + eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + if (!eb || IS_ERR(eb)) + return NULL; + + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + for (i = 0; i < num_pages; i++, index++) { + p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); + if (!p) + goto fail; + eb->pages[i] = p; + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + return eb; +fail: + free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(alloc_extent_buffer); + +struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + + eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask); + if (!eb || IS_ERR(eb)) + return NULL; + + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + for (i = 0; i < num_pages; i++, index++) { + p = find_get_page(mapping, index); + if (!p) + goto fail; + eb->pages[i] = p; + } + return eb; +fail: + free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(find_extent_buffer); + +void free_extent_buffer(struct extent_buffer *eb) +{ + 
unsigned long i; + unsigned long num_pages; + + if (!eb) + return; + + if (!atomic_dec_and_test(&eb->refs)) + return; + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + for (i = 0; i < num_pages; i++) { + if (eb->pages[i]) + page_cache_release(eb->pages[i]); + } + kfree(eb); +} +EXPORT_SYMBOL(free_extent_buffer); + +int clear_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + int set; + unsigned long i; + unsigned long num_pages; + struct page *page; + + u64 start = eb->start; + u64 end = start + eb->len - 1; + + set = clear_extent_dirty(tree, start, end, GFP_NOFS); + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + lock_page(page); + /* + * if we're on the last page or the first page and the + * block isn't aligned on a page boundary, do extra checks + * to make sure we don't clean page that is partially dirty + */ + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + start = page->index << PAGE_CACHE_SHIFT; + end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, + EXTENT_DIRTY, 0)) { + unlock_page(page); + continue; + } + } + clear_page_dirty_for_io(page); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(clear_extent_buffer_dirty); + +int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + return wait_on_extent_writeback(tree, eb->start, + eb->start + eb->len - 1); +} +EXPORT_SYMBOL(wait_on_extent_buffer_writeback); + +int set_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + return set_range_dirty(tree, eb->start, eb->start + eb->len - 1); +} +EXPORT_SYMBOL(set_extent_buffer_dirty); + +int set_extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb) 
+{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) { + check_page_uptodate(tree, page); + continue; + } + SetPageUptodate(page); + } + return 0; +} +EXPORT_SYMBOL(set_extent_buffer_uptodate); + +int extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb) +{ + if (eb->flags & EXTENT_UPTODATE) + return 1; + return test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1); +} +EXPORT_SYMBOL(extent_buffer_uptodate); + +int read_extent_buffer_pages(struct extent_map_tree *tree, + struct extent_buffer *eb, int wait) +{ + unsigned long i; + struct page *page; + int err; + int ret = 0; + unsigned long num_pages; + + if (eb->flags & EXTENT_UPTODATE) + return 0; + + if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1)) { + return 0; + } + + num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT) + 1; + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + if (PageUptodate(page)) { + continue; + } + if (!wait) { + if (TestSetPageLocked(page)) { + continue; + } + } else { + lock_page(page); + } + if (!PageUptodate(page)) { + err = page->mapping->a_ops->readpage(NULL, page); + if (err) { + ret = err; + } + } else { + unlock_page(page); + } + } + + if (ret || !wait) { + return ret; + } + + for (i = 0; i < num_pages; i++) { + page = eb->pages[i]; + wait_on_page_locked(page); + if (!PageUptodate(page)) { + ret = -EIO; + } + } + eb->flags |= EXTENT_UPTODATE; + return ret; +} +EXPORT_SYMBOL(read_extent_buffer_pages); + +void read_extent_buffer(struct extent_buffer *eb, 
void *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *dst = (char *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memcpy(dst, kaddr + offset, cur); + // kunmap_atomic(kaddr, KM_USER0); + + dst += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(read_extent_buffer); + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + size_t offset; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + + if (i == 0) { + offset = start_offset; + *map_start = 0; + } else { + offset = 0; + *map_start = (i << PAGE_CACHE_SHIFT) - offset; + } + + // kaddr = kmap_atomic(eb->pages[i], km); + kaddr = page_address(eb->pages[i]); + *token = kaddr; + *map = kaddr + offset; + *map_len = PAGE_CACHE_SIZE - offset; + return 0; +} +EXPORT_SYMBOL(map_extent_buffer); + +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) +{ + // kunmap_atomic(token, km); +} +EXPORT_SYMBOL(unmap_extent_buffer); + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *ptr = (char *)ptrv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = 
(start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + ret = memcmp(ptr, kaddr + offset, cur); + // kunmap_atomic(kaddr, KM_USER0); + if (ret) + break; + + ptr += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } + return ret; +} +EXPORT_SYMBOL(memcmp_extent_buffer); + +void write_extent_buffer(struct extent_buffer *eb, const void *srcv, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *src = (char *)srcv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memcpy(kaddr + offset, src, cur); + // kunmap_atomic(kaddr, KM_USER0); + + src += cur; + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(write_extent_buffer); + +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + page = eb->pages[i]; + offset = start & ((unsigned 
long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + // kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); + memset(kaddr + offset, c, cur); + // kunmap_atomic(kaddr, KM_USER0); + + len -= cur; + offset = 0; + i++; + page = eb->pages[i]; + } +} +EXPORT_SYMBOL(memset_extent_buffer); + +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + u64 dst_len = dst->len; + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + + WARN_ON(src->len != dst_len); + + offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1); + if (i == 0) + offset += start_offset; + + while(len > 0) { + page = dst->pages[i]; + WARN_ON(!PageUptodate(page)); + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); + + // kaddr = kmap_atomic(page, KM_USER1); + kaddr = page_address(page); + read_extent_buffer(src, kaddr + offset, src_offset, cur); + // kunmap_atomic(kaddr, KM_USER1); + + src_offset += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(copy_extent_buffer); + +static void move_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + // char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); + if (dst_page == src_page) { + memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); + } else { + // char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *src_kaddr = page_address(src_page); + char *p = dst_kaddr + dst_off + len; + char *s = src_kaddr + src_off + len; + + while (len--) + *--p = *--s; + + // kunmap_atomic(src_kaddr, KM_USER1); + } + // kunmap_atomic(dst_kaddr, KM_USER0); +} + 
+static void copy_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + //kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); + char *src_kaddr; + + if (dst_page != src_page) + src_kaddr = page_address(src_page); // kmap_atomic(src_page, KM_USER1); + else + src_kaddr = dst_kaddr; + + memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); + /* + kunmap_atomic(dst_kaddr, KM_USER0); + if (dst_page != src_page) + kunmap_atomic(src_kaddr, KM_USER1); + */ +} + +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + + while(len > 0) { + dst_off_in_page = dst_offset & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = src_offset & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; + + if (src_i == 0) + src_off_in_page += start_offset; + if (dst_i == 0) + dst_off_in_page += start_offset; + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - + src_off_in_page)); + cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE - + dst_off_in_page)); + + copy_pages(dst->pages[dst_i], dst->pages[src_i], + dst_off_in_page, src_off_in_page, cur); + + src_offset += cur; + dst_offset += cur; + len -= cur; + } +} +EXPORT_SYMBOL(memcpy_extent_buffer); + +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long 
src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + unsigned long dst_end = dst_offset + len - 1; + unsigned long src_end = src_offset + len - 1; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset < src_offset) { + memcpy_extent_buffer(dst, dst_offset, src_offset, len); + return; + } + while(len > 0) { + dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; + + dst_off_in_page = dst_end & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = src_end & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + if (src_i == 0) + src_off_in_page += start_offset; + if (dst_i == 0) + dst_off_in_page += start_offset; + + cur = min(len, src_off_in_page + 1); + cur = min(cur, dst_off_in_page + 1); +// printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); + move_pages(dst->pages[dst_i], dst->pages[src_i], + dst_off_in_page - cur + 1, + src_off_in_page - cur + 1, cur); + + dst_end -= cur - 1; + src_end -= cur - 1; + len -= cur; + } +} +EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index d913ce01248d..430b997a70f6 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,9 +3,20 @@ #include +#define EXTENT_MAP_HOLE (u64)-3 #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) 
+#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + + struct extent_map_ops { int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end); @@ -51,10 +62,13 @@ struct extent_state { struct list_head list; }; +#define EXTENT_BUFFER_SIZE(nr) (sizeof(struct extent_buffer) + \ + (nr - 1) * sizeof(struct page *)) struct extent_buffer { u64 start; - u64 end; /* inclusive */ - char *addr; + unsigned long len; + atomic_t refs; + int flags; struct page *pages[]; }; @@ -87,8 +101,12 @@ int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, + gfp_t mask); int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); +int find_first_extent_bit(struct extent_map_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); int extent_invalidatepage(struct extent_map_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct extent_map_tree *tree, struct page *page, @@ -106,4 +124,57 @@ int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); + +struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_map_tree *tree, + struct extent_buffer *eb, int wait); + +static inline void extent_buffer_get(struct extent_buffer *eb) 
+{ + atomic_inc(&eb->refs); +} + +static inline u64 extent_buffer_blocknr(struct extent_buffer *eb) +{ + return eb->start / 4096; +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_map_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb); +int extent_buffer_uptodate(struct extent_map_tree *tree, + struct extent_buffer *eb); +int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index ea4dd4c5fce3..226f6d028c3f 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -34,12 +34,12 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *item; struct btrfs_key file_key; struct btrfs_path 
*path; + struct extent_buffer *leaf; path = btrfs_alloc_path(); BUG_ON(!path); file_key.objectid = objectid; file_key.offset = pos; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_insert_empty_item(trans, root, path, &file_key, @@ -47,15 +47,16 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, if (ret < 0) goto out; BUG_ON(ret); - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(item, offset); - btrfs_set_file_extent_disk_num_blocks(item, disk_num_blocks); - btrfs_set_file_extent_offset(item, 0); - btrfs_set_file_extent_num_blocks(item, num_blocks); - btrfs_set_file_extent_generation(item, trans->transid); - btrfs_set_file_extent_type(item, BTRFS_FILE_EXTENT_REG); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_set_file_extent_disk_blocknr(leaf, item, offset); + btrfs_set_file_extent_disk_num_blocks(leaf, item, disk_num_blocks); + btrfs_set_file_extent_offset(leaf, item, 0); + btrfs_set_file_extent_num_blocks(leaf, item, num_blocks); + btrfs_set_file_extent_generation(leaf, item, trans->transid); + btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(leaf); out: btrfs_free_path(path); return ret; @@ -71,32 +72,30 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_csum_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 csum_offset = 0; int csums_in_item; file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); if (ret < 0) goto fail; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; if (ret > 0) { ret = 1; if (path->slots[0] == 0) goto fail; 
path->slots[0]--; - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || found_key.objectid != objectid) { goto fail; } csum_offset = (offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; - csums_in_item = btrfs_item_size(leaf->items + path->slots[0]); + csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); csums_in_item /= BTRFS_CRC32_SIZE; if (csum_offset >= csums_in_item) { @@ -127,7 +126,6 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, file_key.objectid = objectid; file_key.offset = offset; - file_key.flags = 0; btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); ret = btrfs_search_slot(trans, root, &file_key, path, ins_len, cow); return ret; @@ -138,12 +136,14 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 objectid, u64 offset, char *data, size_t len) { + return 0; +#if 0 int ret; struct btrfs_key file_key; struct btrfs_key found_key; struct btrfs_path *path; struct btrfs_csum_item *item; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; u64 csum_offset; path = btrfs_alloc_path(); @@ -161,8 +161,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, if (ret == -EFBIG) { u32 item_size; /* we found one, but it isn't big enough yet */ - leaf = btrfs_buffer_leaf(path->nodes[0]); - item_size = btrfs_item_size(leaf->items + path->slots[0]); + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); if ((item_size / BTRFS_CRC32_SIZE) >= MAX_CSUM_ITEMS(root)) { /* already at max size, make a new one */ goto insert; @@ -188,8 +188,8 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, goto insert; } path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); csum_offset = 
(offset - found_key.offset) >> root->fs_info->sb->s_blocksize_bits; if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || @@ -197,10 +197,10 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, csum_offset >= MAX_CSUM_ITEMS(root)) { goto insert; } - if (csum_offset >= btrfs_item_size(leaf->items + path->slots[0]) / + if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / BTRFS_CRC32_SIZE) { u32 diff = (csum_offset + 1) * BTRFS_CRC32_SIZE; - diff = diff - btrfs_item_size(leaf->items + path->slots[0]); + diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); if (diff != BTRFS_CRC32_SIZE) goto insert; ret = btrfs_extend_item(trans, root, path, diff); @@ -220,21 +220,20 @@ insert: goto fail; } csum: - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], - struct btrfs_csum_item); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); ret = 0; item = (struct btrfs_csum_item *)((unsigned char *)item + csum_offset * BTRFS_CRC32_SIZE); found: - btrfs_check_bounds(&item->csum, BTRFS_CRC32_SIZE, - path->nodes[0]->b_data, - root->fs_info->sb->s_blocksize); + /* FIXME!!!!!!!!!!!! 
*/ ret = btrfs_csum_data(root, data, len, &item->csum); btrfs_mark_buffer_dirty(path->nodes[0]); fail: btrfs_release_path(root, path); btrfs_free_path(path); return ret; +#endif } int btrfs_csum_truncate(struct btrfs_trans_handle *trans, @@ -242,21 +241,21 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, u64 isize) { struct btrfs_key key; - struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[0]); + struct extent_buffer *leaf = path->nodes[0]; int slot = path->slots[0]; int ret; u32 new_item_size; u64 new_item_span; u64 blocks; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (isize <= key.offset) return 0; new_item_span = isize - key.offset; - blocks = (new_item_span + root->blocksize - 1) >> + blocks = (new_item_span + root->sectorsize - 1) >> root->fs_info->sb->s_blocksize_bits; new_item_size = blocks * BTRFS_CRC32_SIZE; - if (new_item_size >= btrfs_item_size(leaf->items + slot)) + if (new_item_size >= btrfs_item_size_nr(leaf, slot)) return 0; ret = btrfs_truncate_item(trans, root, path, new_item_size); BUG_ON(ret); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4cc459c943ec..1734ca695555 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. 
*/ -#include #include #include #include @@ -88,7 +87,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, { struct btrfs_key key; struct btrfs_path *path; - char *ptr, *kaddr; + struct extent_buffer *leaf; + char *kaddr; + unsigned long ptr; struct btrfs_file_extent_item *ei; u32 datasize; int err = 0; @@ -102,7 +103,6 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); BUG_ON(size >= PAGE_CACHE_SIZE); datasize = btrfs_file_extent_calc_inline_size(size); @@ -113,18 +113,17 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, err = ret; goto fail; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, - BTRFS_FILE_EXTENT_INLINE); + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); kaddr = kmap_atomic(page, KM_USER0); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, kaddr + page_offset, size); + write_extent_buffer(leaf, kaddr + page_offset, ptr, size); kunmap_atomic(kaddr, KM_USER0); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); return err; @@ -156,8 +155,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = inode->i_sb->s_bdev; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; down_read(&BTRFS_I(inode)->root->snap_sem); @@ -184,7 +183,7 @@ 
static int dirty_and_release_pages(struct btrfs_trans_handle *trans, if (inode->i_size < start_pos) { u64 last_pos_in_file; u64 hole_size; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; last_pos_in_file = (isize + mask) & ~mask; hole_size = (start_pos - last_pos_in_file + mask) & ~mask; @@ -227,8 +226,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, /* step one, delete the existing extents in this range */ /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->blocksize -1) & - ~((u64)root->blocksize - 1), &hint_block); + (pos + write_bytes + root->sectorsize -1) & + ~((u64)root->sectorsize - 1), &hint_block); if (err) goto failed; @@ -288,7 +287,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, { int ret; struct btrfs_key key; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; struct btrfs_file_extent_item *extent; u64 extent_end = 0; @@ -327,10 +326,10 @@ next_slot: found_extent = 0; found_inline = 0; extent = NULL; - leaf = btrfs_buffer_leaf(path->nodes[0]); + leaf = path->nodes[0]; slot = path->slots[0]; ret = 0; - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + btrfs_item_key_to_cpu(leaf, &key, slot); if (key.offset >= end || key.objectid != inode->i_ino) { goto out; } @@ -344,17 +343,18 @@ next_slot: if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - found_type = btrfs_file_extent_type(extent); + found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(extent) << + (btrfs_file_extent_num_blocks(leaf, extent) << inode->i_blkbits); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + struct btrfs_item *item; + item = btrfs_item_nr(leaf, slot); found_inline = 1; extent_end = key.offset + - btrfs_file_extent_inline_len(leaf->items + - 
slot); + btrfs_file_extent_inline_len(leaf, item); } } else { extent_end = search_start; @@ -365,8 +365,7 @@ next_slot: search_start >= extent_end) { int nextret; u32 nritems; - nritems = btrfs_header_nritems( - btrfs_buffer_header(path->nodes[0])); + nritems = btrfs_header_nritems(leaf); if (slot >= nritems - 1) { nextret = btrfs_next_leaf(root, path); if (nextret) @@ -380,7 +379,7 @@ next_slot: /* FIXME, there's only one inline extent allowed right now */ if (found_inline) { - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; search_start = (extent_end + mask) & ~mask; } else search_start = extent_end; @@ -388,10 +387,13 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf,extent); u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); - memcpy(&old, extent, sizeof(old)); + btrfs_file_extent_disk_num_blocks(leaf, + extent); + read_extent_buffer(leaf, &old, + (unsigned long)extent, + sizeof(old)); if (disk_blocknr != 0) { ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, disk_num_blocks); @@ -406,20 +408,24 @@ next_slot: u64 new_num; u64 old_num; keep = 1; - WARN_ON(start & (root->blocksize - 1)); + WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { new_num = (start - key.offset) >> inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(extent); + old_num = btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); - if (btrfs_file_extent_disk_blocknr(extent)) { + btrfs_file_extent_disk_blocknr(leaf, + extent); + if (btrfs_file_extent_disk_blocknr(leaf, + extent)) { inode->i_blocks -= (old_num - new_num) << 3; } - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, + extent, new_num); - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); } @@ -431,13 +437,17 @@ next_slot: u64 
extent_num_blocks = 0; if (found_extent) { disk_blocknr = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); disk_num_blocks = - btrfs_file_extent_disk_num_blocks(extent); + btrfs_file_extent_disk_num_blocks(leaf, + extent); extent_num_blocks = - btrfs_file_extent_num_blocks(extent); + btrfs_file_extent_num_blocks(leaf, + extent); *hint_block = - btrfs_file_extent_disk_blocknr(extent); + btrfs_file_extent_disk_blocknr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ @@ -464,42 +474,37 @@ next_slot: struct btrfs_key ins; ins.objectid = inode->i_ino; ins.offset = end; - ins.flags = 0; btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); btrfs_release_path(root, path); ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*extent)); + leaf = path->nodes[0]; if (ret) { - btrfs_print_leaf(root, btrfs_buffer_leaf(path->nodes[0])); - printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.flags, ins.offset, start, end, key.offset, extent_end, keep); + btrfs_print_leaf(root, leaf); + printk("got %d on inserting %Lu %u %Lu start %Lu end %Lu found %Lu %Lu keep was %d\n", ret , ins.objectid, ins.type, ins.offset, start, end, key.offset, extent_end, keep); } BUG_ON(ret); - extent = btrfs_item_ptr( - btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(extent, - btrfs_file_extent_disk_blocknr(&old)); - btrfs_set_file_extent_disk_num_blocks(extent, - btrfs_file_extent_disk_num_blocks(&old)); - - btrfs_set_file_extent_offset(extent, - btrfs_file_extent_offset(&old) + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + write_extent_buffer(leaf, &old, + (unsigned long)extent, sizeof(old)); + + btrfs_set_file_extent_offset(leaf, extent, + le64_to_cpu(old.offset) + ((end - key.offset) >> inode->i_blkbits)); - 
WARN_ON(btrfs_file_extent_num_blocks(&old) < + WARN_ON(le64_to_cpu(old.num_blocks) < (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(extent, + btrfs_set_file_extent_num_blocks(leaf, extent, (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_type(extent, + btrfs_set_file_extent_type(leaf, extent, BTRFS_FILE_EXTENT_REG); - btrfs_set_file_extent_generation(extent, - btrfs_file_extent_generation(&old)); btrfs_mark_buffer_dirty(path->nodes[0]); - if (btrfs_file_extent_disk_blocknr(&old) != 0) { + if (le64_to_cpu(old.disk_blocknr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(extent) << 3; + btrfs_file_extent_num_blocks(leaf, + extent) << 3; } ret = 0; goto out; @@ -529,8 +534,8 @@ static int prepare_pages(struct btrfs_root *root, u64 num_blocks; u64 start_pos; - start_pos = pos & ~((u64)root->blocksize - 1); - num_blocks = (write_bytes + pos - start_pos + root->blocksize - 1) >> + start_pos = pos & ~((u64)root->sectorsize - 1); + num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index b78346177539..35d2608f8918 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -20,24 +20,18 @@ #include "disk-io.h" #include "transaction.h" -int btrfs_insert_inode(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 objectid, struct btrfs_inode_item - *inode_item) +int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 objectid) { - struct btrfs_path *path; struct btrfs_key key; int ret; key.objectid = objectid; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - path = btrfs_alloc_path(); - BUG_ON(!path); - ret = btrfs_insert_item(trans, root, &key, inode_item, - sizeof(*inode_item)); - btrfs_release_path(root, path); - btrfs_free_path(path); + ret = 
btrfs_insert_empty_item(trans, root, path, &key, + sizeof(struct btrfs_inode_item)); if (ret == 0 && objectid > root->highest_inode) root->highest_inode = objectid; return ret; @@ -51,15 +45,15 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int cow = mod != 0; int ret; int slot; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_key found_key; ret = btrfs_search_slot(trans, root, location, path, ins_len, cow); if (ret > 0 && btrfs_key_type(location) == BTRFS_ROOT_ITEM_KEY && location->offset == (u64)-1 && path->slots[0] != 0) { slot = path->slots[0] - 1; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, &leaf->items[slot].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, slot); if (found_key.objectid == location->objectid && btrfs_key_type(&found_key) == btrfs_key_type(location)) { path->slots[0]--; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 405470866254..ab74977adf5c 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -24,8 +24,9 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) { struct btrfs_path *path; int ret; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_key search_key; + struct btrfs_key found_key; int slot; path = btrfs_alloc_path(); @@ -39,8 +40,9 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid) BUG_ON(ret == 0); if (path->slots[0] > 0) { slot = path->slots[0] - 1; - l = btrfs_buffer_leaf(path->nodes[0]); - *objectid = btrfs_disk_key_objectid(&l->items[slot].key); + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &found_key, slot); + *objectid = found_key.objectid; } else { *objectid = BTRFS_FIRST_FREE_OBJECTID; } @@ -64,13 +66,12 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, int slot = 0; u64 last_ino = 0; int start_found; - struct btrfs_leaf *l; + struct extent_buffer *l; struct btrfs_key search_key; u64 search_start = dirid; path = 
btrfs_alloc_path(); BUG_ON(!path); - search_key.flags = 0; search_start = root->last_inode_alloc; search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); search_key.objectid = search_start; @@ -86,9 +87,9 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, path->slots[0]--; while (1) { - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - if (slot >= btrfs_header_nritems(&l->header)) { + if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret == 0) continue; @@ -103,7 +104,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, last_ino : search_start; goto found; } - btrfs_disk_key_to_cpu(&key, &l->items[slot].key); + btrfs_item_key_to_cpu(l, &key, slot); if (key.objectid >= search_start) { if (start_found) { if (last_ino < search_start) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b03d40a907ca..fbe2836364e0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -159,10 +159,8 @@ out: int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { - char csum[BTRFS_CRC32_SIZE]; size_t offset = start - (page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; char *kaddr; u64 private; @@ -173,11 +171,15 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) if (ret) { goto zeroit; } + /* + struct btrfs_root *root = BTRFS_I(inode)->root; + char csum[BTRFS_CRC32_SIZE]; ret = btrfs_csum_data(root, kaddr + offset, end - start + 1, csum); BUG_ON(ret); if (memcmp(csum, &private, BTRFS_CRC32_SIZE)) { goto zeroit; } + */ kunmap_atomic(kaddr, KM_IRQ0); return 0; @@ -192,7 +194,9 @@ zeroit: void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; + struct extent_buffer *leaf; struct btrfs_inode_item *inode_item; + struct btrfs_inode_timespec *tspec; struct btrfs_root *root = BTRFS_I(inode)->root; 
struct btrfs_key location; u64 alloc_group_block; @@ -205,29 +209,37 @@ void btrfs_read_locked_inode(struct inode *inode) memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); - if (ret) { + if (ret) goto make_bad; - } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], - struct btrfs_inode_item); - inode->i_mode = btrfs_inode_mode(inode_item); - inode->i_nlink = btrfs_inode_nlink(inode_item); - inode->i_uid = btrfs_inode_uid(inode_item); - inode->i_gid = btrfs_inode_gid(inode_item); - inode->i_size = btrfs_inode_size(inode_item); - inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime); - inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime); - inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime); - inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime); - inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime); - inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime); - inode->i_blocks = btrfs_inode_nblocks(inode_item); - inode->i_generation = btrfs_inode_generation(inode_item); + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_inode_item); + + inode->i_mode = btrfs_inode_mode(leaf, inode_item); + inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); + inode->i_uid = btrfs_inode_uid(leaf, inode_item); + inode->i_gid = btrfs_inode_gid(leaf, inode_item); + inode->i_size = btrfs_inode_size(leaf, inode_item); + + tspec = btrfs_inode_atime(inode_item); + inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + tspec = btrfs_inode_mtime(inode_item); + inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec); + + tspec = btrfs_inode_ctime(inode_item); + inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec); + inode->i_ctime.tv_nsec = 
btrfs_timespec_nsec(leaf, tspec); + + inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item); + inode->i_generation = btrfs_inode_generation(leaf, inode_item); inode->i_rdev = 0; - rdev = btrfs_inode_rdev(inode_item); - alloc_group_block = btrfs_inode_block_group(inode_item); + rdev = btrfs_inode_rdev(leaf, inode_item); + + alloc_group_block = btrfs_inode_block_group(leaf, inode_item); BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info, alloc_group_block); @@ -267,24 +279,35 @@ make_bad: make_bad_inode(inode); } -static void fill_inode_item(struct btrfs_inode_item *item, +static void fill_inode_item(struct extent_buffer *leaf, + struct btrfs_inode_item *item, struct inode *inode) { - btrfs_set_inode_uid(item, inode->i_uid); - btrfs_set_inode_gid(item, inode->i_gid); - btrfs_set_inode_size(item, inode->i_size); - btrfs_set_inode_mode(item, inode->i_mode); - btrfs_set_inode_nlink(item, inode->i_nlink); - btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec); - btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec); - btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec); - btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec); - btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec); - btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec); - btrfs_set_inode_nblocks(item, inode->i_blocks); - btrfs_set_inode_generation(item, inode->i_generation); - btrfs_set_inode_rdev(item, inode->i_rdev); - btrfs_set_inode_block_group(item, + btrfs_set_inode_uid(leaf, item, inode->i_uid); + btrfs_set_inode_gid(leaf, item, inode->i_gid); + btrfs_set_inode_size(leaf, item, inode->i_size); + btrfs_set_inode_mode(leaf, item, inode->i_mode); + btrfs_set_inode_nlink(leaf, item, inode->i_nlink); + + btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), + inode->i_atime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), + inode->i_atime.tv_nsec); + + btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), + 
inode->i_mtime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), + inode->i_mtime.tv_nsec); + + btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), + inode->i_ctime.tv_sec); + btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), + inode->i_ctime.tv_nsec); + + btrfs_set_inode_nblocks(leaf, item, inode->i_blocks); + btrfs_set_inode_generation(leaf, item, inode->i_generation); + btrfs_set_inode_rdev(leaf, item, inode->i_rdev); + btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group->key.objectid); } @@ -294,6 +317,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, { struct btrfs_inode_item *inode_item; struct btrfs_path *path; + struct extent_buffer *leaf; int ret; path = btrfs_alloc_path(); @@ -306,12 +330,12 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, goto failed; } - inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], + leaf = path->nodes[0]; + inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - fill_inode_item(inode_item, inode); - btrfs_mark_buffer_dirty(path->nodes[0]); + fill_inode_item(leaf, inode_item, inode); + btrfs_mark_buffer_dirty(leaf); btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: @@ -330,8 +354,9 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, const char *name = dentry->d_name.name; int name_len = dentry->d_name.len; int ret = 0; - u64 objectid; + struct extent_buffer *leaf; struct btrfs_dir_item *di; + struct btrfs_key key; path = btrfs_alloc_path(); if (!path) { @@ -349,14 +374,15 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans, ret = -ENOENT; goto err; } - objectid = btrfs_disk_key_objectid(&di->location); + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); ret = btrfs_delete_one_dir_name(trans, root, path, di); if (ret) goto err; btrfs_release_path(root, path); di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, - objectid, name, name_len, -1); + 
key.objectid, name, name_len, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); goto err; @@ -391,12 +417,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) root = BTRFS_I(dir)->root; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; + btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); + return ret; } @@ -411,7 +440,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; struct btrfs_key found_key; int found_type; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; char *goodnames = ".."; unsigned long nr; @@ -419,10 +448,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) BUG_ON(!path); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = (u32)-1; + key.type = (u8)-1; while(1) { ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) { @@ -435,9 +465,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out; } path->slots[0]--; - leaf = btrfs_buffer_leaf(path->nodes[0]); - btrfs_disk_key_to_cpu(&found_key, - &leaf->items[path->slots[0]].key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); if (found_key.objectid != inode->i_ino) { err = -ENOENT; @@ -513,9 +542,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, int ret; struct btrfs_path *path; struct btrfs_key key; - struct btrfs_disk_key *found_key; + struct btrfs_key found_key; u32 found_type; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; u64 extent_num_blocks = 0; @@ -527,10 +556,12 @@ 
static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); path->reada = -1; BUG_ON(!path); + /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; - key.flags = (u32)-1; + key.type = (u8)-1; + while(1) { btrfs_init_path(path); fi = NULL; @@ -542,27 +573,28 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, BUG_ON(path->slots[0] == 0); path->slots[0]--; } - leaf = btrfs_buffer_leaf(path->nodes[0]); - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); - if (btrfs_disk_key_objectid(found_key) != inode->i_ino) + if (found_key.objectid != inode->i_ino) break; + if (found_type != BTRFS_CSUM_ITEM_KEY && found_type != BTRFS_DIR_ITEM_KEY && found_type != BTRFS_DIR_INDEX_KEY && found_type != BTRFS_EXTENT_DATA_KEY) break; - item_end = btrfs_disk_key_offset(found_key); + item_end = found_key.offset; if (found_type == BTRFS_EXTENT_DATA_KEY) { - fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], + fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) != + if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { - item_end += btrfs_file_extent_num_blocks(fi) << - inode->i_blkbits; + item_end += + btrfs_file_extent_num_blocks(leaf, fi) << + inode->i_blkbits; } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -583,7 +615,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_set_key_type(&key, found_type); continue; } - if (btrfs_disk_key_offset(found_key) >= inode->i_size) + if (found_key.offset >= inode->i_size) del_item = 1; else del_item = 0; @@ -591,30 +623,31 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, /* FIXME, shrink the extent if the ref count is only 1 */ if 
(found_type == BTRFS_EXTENT_DATA_KEY && - btrfs_file_extent_type(fi) != + btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; - extent_start = btrfs_file_extent_disk_blocknr(fi); + extent_start = btrfs_file_extent_disk_blocknr(leaf, fi); if (!del_item) { u64 orig_num_blocks = - btrfs_file_extent_num_blocks(fi); + btrfs_file_extent_num_blocks(leaf, fi); extent_num_blocks = inode->i_size - - btrfs_disk_key_offset(found_key) + - root->blocksize - 1; + found_key.offset + root->sectorsize - 1; extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(fi, + btrfs_set_file_extent_num_blocks(leaf, fi, extent_num_blocks); num_dec = (orig_num_blocks - extent_num_blocks) << 3; if (extent_start != 0) { inode->i_blocks -= num_dec; } - btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_mark_buffer_dirty(leaf); } else { extent_num_blocks = - btrfs_file_extent_disk_num_blocks(fi); + btrfs_file_extent_disk_num_blocks(leaf, + fi); /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(fi) << 3; + num_dec = btrfs_file_extent_num_blocks(leaf, + fi) << 3; if (extent_start != 0) { found_extent = 1; inode->i_blocks -= num_dec; @@ -725,7 +758,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 mask = root->blocksize - 1; + u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; u64 block_end = attr->ia_size | mask; u64 hole_size; @@ -771,9 +804,11 @@ void btrfs_delete_inode(struct inode *inode) if (is_bad_inode(inode)) { goto no_delete; } + inode->i_size = 0; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); if (ret) @@ -782,6 +817,7 @@ void btrfs_delete_inode(struct inode *inode) if (ret) goto no_delete_lock; nr = trans->blocks_used; + 
btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); @@ -819,7 +855,7 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, ret = 0; goto out; } - btrfs_disk_key_to_cpu(location, &di->location); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, location); out: btrfs_release_path(root, path); btrfs_free_path(path); @@ -856,7 +892,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, ri = &(*sub_root)->root_item; location->objectid = btrfs_root_dirid(ri); - location->flags = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); location->offset = 0; @@ -908,11 +943,14 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); + mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &location); mutex_unlock(&root->fs_info->fs_mutex); + if (ret < 0) return ERR_PTR(ret); + inode = NULL; if (location.objectid) { ret = fixup_tree_root_location(root, &location, &sub_root, @@ -952,10 +990,11 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct btrfs_item *item; struct btrfs_dir_item *di; struct btrfs_key key; + struct btrfs_key found_key; struct btrfs_path *path; int ret; u32 nritems; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; int advance; unsigned char d_type; @@ -964,15 +1003,19 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) u32 di_total; u32 di_len; int key_type = BTRFS_DIR_INDEX_KEY; + char tmp_name[32]; + char *name_ptr; + int name_len; /* FIXME, use a real flag for deciding about the key type */ if (root->fs_info->tree_root == root) key_type = BTRFS_DIR_ITEM_KEY; + mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; - key.flags = 0; btrfs_set_key_type(&key, key_type); key.offset = filp->f_pos; + path = btrfs_alloc_path(); path->reada = 2; ret = 
btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -980,16 +1023,16 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto err; advance = 0; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (advance || slot >= nritems) { if (slot >= nritems -1) { ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } else { slot++; @@ -997,28 +1040,48 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } } advance = 1; - item = leaf->items + slot; - if (btrfs_disk_key_objectid(&item->key) != key.objectid) + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + if (found_key.objectid != key.objectid) break; - if (btrfs_disk_key_type(&item->key) != key_type) + if (btrfs_key_type(&found_key) != key_type) break; - if (btrfs_disk_key_offset(&item->key) < filp->f_pos) + if (found_key.offset < filp->f_pos) continue; - filp->f_pos = btrfs_disk_key_offset(&item->key); + + filp->f_pos = found_key.offset; advance = 1; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); di_cur = 0; - di_total = btrfs_item_size(leaf->items + slot); + di_total = btrfs_item_size(leaf, item); while(di_cur < di_total) { - d_type = btrfs_filetype_table[btrfs_dir_type(di)]; - over = filldir(dirent, (const char *)(di + 1), - btrfs_dir_name_len(di), - btrfs_disk_key_offset(&item->key), - btrfs_disk_key_objectid(&di->location), + struct btrfs_key location; + + name_len = btrfs_dir_name_len(leaf, di); + if (name_len < 32) { + name_ptr = tmp_name; + } else { + name_ptr = kmalloc(name_len, GFP_NOFS); + BUG_ON(!name_ptr); + } + read_extent_buffer(leaf, name_ptr, + (unsigned long)(di + 1), name_len); + + d_type 
= btrfs_filetype_table[btrfs_dir_type(leaf, di)]; + btrfs_dir_item_key_to_cpu(leaf, di, &location); + + over = filldir(dirent, name_ptr, name_len, + found_key.offset, + location.objectid, d_type); + + if (name_ptr != tmp_name) + kfree(name_ptr); + if (over) goto nopos; - di_len = btrfs_dir_name_len(di) + sizeof(*di); + di_len = btrfs_dir_name_len(leaf, di) + sizeof(*di); di_cur += di_len; di = (struct btrfs_dir_item *)((char *)di + di_len); } @@ -1075,11 +1138,15 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, int mode) { struct inode *inode; - struct btrfs_inode_item inode_item; + struct btrfs_inode_item *inode_item; struct btrfs_key *location; + struct btrfs_path *path; int ret; int owner; + path = btrfs_alloc_path(); + BUG_ON(!path); + inode = new_inode(root->fs_info->sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -1095,24 +1162,32 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, group = btrfs_find_block_group(root, group, 0, 0, owner); BTRFS_I(inode)->block_group = group; + ret = btrfs_insert_empty_inode(trans, root, path, objectid); + if (ret) + goto fail; + inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_mode = mode; inode->i_ino = objectid; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - fill_inode_item(&inode_item, inode); + inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + fill_inode_item(path->nodes[0], inode_item, inode); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + location = &BTRFS_I(inode)->location; location->objectid = objectid; - location->flags = 0; location->offset = 0; btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - ret = btrfs_insert_inode(trans, root, objectid, &inode_item); - if (ret) - return ERR_PTR(ret); insert_inode_hash(inode); return inode; +fail: + btrfs_free_path(path); + return ERR_PTR(ret); } static inline u8 btrfs_inode_type(struct inode *inode) @@ 
-1127,8 +1202,8 @@ static int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root; struct inode *parent_inode; + key.objectid = inode->i_ino; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; @@ -1285,14 +1360,18 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); err = btrfs_add_nondir(trans, dentry, inode); + if (err) drop_inode = 1; + dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, dir); err = btrfs_update_inode(trans, root, inode); + if (err) drop_inode = 1; @@ -1321,13 +1400,13 @@ static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans, key.objectid = objectid; key.offset = 0; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid, &key, BTRFS_FT_DIR); if (ret) goto error; + key.objectid = dirid; ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid, &key, BTRFS_FT_DIR); @@ -1350,6 +1429,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); + if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_unlock; @@ -1367,6 +1447,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = PTR_ERR(inode); goto out_fail; } + drop_on_err = 1; inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; @@ -1380,9 +1461,11 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = btrfs_update_inode(trans, root, inode); if (err) goto out_fail; + err = btrfs_add_link(trans, dentry, inode); if (err) goto out_fail; + d_instantiate(dentry, inode); drop_on_err = 0; 
dir->i_sb->s_dirt = 1; @@ -1392,6 +1475,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; btrfs_end_transaction(trans, root); + out_unlock: mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) @@ -1415,8 +1499,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; - struct btrfs_leaf *leaf; - struct btrfs_disk_key *found_key; + struct extent_buffer *leaf; + struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct btrfs_trans_handle *trans = NULL; @@ -1436,8 +1520,8 @@ again: err = -ENOMEM; goto out; } - em->start = 0; - em->end = 0; + em->start = EXTENT_MAP_HOLE; + em->end = EXTENT_MAP_HOLE; } em->bdev = inode->i_sb->s_bdev; ret = btrfs_lookup_file_extent(NULL, root, path, @@ -1453,25 +1537,27 @@ again: path->slots[0]--; } - item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0], + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - leaf = btrfs_buffer_leaf(path->nodes[0]); - blocknr = btrfs_file_extent_disk_blocknr(item); - blocknr += btrfs_file_extent_offset(item); + + blocknr = btrfs_file_extent_disk_blocknr(leaf, item); + blocknr += btrfs_file_extent_offset(leaf, item); /* are we inside the extent that was found? 
*/ - found_key = &leaf->items[path->slots[0]].key; - found_type = btrfs_disk_key_type(found_key); - if (btrfs_disk_key_objectid(found_key) != objectid || + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != objectid || found_type != BTRFS_EXTENT_DATA_KEY) { goto not_found; } - found_type = btrfs_file_extent_type(item); - extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key); + found_type = btrfs_file_extent_type(leaf, item); + extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = extent_start + - (btrfs_file_extent_num_blocks(item) << inode->i_blkbits); + (btrfs_file_extent_num_blocks(leaf, item) << + inode->i_blkbits); err = 0; if (start < extent_start || start >= extent_end) { em->start = start; @@ -1484,28 +1570,29 @@ again: } goto not_found_em; } - if (btrfs_file_extent_disk_blocknr(item) == 0) { + if (btrfs_file_extent_disk_blocknr(leaf, item) == 0) { em->start = extent_start; em->end = extent_end - 1; - em->block_start = 0; - em->block_end = 0; + em->block_start = EXTENT_MAP_HOLE; + em->block_end = EXTENT_MAP_HOLE; goto insert; } em->block_start = blocknr << inode->i_blkbits; em->block_end = em->block_start + - (btrfs_file_extent_num_blocks(item) << + (btrfs_file_extent_num_blocks(leaf, item) << inode->i_blkbits) - 1; em->start = extent_start; em->end = extent_end - 1; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - char *ptr; + unsigned long ptr; char *map; u32 size; - size = btrfs_file_extent_inline_len(leaf->items + - path->slots[0]); - extent_end = extent_start | ((u64)root->blocksize - 1); + size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, + path->slots[0])); + + extent_end = extent_start | ((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { @@ -1517,18 +1604,21 @@ again: } goto not_found_em; } + em->block_start = 
EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; em->start = extent_start; em->end = extent_end; + if (!page) { goto insert; } + ptr = btrfs_file_extent_inline_start(item); map = kmap(page); - memcpy(map + page_offset, ptr, size); + read_extent_buffer(leaf, map + page_offset, ptr, size); memset(map + page_offset + size, 0, - root->blocksize - (page_offset + size)); + root->sectorsize - (page_offset + size)); flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, extent_start, @@ -1542,8 +1632,8 @@ not_found: em->start = start; em->end = end; not_found_em: - em->block_start = 0; - em->block_end = 0; + em->block_start = EXTENT_MAP_HOLE; + em->block_end = EXTENT_MAP_HOLE; insert: btrfs_release_path(root, path); if (em->start > start || em->end < start) { @@ -1712,6 +1802,7 @@ static void btrfs_truncate(struct inode *inode) ret = btrfs_truncate_in_trans(trans, root, inode); btrfs_update_inode(trans, root, inode); nr = trans->blocks_used; + ret = btrfs_end_transaction(trans, root); BUG_ON(ret); mutex_unlock(&root->fs_info->fs_mutex); @@ -1731,8 +1822,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) struct btrfs_key key; struct btrfs_root_item root_item; struct btrfs_inode_item *inode_item; - struct buffer_head *subvol; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; struct btrfs_root *new_root; struct inode *inode; struct inode *dir; @@ -1746,34 +1836,37 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - subvol = btrfs_alloc_free_block(trans, root, 0, 0); - if (IS_ERR(subvol)) - return PTR_ERR(subvol); - leaf = btrfs_buffer_leaf(subvol); - btrfs_set_header_nritems(&leaf->header, 0); - btrfs_set_header_level(&leaf->header, 0); - btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol)); - btrfs_set_header_generation(&leaf->header, trans->transid); - btrfs_set_header_owner(&leaf->header, root->root_key.objectid); - 
memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid, - sizeof(leaf->header.fsid)); - btrfs_mark_buffer_dirty(subvol); + leaf = btrfs_alloc_free_block(trans, root, 0, 0); + if (IS_ERR(leaf)) + return PTR_ERR(leaf); + + btrfs_set_header_nritems(leaf, 0); + btrfs_set_header_level(leaf, 0); + btrfs_set_header_blocknr(leaf, extent_buffer_blocknr(leaf)); + btrfs_set_header_generation(leaf, trans->transid); + btrfs_set_header_owner(leaf, root->root_key.objectid); + write_extent_buffer(leaf, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(leaf), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(leaf); inode_item = &root_item.inode; memset(inode_item, 0, sizeof(*inode_item)); - btrfs_set_inode_generation(inode_item, 1); - btrfs_set_inode_size(inode_item, 3); - btrfs_set_inode_nlink(inode_item, 1); - btrfs_set_inode_nblocks(inode_item, 1); - btrfs_set_inode_mode(inode_item, S_IFDIR | 0755); + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nblocks = cpu_to_le64(1); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol)); + btrfs_set_root_blocknr(&root_item, extent_buffer_blocknr(leaf)); btrfs_set_root_refs(&root_item, 1); - btrfs_set_root_blocks_used(&root_item, 0); + btrfs_set_root_used(&root_item, 0); + memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); root_item.drop_level = 0; - brelse(subvol); - subvol = NULL; + + free_extent_buffer(leaf); + leaf = NULL; ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, 0, &objectid); @@ -1784,7 +1877,6 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &root_item); @@ -1845,7 +1937,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) struct 
btrfs_trans_handle *trans; struct btrfs_key key; struct btrfs_root_item new_root_item; - struct buffer_head *tmp; + struct extent_buffer *tmp; int ret; int err; u64 objectid; @@ -1876,10 +1968,11 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node)); + btrfs_set_root_blocknr(&new_root_item, + extent_buffer_blocknr(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); @@ -1904,8 +1997,10 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) fail: nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); + if (err && !ret) ret = err; + mutex_unlock(&root->fs_info->fs_mutex); up_write(&root->snap_sem); btrfs_btree_balance_dirty(root, nr); @@ -1986,7 +2081,7 @@ static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) if (copy_from_user(&vol_args, arg, sizeof(vol_args))) return -EFAULT; - + namelen = strlen(vol_args.name); if (namelen > BTRFS_VOL_NAME_MAX) return -EINVAL; @@ -2164,8 +2259,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { return -ENOTEMPTY; } + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); path = btrfs_alloc_path(); if (!path) { @@ -2177,9 +2274,10 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; + if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) { struct btrfs_key *location = &BTRFS_I(new_dir)->location; - u64 old_parent_oid; + struct btrfs_key old_parent_key; di = btrfs_lookup_dir_item(trans, root, 
path, old_inode->i_ino, "..", 2, -1); if (IS_ERR(di)) { @@ -2190,7 +2288,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, ret = -ENOENT; goto out_fail; } - old_parent_oid = btrfs_disk_key_objectid(&di->location); + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &old_parent_key); ret = btrfs_del_item(trans, root, path); if (ret) { goto out_fail; @@ -2199,7 +2297,7 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, di = btrfs_lookup_dir_index_item(trans, root, path, old_inode->i_ino, - old_parent_oid, + old_parent_key.objectid, "..", 2, -1); if (IS_ERR(di)) { ret = PTR_ERR(di); @@ -2257,8 +2355,9 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, u64 objectid; int name_len; int datasize; - char *ptr; + unsigned long ptr; struct btrfs_file_extent_item *ei; + struct extent_buffer *leaf; unsigned long nr; name_len = strlen(symname) + 1; @@ -2302,7 +2401,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, BUG_ON(!path); key.objectid = inode->i_ino; key.offset = 0; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); datasize = btrfs_file_extent_calc_inline_size(name_len); err = btrfs_insert_empty_item(trans, root, path, &key, @@ -2311,16 +2409,17 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, drop_inode = 1; goto out_unlock; } - ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(ei, trans->transid); - btrfs_set_file_extent_type(ei, + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - btrfs_memcpy(root, path->nodes[0]->b_data, - ptr, symname, name_len); - btrfs_mark_buffer_dirty(path->nodes[0]); + write_extent_buffer(leaf, symname, ptr, 
name_len); + btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; inode->i_size = name_len - 1; diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index bafa78603bd2..a825ce078a54 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -20,10 +20,10 @@ #include "disk-io.h" #include "print-tree.h" -void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) { int i; - u32 nr = btrfs_header_nritems(&l->header); + u32 nr = btrfs_header_nritems(l); struct btrfs_item *item; struct btrfs_extent_item *ei; struct btrfs_root_item *ri; @@ -31,119 +31,113 @@ void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l) struct btrfs_inode_item *ii; struct btrfs_block_group_item *bi; struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_key found_key; u32 type; printk("leaf %llu total ptrs %d free space %d\n", - (unsigned long long)btrfs_header_blocknr(&l->header), nr, + (unsigned long long)btrfs_header_blocknr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { - item = l->items + i; - type = btrfs_disk_key_type(&item->key); + item = btrfs_item_nr(l, i); + btrfs_item_key_to_cpu(l, &key, i); + type = btrfs_key_type(&key); printk("\titem %d key (%llu %x %llu) itemoff %d itemsize %d\n", i, - (unsigned long long)btrfs_disk_key_objectid(&item->key), - btrfs_disk_key_flags(&item->key), - (unsigned long long)btrfs_disk_key_offset(&item->key), - btrfs_item_offset(item), - btrfs_item_size(item)); + (unsigned long long)key.objectid, type, + (unsigned long long)key.offset, + btrfs_item_offset(l, item), btrfs_item_size(l, item)); switch (type) { case BTRFS_INODE_ITEM_KEY: ii = btrfs_item_ptr(l, i, struct btrfs_inode_item); printk("\t\tinode generation %llu size %llu mode %o\n", - (unsigned long long)btrfs_inode_generation(ii), - (unsigned long 
long)btrfs_inode_size(ii), - btrfs_inode_mode(ii)); + (unsigned long long)btrfs_inode_generation(l, ii), + (unsigned long long)btrfs_inode_size(l, ii), + btrfs_inode_mode(l, ii)); break; case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); + btrfs_dir_item_key_to_cpu(l, di, &found_key); printk("\t\tdir oid %llu flags %u type %u\n", - (unsigned long long)btrfs_disk_key_objectid( - &di->location), - btrfs_dir_flags(di), - btrfs_dir_type(di)); - printk("\t\tname %.*s\n", - btrfs_dir_name_len(di),(char *)(di + 1)); + (unsigned long long)found_key.objectid, + btrfs_dir_flags(l, di), + btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); printk("\t\troot data blocknr %llu refs %u\n", - (unsigned long long)btrfs_root_blocknr(ri), - btrfs_root_refs(ri)); + (unsigned long long)btrfs_disk_root_blocknr(l, ri), + btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: ei = btrfs_item_ptr(l, i, struct btrfs_extent_item); printk("\t\textent data refs %u\n", - btrfs_extent_refs(ei)); + btrfs_extent_refs(l, ei)); break; case BTRFS_EXTENT_DATA_KEY: fi = btrfs_item_ptr(l, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(fi) == + if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { printk("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l->items + i)); + btrfs_file_extent_inline_len(l, item)); break; } printk("\t\textent data disk block %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_blocknr(fi), - (unsigned long long)btrfs_file_extent_disk_num_blocks(fi)); + (unsigned long long)btrfs_file_extent_disk_blocknr(l, fi), + (unsigned long long)btrfs_file_extent_disk_num_blocks(l, fi)); printk("\t\textent data offset %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_offset(fi), - (unsigned long long)btrfs_file_extent_num_blocks(fi)); + (unsigned long long)btrfs_file_extent_offset(l, fi), + (unsigned long long)btrfs_file_extent_num_blocks(l, 
fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, struct btrfs_block_group_item); printk("\t\tblock group used %llu\n", - (unsigned long long)btrfs_block_group_used(bi)); - break; - case BTRFS_STRING_ITEM_KEY: - printk("\t\titem data %.*s\n", btrfs_item_size(item), - btrfs_leaf_data(l) + btrfs_item_offset(item)); + (unsigned long long)btrfs_disk_block_group_used(l, bi)); break; }; } } -void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t) +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) { int i; u32 nr; - struct btrfs_node *c; + struct btrfs_key key; - if (!t) + if (!c) return; - c = btrfs_buffer_node(t); - nr = btrfs_header_nritems(&c->header); + nr = btrfs_header_nritems(c); if (btrfs_is_leaf(c)) { - btrfs_print_leaf(root, (struct btrfs_leaf *)c); + btrfs_print_leaf(root, c); return; } printk("node %llu level %d total ptrs %d free spc %u\n", - (unsigned long long)btrfs_header_blocknr(&c->header), - btrfs_header_level(&c->header), nr, + (unsigned long long)btrfs_header_blocknr(c), + btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { + btrfs_node_key_to_cpu(c, &key, i); printk("\tkey %d (%llu %u %llu) block %llu\n", i, - (unsigned long long)c->ptrs[i].key.objectid, - c->ptrs[i].key.flags, - (unsigned long long)c->ptrs[i].key.offset, + (unsigned long long)key.objectid, + key.type, + (unsigned long long)key.offset, (unsigned long long)btrfs_node_blockptr(c, i)); } for (i = 0; i < nr; i++) { - struct buffer_head *next_buf = read_tree_block(root, + struct extent_buffer *next = read_tree_block(root, btrfs_node_blockptr(c, i)); - struct btrfs_node *next = btrfs_buffer_node(next_buf); if (btrfs_is_leaf(next) && - btrfs_header_level(&c->header) != 1) + btrfs_header_level(c) != 1) BUG(); - if (btrfs_header_level(&next->header) != - btrfs_header_level(&c->header) - 1) + if (btrfs_header_level(next) != + btrfs_header_level(c) - 1) BUG(); - btrfs_print_tree(root, 
next_buf); - btrfs_block_release(root, next_buf); + btrfs_print_tree(root, next); + free_extent_buffer(next); } } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 205cd03601ab..da75efe534d5 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -18,6 +18,6 @@ #ifndef __PRINT_TREE_ #define __PRINT_TREE_ -void btrfs_print_leaf(struct btrfs_root *root, struct btrfs_leaf *l); -void btrfs_print_tree(struct btrfs_root *root, struct buffer_head *t); +void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l); +void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t); #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 3b5926dfbeba..88bcdd33f56e 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -26,12 +26,13 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, { struct btrfs_path *path; struct btrfs_key search_key; - struct btrfs_leaf *l; + struct btrfs_key found_key; + struct extent_buffer *l; int ret; int slot; search_key.objectid = objectid; - search_key.flags = (u32)-1; + search_key.type = (u8)-1; search_key.offset = (u64)-1; path = btrfs_alloc_path(); @@ -39,17 +40,19 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) goto out; + BUG_ON(ret == 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; BUG_ON(path->slots[0] == 0); slot = path->slots[0] - 1; - if (btrfs_disk_key_objectid(&l->items[slot].key) != objectid) { + btrfs_item_key_to_cpu(l, &found_key, slot); + if (found_key.objectid != objectid) { ret = 1; goto out; } - memcpy(item, btrfs_item_ptr(l, slot, struct btrfs_root_item), - sizeof(*item)); - btrfs_disk_key_to_cpu(key, &l->items[slot].key); + read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot), + sizeof(*item)); + memcpy(key, &found_key, sizeof(found_key)); ret = 0; out: btrfs_release_path(root, path); @@ -62,10 +65,10 @@ int 
btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *item) { struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; int ret; int slot; - struct btrfs_root_item *update_item; + unsigned long ptr; path = btrfs_alloc_path(); BUG_ON(!path); @@ -73,10 +76,10 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root if (ret < 0) goto out; BUG_ON(ret != 0); - l = btrfs_buffer_leaf(path->nodes[0]); + l = path->nodes[0]; slot = path->slots[0]; - update_item = btrfs_item_ptr(l, slot, struct btrfs_root_item); - btrfs_memcpy(root, l, update_item, item, sizeof(*item)); + ptr = btrfs_item_ptr_offset(l, slot); + write_extent_buffer(l, item, ptr, sizeof(*item)); btrfs_mark_buffer_dirty(path->nodes[0]); out: btrfs_release_path(root, path); @@ -103,11 +106,10 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_path *path; int ret; u32 nritems; - struct btrfs_leaf *leaf; + struct extent_buffer *leaf; int slot; key.objectid = objectid; - key.flags = 0; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); key.offset = 0; path = btrfs_alloc_path(); @@ -117,19 +119,19 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, if (ret < 0) goto err; while(1) { - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; if (slot >= nritems) { ret = btrfs_next_leaf(root, path); if (ret) break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); slot = path->slots[0]; } - item = leaf->items + slot; - btrfs_disk_key_to_cpu(&key, &item->key); + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &key, slot); if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) goto next; @@ -140,7 +142,7 @@ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, break; ri = 
btrfs_item_ptr(leaf, slot, struct btrfs_root_item); - if (btrfs_root_refs(ri) != 0) + if (btrfs_disk_root_refs(leaf, ri) != 0) goto next; dead_root = btrfs_read_fs_root_no_radix(root->fs_info, &key); @@ -170,6 +172,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, int ret; u32 refs; struct btrfs_root_item *ri; + struct extent_buffer *leaf; path = btrfs_alloc_path(); BUG_ON(!path); @@ -177,10 +180,10 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (ret < 0) goto out; BUG_ON(ret != 0); - ri = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), - path->slots[0], struct btrfs_root_item); + leaf = path->nodes[0]; + ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); - refs = btrfs_root_refs(ri); + refs = btrfs_disk_root_refs(leaf, ri); BUG_ON(refs != 0); ret = btrfs_del_item(trans, root, path); out: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 66a01cbbbea1..39a1435c68f1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -41,7 +41,7 @@ #include "ioctl.h" #include "print-tree.h" -#define BTRFS_SUPER_MAGIC 0x9123682E +#define BTRFS_SUPER_MAGIC 0x9123683E static struct super_operations btrfs_super_ops; @@ -115,13 +115,12 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) return -EIO; } sb->s_fs_info = tree_root; - disk_super = tree_root->fs_info->disk_super; + disk_super = &tree_root->fs_info->super_copy; inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super), tree_root); bi = BTRFS_I(inode); bi->location.objectid = inode->i_ino; bi->location.offset = 0; - bi->location.flags = 0; bi->root = tree_root; btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY); @@ -281,6 +280,7 @@ error_s: error_bdev: close_bdev_excl(bdev); error: +printk("get_sb failed\n"); return error; } /* end copy & paste */ @@ -295,6 +295,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, 
mnt, subvol_name ? subvol_name : "default"); +printk("btrfs_get_sb returns %d\n", ret); return ret; } diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 2058783373eb..9654e90eec89 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -31,31 +31,31 @@ static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_blocks_used(&root->root_item)); + (unsigned long long)btrfs_root_used(&root->root_item)); } static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_root_block_limit(&root->root_item)); + (unsigned long long)btrfs_root_limit(&root->root_item)); } static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocks_used(fs->disk_super)); + (unsigned long long)btrfs_super_blocks_used(&fs->super_copy)); } static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_total_blocks(fs->disk_super)); + (unsigned long long)btrfs_super_total_blocks(&fs->super_copy)); } static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocksize(fs->disk_super)); + (unsigned long long)btrfs_super_sectorsize(&fs->super_copy)); } /* this is for root attrs (subvols/snapshots) */ diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 087074db0bd5..750f35a37aae 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -66,7 +67,9 @@ static int join_transaction(struct btrfs_root *root) cur_trans->commit_done = 0; cur_trans->start_time = get_seconds(); list_add_tail(&cur_trans->list, 
&root->fs_info->trans_list); - init_bit_radix(&cur_trans->dirty_pages); + extent_map_tree_init(&cur_trans->dirty_pages, + root->fs_info->btree_inode->i_mapping, + GFP_NOFS); } else { cur_trans->num_writers++; cur_trans->num_joined++; @@ -88,7 +91,7 @@ static int record_root_in_trans(struct btrfs_root *root) (unsigned long)root->root_key.objectid, BTRFS_ROOT_DEFRAG_TAG); root->commit_root = root->node; - get_bh(root->node); + extent_buffer_get(root->node); } else { WARN_ON(1); } @@ -144,29 +147,30 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - unsigned long gang[16]; int ret; - int i; int err; int werr = 0; + struct extent_map_tree *dirty_pages; struct page *page; - struct radix_tree_root *dirty_pages; struct inode *btree_inode = root->fs_info->btree_inode; + u64 start; + u64 end; + unsigned long index; if (!trans || !trans->transaction) { return filemap_write_and_wait(btree_inode->i_mapping); } dirty_pages = &trans->transaction->dirty_pages; while(1) { - ret = find_first_radix_bit(dirty_pages, gang, - 0, ARRAY_SIZE(gang)); - if (!ret) + ret = find_first_extent_bit(dirty_pages, 0, &start, &end, + EXTENT_DIRTY); + if (ret) break; - for (i = 0; i < ret; i++) { - /* FIXME EIO */ - clear_radix_bit(dirty_pages, gang[i]); - page = find_lock_page(btree_inode->i_mapping, - gang[i]); + clear_extent_dirty(dirty_pages, start, end, GFP_NOFS); + while(start <= end) { + index = start >> PAGE_CACHE_SHIFT; + start = (index + 1) << PAGE_CACHE_SHIFT; + page = find_lock_page(btree_inode->i_mapping, index); if (!page) continue; if (PageWriteback(page)) { @@ -202,10 +206,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, extent_root); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == bh_blocknr(extent_root->node)) + if (old_extent_block == + 
extent_buffer_blocknr(extent_root->node)) break; btrfs_set_root_blocknr(&extent_root->root_item, - bh_blocknr(extent_root->node)); + extent_buffer_blocknr(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -279,9 +284,9 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(bh_blocknr(root->node) != + WARN_ON(extent_buffer_blocknr(root->node) != btrfs_root_blocknr(&root->root_item)); - brelse(root->commit_root); + free_extent_buffer(root->commit_root); root->commit_root = NULL; /* make sure to update the root on disk @@ -310,7 +315,7 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, root->root_key.offset = root->fs_info->generation; btrfs_set_root_blocknr(&root->root_item, - bh_blocknr(root->node)); + extent_buffer_blocknr(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); @@ -389,10 +394,10 @@ int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) for (i = 0; i < ret; i++) { root = gang[i]; last = root->root_key.objectid + 1; - btrfs_defrag_root(root, 1); + // btrfs_defrag_root(root, 1); } } - btrfs_defrag_root(info->extent_root, 1); + // btrfs_defrag_root(info->extent_root, 1); return err; } @@ -414,7 +419,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - num_blocks = btrfs_root_blocks_used(&dirty->root->root_item); + num_blocks = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; while(1) { @@ -441,11 +446,11 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); - num_blocks -= btrfs_root_blocks_used(&dirty->root->root_item); - blocks_used = btrfs_root_blocks_used(&root->root_item); + num_blocks -= btrfs_root_used(&dirty->root->root_item); + blocks_used = 
btrfs_root_used(&root->root_item); if (num_blocks) { record_root_in_trans(root); - btrfs_set_root_blocks_used(&root->root_item, + btrfs_set_root_used(&root->root_item, blocks_used - num_blocks); } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); @@ -553,9 +558,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, - bh_blocknr(root->fs_info->tree_root->node)); - memcpy(root->fs_info->disk_super, &root->fs_info->super_copy, - sizeof(root->fs_info->super_copy)); + extent_buffer_blocknr(root->fs_info->tree_root->node)); + + write_extent_buffer(root->fs_info->sb_buffer, + &root->fs_info->super_copy, 0, + sizeof(root->fs_info->super_copy)); btrfs_copy_pinned(root, &pinned_copy); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 4bc328cbb24c..ae39fcfc169a 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -28,7 +28,7 @@ struct btrfs_transaction { int use_count; int commit_done; struct list_head list; - struct radix_tree_root dirty_pages; + struct extent_map_tree dirty_pages; unsigned long start_time; wait_queue_head_t writer_wait; wait_queue_head_t commit_wait; @@ -83,5 +83,6 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); - +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 420597127ed1..daf019afa0a1 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,14 +23,14 @@ #include "transaction.h" static void reada_defrag(struct btrfs_root *root, - struct btrfs_node *node) + struct extent_buffer *node) { int i; u32 nritems; u64 blocknr; int ret; - nritems = 
btrfs_header_nritems(&node->header); + nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { blocknr = btrfs_node_blockptr(node, i); ret = readahead_tree_block(root, blocknr); @@ -44,8 +44,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, struct btrfs_path *path, int *level, int cache_only, u64 *last_ret) { - struct buffer_head *next; - struct buffer_head *cur; + struct extent_buffer *next; + struct extent_buffer *cur; u64 blocknr; int ret = 0; int is_extent = 0; @@ -62,13 +62,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, cur = path->nodes[*level]; if (!cache_only && *level > 1 && path->slots[*level] == 0) - reada_defrag(root, btrfs_buffer_node(cur)); + reada_defrag(root, cur); - if (btrfs_header_level(btrfs_buffer_header(cur)) != *level) + if (btrfs_header_level(cur) != *level) WARN_ON(1); if (path->slots[*level] >= - btrfs_header_nritems(btrfs_buffer_header(cur))) + btrfs_header_nritems(cur)) break; if (*level == 1) { @@ -80,14 +80,13 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } - blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur), - path->slots[*level]); + blocknr = btrfs_node_blockptr(cur, path->slots[*level]); if (cache_only) { next = btrfs_find_tree_block(root, blocknr); - if (!next || !buffer_uptodate(next) || - buffer_locked(next) || !buffer_defrag(next)) { - brelse(next); + /* FIXME, test for defrag */ + if (!next || !btrfs_buffer_uptodate(next)) { + free_extent_buffer(next); path->slots[*level]++; continue; } @@ -106,16 +105,18 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, WARN_ON(*level <= 0); if (path->nodes[*level-1]) - btrfs_block_release(root, path->nodes[*level-1]); + free_extent_buffer(path->nodes[*level-1]); path->nodes[*level-1] = next; - *level = btrfs_header_level(btrfs_buffer_header(next)); + *level = btrfs_header_level(next); path->slots[*level] = 0; } WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); +#if 0 
clear_buffer_defrag(path->nodes[*level]); clear_buffer_defrag_done(path->nodes[*level]); - btrfs_block_release(root, path->nodes[*level]); +#endif + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; WARN_ON(ret); @@ -129,24 +130,25 @@ static int defrag_walk_up(struct btrfs_trans_handle *trans, { int i; int slot; - struct btrfs_node *node; + struct extent_buffer *node; for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { slot = path->slots[i]; - if (slot < btrfs_header_nritems( - btrfs_buffer_header(path->nodes[i])) - 1) { + if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { path->slots[i]++; *level = i; - node = btrfs_buffer_node(path->nodes[i]); + node = path->nodes[i]; WARN_ON(i == 0); - btrfs_disk_key_to_cpu(&root->defrag_progress, - &node->ptrs[path->slots[i]].key); + btrfs_node_key_to_cpu(node, &root->defrag_progress, + path->slots[i]); root->defrag_level = i; return 0; } else { + /* clear_buffer_defrag(path->nodes[*level]); clear_buffer_defrag_done(path->nodes[*level]); - btrfs_block_release(root, path->nodes[*level]); + */ + free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level = i + 1; } @@ -158,7 +160,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { struct btrfs_path *path = NULL; - struct buffer_head *tmp; + struct extent_buffer *tmp; int ret = 0; int wret; int level; @@ -172,17 +174,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; - level = btrfs_header_level(btrfs_buffer_header(root->node)); + level = btrfs_header_level(root->node); orig_level = level; if (level == 0) { goto out; } if (root->defrag_progress.objectid == 0) { - get_bh(root->node); + extent_buffer_get(root->node); ret = btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); BUG_ON(ret); ret = btrfs_realloc_node(trans, root, root->node, 
cache_only, @@ -200,12 +203,15 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (is_extent) btrfs_extent_post_op(trans, root); + if (wret < 0) { ret = wret; goto out; } + while(level > 0 && !path->nodes[level]) level--; + if (!path->nodes[level]) { ret = 0; goto out; @@ -230,7 +236,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { - btrfs_block_release(root, path->nodes[i]); + free_extent_buffer(path->nodes[i]); path->nodes[i] = 0; } } -- cgit v1.2.3 From db94535db75e67fab12ccbb7f5ee548e33fed891 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:53 -0400 Subject: Btrfs: Allow tree blocks larger than the page size Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 319 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/ctree.h | 112 ++++++++++------- fs/btrfs/disk-io.c | 92 ++++++++------ fs/btrfs/disk-io.h | 9 +- fs/btrfs/extent-tree.c | 285 ++++++++++++++++++++++--------------------- fs/btrfs/extent_map.c | 91 ++++++++++---- fs/btrfs/extent_map.h | 10 +- fs/btrfs/file-item.c | 10 +- fs/btrfs/file.c | 126 ++++++++++--------- fs/btrfs/inode.c | 90 +++++++------- fs/btrfs/print-tree.c | 23 ++-- fs/btrfs/super.c | 6 +- fs/btrfs/sysfs.c | 5 +- fs/btrfs/transaction.c | 39 +++--- fs/btrfs/tree-defrag.c | 18 +-- 15 files changed, 738 insertions(+), 497 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50ef351ef6b3..34de83630ae9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -83,16 +83,15 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(root->ref_cows && trans->transid != root->last_trans); - cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); + cow = btrfs_alloc_free_block(trans, root, buf->len, + search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); cow->alloc_addr = (unsigned long)__builtin_return_address(0); - if (buf->len != root->sectorsize || 
cow->len != root->sectorsize) - WARN_ON(1); copy_extent_buffer(cow, buf, 0, 0, cow->len); - btrfs_set_header_blocknr(cow, extent_buffer_blocknr(cow)); + btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); btrfs_set_header_owner(cow, root->root_key.objectid); @@ -110,16 +109,16 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans, root->node = cow; extent_buffer_get(cow); if (buf != root->commit_root) { - btrfs_free_extent(trans, root, - extent_buffer_blocknr(buf), 1, 1); + btrfs_free_extent(trans, root, buf->start, + buf->len, 1); } free_extent_buffer(buf); } else { btrfs_set_node_blockptr(parent, parent_slot, - extent_buffer_blocknr(cow)); + cow->start); btrfs_mark_buffer_dirty(parent); WARN_ON(btrfs_header_generation(parent) != trans->transid); - btrfs_free_extent(trans, root, extent_buffer_blocknr(buf),1,1); + btrfs_free_extent(trans, root, buf->start, buf->len, 1); } free_extent_buffer(buf); btrfs_mark_buffer_dirty(cow); @@ -149,13 +148,14 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } - search_start = extent_buffer_blocknr(buf) & ~((u64)65535); + search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0); return ret; } +#if 0 static int close_blocks(u64 blocknr, u64 other) { if (blocknr < other && other - blocknr < 8) @@ -165,7 +165,6 @@ static int close_blocks(u64 blocknr, u64 other) return 0; } -#if 0 static int should_defrag_leaf(struct extent_buffer *eb) { return 0; @@ -355,7 +354,7 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(memcmp(&parent_key, &node_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(node)); + btrfs_header_bytenr(node)); } BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != 0) { @@ -398,7 +397,7 @@ 
static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, BUG_ON(memcmp(&parent_key, &leaf_key, sizeof(struct btrfs_disk_key))); BUG_ON(btrfs_node_blockptr(parent, parent_slot) != - btrfs_header_blocknr(leaf)); + btrfs_header_bytenr(leaf)); } #if 0 for (i = 0; nritems > 1 && i < nritems - 2; i++) { @@ -467,14 +466,16 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, static int check_block(struct btrfs_root *root, struct btrfs_path *path, int level) { +#if 0 struct extent_buffer *buf = path->nodes[level]; if (memcmp_extent_buffer(buf, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(buf), BTRFS_FSID_SIZE)) { printk("warning bad block %Lu\n", buf->start); - BUG(); + return 1; } +#endif if (level == 0) return check_leaf(root, path, level); return check_node(root, path, level); @@ -585,7 +586,8 @@ static struct extent_buffer *read_node_slot(struct btrfs_root *root, return NULL; if (slot >= btrfs_header_nritems(parent)) return NULL; - return read_tree_block(root, btrfs_node_blockptr(parent, slot)); + return read_tree_block(root, btrfs_node_blockptr(parent, slot), + btrfs_level_size(root, btrfs_header_level(parent) - 1)); } static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root @@ -618,7 +620,6 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root */ if (!parent) { struct extent_buffer *child; - u64 blocknr = extent_buffer_blocknr(mid); if (btrfs_header_nritems(mid) != 1) return 0; @@ -632,9 +633,10 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wait_on_tree_block_writeback(root, mid); /* once for the path */ free_extent_buffer(mid); + ret = btrfs_free_extent(trans, root, mid->start, mid->len, 1); /* once for the root ptr */ free_extent_buffer(mid); - return btrfs_free_extent(trans, root, blocknr, 1, 1); + return ret; } if (btrfs_header_nritems(mid) > BTRFS_NODEPTRS_PER_BLOCK(root) / 4) @@ -680,7 +682,9 @@ static int balance_level(struct 
btrfs_trans_handle *trans, struct btrfs_root if (wret < 0 && wret != -ENOSPC) ret = wret; if (btrfs_header_nritems(right) == 0) { - u64 blocknr = extent_buffer_blocknr(right); + u64 bytenr = right->start; + u32 blocksize = right->len; + clean_tree_block(trans, root, right); wait_on_tree_block_writeback(root, right); free_extent_buffer(right); @@ -689,7 +693,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root 1); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); if (wret) ret = wret; } else { @@ -719,7 +724,8 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root } if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 blocknr = extent_buffer_blocknr(mid); + u64 bytenr = mid->start; + u32 blocksize = mid->len; clean_tree_block(trans, root, mid); wait_on_tree_block_writeback(root, mid); free_extent_buffer(mid); @@ -727,7 +733,7 @@ static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root wret = del_ptr(trans, root, path, level + 1, pslot); if (wret) ret = wret; - wret = btrfs_free_extent(trans, root, blocknr, 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1); if (wret) ret = wret; } else { @@ -830,7 +836,6 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, path->slots[level] = orig_slot; free_extent_buffer(left); } - check_node(root, path, level); return 0; } free_extent_buffer(left); @@ -874,12 +879,10 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, } else { free_extent_buffer(right); } - check_node(root, path, level); return 0; } free_extent_buffer(right); } - check_node(root, path, level); return 1; } @@ -889,19 +892,23 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans, static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, int level, int slot) { + return; 
+#if 0 struct extent_buffer *node; int i; u32 nritems; - u64 blocknr; + u64 bytenr; u64 search; u64 cluster_start; int ret; int nread = 0; int direction = path->reada; + int level; struct radix_tree_root found; unsigned long gang[8]; struct extent_buffer *eb; + if (level == 0) return; @@ -918,8 +925,9 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, init_bit_radix(&found); nritems = btrfs_header_nritems(node); + level = btrfs_header_level(node) - 1; for (i = slot; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); + bytenr = btrfs_node_blockptr(node, i); set_radix_bit(&found, blocknr); } if (direction > 0) { @@ -944,6 +952,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, } } } +#endif } /* * look for key in the tree. path is filled in with nodes along the way @@ -963,7 +972,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root ins_len, int cow) { struct extent_buffer *b; - u64 blocknr; + u64 bytenr; int slot; int ret; int level; @@ -1027,10 +1036,11 @@ again: /* this is only true while dropping a snapshot */ if (level == lowest_level) break; - blocknr = btrfs_node_blockptr(b, slot); + bytenr = btrfs_node_blockptr(b, slot); if (should_reada) reada_for_search(root, p, level, slot); - b = read_tree_block(root, btrfs_node_blockptr(b, slot)); + b = read_tree_block(root, bytenr, + btrfs_level_size(root, level - 1)); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1193,14 +1203,14 @@ static int insert_new_root(struct btrfs_trans_handle *trans, BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); - c = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(root->node), 0); + c = btrfs_alloc_free_block(trans, root, root->nodesize, + root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); memset_extent_buffer(c, 0, 0, root->nodesize); btrfs_set_header_nritems(c, 1); btrfs_set_header_level(c, level); - 
btrfs_set_header_blocknr(c, extent_buffer_blocknr(c)); + btrfs_set_header_bytenr(c, c->start); btrfs_set_header_generation(c, trans->transid); btrfs_set_header_owner(c, root->root_key.objectid); lower = path->nodes[level-1]; @@ -1213,7 +1223,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, else btrfs_node_key(lower, &lower_key, 0); btrfs_set_node_key(c, &lower_key, 0); - btrfs_set_node_blockptr(c, 0, extent_buffer_blocknr(lower)); + btrfs_set_node_blockptr(c, 0, lower->start); btrfs_mark_buffer_dirty(c); @@ -1237,7 +1247,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, */ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key - *key, u64 blocknr, int slot, int level) + *key, u64 bytenr, int slot, int level) { struct extent_buffer *lower; int nritems; @@ -1256,10 +1266,9 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root (nritems - slot) * sizeof(struct btrfs_key_ptr)); } btrfs_set_node_key(lower, key, slot); - btrfs_set_node_blockptr(lower, slot, blocknr); + btrfs_set_node_blockptr(lower, slot, bytenr); btrfs_set_header_nritems(lower, nritems + 1); btrfs_mark_buffer_dirty(lower); - check_node(root, path, level); return 0; } @@ -1300,14 +1309,14 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root } c_nritems = btrfs_header_nritems(c); - split = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(c), 0); + split = btrfs_alloc_free_block(trans, root, root->nodesize, + c->start, 0); if (IS_ERR(split)) return PTR_ERR(split); btrfs_set_header_flags(split, btrfs_header_flags(c)); btrfs_set_header_level(split, btrfs_header_level(c)); - btrfs_set_header_blocknr(split, extent_buffer_blocknr(split)); + btrfs_set_header_bytenr(split, split->start); btrfs_set_header_generation(split, trans->transid); btrfs_set_header_owner(split, root->root_key.objectid); write_extent_buffer(split, root->fs_info->fsid, @@ -1328,8 
+1337,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(split); btrfs_node_key(split, &disk_key, 0); - wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(split), + wret = insert_ptr(trans, root, path, &disk_key, split->start, path->slots[level + 1] + 1, level + 1); if (wret) @@ -1407,6 +1415,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root u32 left_nritems; u32 right_nritems; u32 data_end; + u32 this_item_size; int ret; slot = path->slots[1]; @@ -1417,7 +1426,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; - right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1)); + right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1), + root->leafsize); free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(right); @@ -1445,13 +1455,27 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root for (i = left_nritems - 1; i >= 1; i--) { item = btrfs_item_nr(left, i); + if (path->slots[0] == i) push_space += data_size + sizeof(*item); - if (btrfs_item_size(left, item) + sizeof(*item) + push_space > - free_space) + + if (!left->map_token) { + map_extent_buffer(left, (unsigned long)item, + sizeof(struct btrfs_item), + &left->map_token, &left->kaddr, + &left->map_start, &left->map_len, + KM_USER1); + } + + this_item_size = btrfs_item_size(left, item); + if (this_item_size + sizeof(*item) + push_space > free_space) break; push_items++; - push_space += btrfs_item_size(left, item) + sizeof(*item); + push_space += this_item_size + sizeof(*item); + } + if (left->map_token) { + unmap_extent_buffer(left, left->map_token, KM_USER1); + left->map_token = NULL; } if (push_items == 0) { @@ -1493,11 +1517,23 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct 
btrfs_root right_nritems += push_items; btrfs_set_header_nritems(right, right_nritems); push_space = BTRFS_LEAF_DATA_SIZE(root); + for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - btrfs_set_item_offset(right, item, push_space - - btrfs_item_size(right, item)); - push_space = btrfs_item_offset(right, item); + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + push_space -= btrfs_item_size(right, item); + btrfs_set_item_offset(right, item, push_space); + } + + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } left_nritems -= push_items; btrfs_set_header_nritems(left, left_nritems); @@ -1518,8 +1554,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root } else { free_extent_buffer(right); } - if (path->nodes[1]) - check_node(root, path, 1); return 0; } /* @@ -1542,6 +1576,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root u32 right_nritems; int ret = 0; int wret; + u32 this_item_size; + u32 old_left_item_size; slot = path->slots[1]; if (slot == 0) @@ -1550,7 +1586,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1], - slot - 1)); + slot - 1), root->leafsize); free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size + sizeof(struct btrfs_item)) { free_extent_buffer(left); @@ -1579,14 +1615,30 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < right_nritems - 1; i++) { item = btrfs_item_nr(right, i); + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + if (path->slots[0] == i) 
push_space += data_size + sizeof(*item); - if (btrfs_item_size(right, item) + sizeof(*item) + push_space > - free_space) + + this_item_size = btrfs_item_size(right, item); + if (this_item_size + sizeof(*item) + push_space > free_space) break; + push_items++; - push_space += btrfs_item_size(right, item) + sizeof(*item); + push_space += this_item_size + sizeof(*item); + } + + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } + if (push_items == 0) { free_extent_buffer(left); return 1; @@ -1611,15 +1663,28 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root old_left_nritems = btrfs_header_nritems(left); BUG_ON(old_left_nritems < 0); + old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { u32 ioff; + item = btrfs_item_nr(left, i); + if (!left->map_token) { + map_extent_buffer(left, (unsigned long)item, + sizeof(struct btrfs_item), + &left->map_token, &left->kaddr, + &left->map_start, &left->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(left, item); btrfs_set_item_offset(left, item, - ioff - (BTRFS_LEAF_DATA_SIZE(root) - - btrfs_item_offset_nr(left, old_left_nritems - 1))); + ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); } btrfs_set_header_nritems(left, old_left_nritems + push_items); + if (left->map_token) { + unmap_extent_buffer(left, left->map_token, KM_USER1); + left->map_token = NULL; + } /* fixup right node */ push_space = btrfs_item_offset_nr(right, push_items - 1) - @@ -1640,9 +1705,21 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - btrfs_set_item_offset(right, item, push_space - - btrfs_item_size(right, item)); - push_space = btrfs_item_offset(right, item); + + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + 
&right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + + push_space = push_space - btrfs_item_size(right, item); + btrfs_set_item_offset(right, item, push_space); + } + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; } btrfs_mark_buffer_dirty(left); @@ -1664,8 +1741,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root path->slots[0] -= push_items; } BUG_ON(path->slots[0] < 0); - if (path->nodes[1]) - check_node(root, path, 1); return ret; } @@ -1718,13 +1793,13 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root nritems = btrfs_header_nritems(l); mid = (nritems + 1)/ 2; - right = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(l), 0); + right = btrfs_alloc_free_block(trans, root, root->leafsize, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_bytenr(right, right->start); btrfs_set_header_generation(right, trans->transid); btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); @@ -1740,8 +1815,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, - &disk_key, - extent_buffer_blocknr(right), + &disk_key, right->start, path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1762,7 +1836,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(right), + right->start, path->slots[1], 1); if (wret) ret = wret; @@ -1799,15 +1873,30 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root for (i = 0; i < nritems; i++) { struct btrfs_item *item 
= btrfs_item_nr(right, i); - u32 ioff = btrfs_item_offset(right, item); + u32 ioff; + + if (!right->map_token) { + map_extent_buffer(right, (unsigned long)item, + sizeof(struct btrfs_item), + &right->map_token, &right->kaddr, + &right->map_start, &right->map_len, + KM_USER1); + } + + ioff = btrfs_item_offset(right, item); btrfs_set_item_offset(right, item, ioff + rt_data_off); } + if (right->map_token) { + unmap_extent_buffer(right, right->map_token, KM_USER1); + right->map_token = NULL; + } + btrfs_set_header_nritems(l, mid); ret = 0; btrfs_item_key(right, &disk_key, 0); - wret = insert_ptr(trans, root, path, &disk_key, - extent_buffer_blocknr(right), path->slots[1] + 1, 1); + wret = insert_ptr(trans, root, path, &disk_key, right->start, + path->slots[1] + 1, 1); if (wret) ret = wret; @@ -1824,19 +1913,17 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root free_extent_buffer(right); BUG_ON(path->slots[0] < 0); - check_node(root, path, 1); - check_leaf(root, path, 0); if (!double_split) return ret; - right = btrfs_alloc_free_block(trans, root, - extent_buffer_blocknr(l), 0); + right = btrfs_alloc_free_block(trans, root, root->leafsize, + l->start, 0); if (IS_ERR(right)) return PTR_ERR(right); memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); - btrfs_set_header_blocknr(right, extent_buffer_blocknr(right)); + btrfs_set_header_bytenr(right, right->start); btrfs_set_header_generation(right, trans->transid); btrfs_set_header_owner(right, root->root_key.objectid); btrfs_set_header_level(right, 0); @@ -1847,8 +1934,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_cpu_key_to_disk(&disk_key, ins_key); btrfs_set_header_nritems(right, 0); wret = insert_ptr(trans, root, path, - &disk_key, - extent_buffer_blocknr(right), + &disk_key, right->start, path->slots[1], 1); if (wret) ret = wret; @@ -1860,8 +1946,6 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root 
free_extent_buffer(path->nodes[0]); path->nodes[0] = right; path->slots[0] = 0; - check_node(root, path, 1); - check_leaf(root, path, 0); return ret; } @@ -1904,9 +1988,24 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); + + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + size_diff); } + + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end + size_diff, btrfs_leaf_data(leaf) + @@ -1921,7 +2020,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); return ret; } @@ -1963,10 +2061,23 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); + + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + @@ -1983,7 +2094,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); return ret; } @@ -2046,12 +2156,26 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, * item0..itemN ... dataN.offset..dataN.size .. 
data0.size */ /* first correct the data pointers */ + WARN_ON(leaf->map_token); for (i = slot; i < nritems; i++) { u32 ioff; + item = btrfs_item_nr(leaf, i); + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } + ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), @@ -2081,7 +2205,6 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, btrfs_print_leaf(root, leaf); BUG(); } - check_leaf(root, path, 0); out: return ret; } @@ -2186,10 +2309,24 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, for (i = slot + 1; i < nritems; i++) { u32 ioff; + item = btrfs_item_nr(leaf, i); + if (!leaf->map_token) { + map_extent_buffer(leaf, (unsigned long)item, + sizeof(struct btrfs_item), + &leaf->map_token, &leaf->kaddr, + &leaf->map_start, &leaf->map_len, + KM_USER1); + } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + dsize); } + + if (leaf->map_token) { + unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); + leaf->map_token = NULL; + } + memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), btrfs_item_nr_offset(slot + 1), sizeof(struct btrfs_item) * @@ -2209,8 +2346,7 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(leaf), - 1, 1); + leaf->start, leaf->len, 1); if (wret) ret = wret; } @@ -2247,7 +2383,8 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, } if (btrfs_header_nritems(leaf) == 0) { - u64 blocknr = extent_buffer_blocknr(leaf); + u64 bytenr = leaf->start; + u32 blocksize = leaf->len; 
clean_tree_block(trans, root, leaf); wait_on_tree_block_writeback(root, leaf); @@ -2257,8 +2394,8 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = wret; free_extent_buffer(leaf); - wret = btrfs_free_extent(trans, root, blocknr, - 1, 1); + wret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); if (wret) ret = wret; } else { @@ -2281,7 +2418,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) { int slot; int level = 1; - u64 blocknr; + u64 bytenr; struct extent_buffer *c; struct extent_buffer *next = NULL; @@ -2296,14 +2433,15 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) continue; } - blocknr = btrfs_node_blockptr(c, slot); + bytenr = btrfs_node_blockptr(c, slot); if (next) free_extent_buffer(next); if (path->reada) reada_for_search(root, path, level, slot); - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, + btrfs_level_size(root, level -1)); break; } path->slots[level] = slot; @@ -2317,7 +2455,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) break; if (path->reada) reada_for_search(root, path, level, 0); - next = read_tree_block(root, btrfs_node_blockptr(next, 0)); + next = read_tree_block(root, btrfs_node_blockptr(next, 0), + btrfs_level_size(root, level - 1)); } return 0; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 256689551eb0..4d05456ec32f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -96,7 +96,7 @@ struct btrfs_key { struct btrfs_header { u8 csum[BTRFS_CSUM_SIZE]; u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ - __le64 blocknr; /* which block this node is supposed to live in */ + __le64 bytenr; /* which block this node is supposed to live in */ __le64 generation; __le64 owner; __le32 nritems; @@ -122,16 +122,17 @@ struct btrfs_super_block { u8 csum[BTRFS_CSUM_SIZE]; /* the first 3 fields must match struct btrfs_header */ u8 fsid[16]; /* FS specific uuid */ - __le64 blocknr; /* this 
block number */ + __le64 bytenr; /* this block number */ __le64 magic; __le64 generation; __le64 root; - __le64 total_blocks; - __le64 blocks_used; + __le64 total_bytes; + __le64 bytes_used; __le64 root_dir_objectid; __le32 sectorsize; __le32 nodesize; __le32 leafsize; + u8 root_level; } __attribute__ ((__packed__)); /* @@ -231,13 +232,14 @@ struct btrfs_dir_item { struct btrfs_root_item { struct btrfs_inode_item inode; __le64 root_dirid; - __le64 blocknr; - __le64 block_limit; - __le64 blocks_used; + __le64 bytenr; + __le64 byte_limit; + __le64 bytes_used; __le32 flags; __le32 refs; struct btrfs_disk_key drop_progress; u8 drop_level; + u8 level; } __attribute__ ((__packed__)); #define BTRFS_FILE_EXTENT_REG 0 @@ -250,8 +252,8 @@ struct btrfs_file_extent_item { * disk space consumed by the extent, checksum blocks are included * in these numbers */ - __le64 disk_blocknr; - __le64 disk_num_blocks; + __le64 disk_bytenr; + __le64 disk_num_bytes; /* * the logical offset in file blocks (no csums) * this extent record is for. 
This allows a file extent to point @@ -263,7 +265,7 @@ struct btrfs_file_extent_item { /* * the logical number of file blocks (no csums included) */ - __le64 num_blocks; + __le64 num_bytes; } __attribute__ ((__packed__)); struct btrfs_csum_item { @@ -429,6 +431,7 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ int err; \ char *map_token; \ char *kaddr; \ + int unmap_on_exit = (eb->map_token == NULL); \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = (unsigned long)s + \ @@ -436,12 +439,13 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb, \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ u##bits res = le##bits##_to_cpu(*tmp); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ return res; \ } else { \ __le##bits res; \ @@ -457,17 +461,19 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ char *kaddr; \ unsigned long map_start; \ unsigned long map_len; \ + int unmap_on_exit = (eb->map_token == NULL); \ unsigned long offset = (unsigned long)s + \ offsetof(type, member); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ *tmp = cpu_to_le##bits(val); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ } else { \ val = cpu_to_le##bits(val); \ write_eb_member(eb, s, type, member, &val); \ @@ -483,15 +489,17 @@ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); 
\ + int unmap_on_exit = (eb->map_token == NULL); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ u##bits res = le##bits##_to_cpu(*tmp); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ return res; \ } else { \ __le##bits res; \ @@ -508,15 +516,17 @@ static inline void btrfs_set_##name(struct extent_buffer *eb, \ unsigned long map_start; \ unsigned long map_len; \ unsigned long offset = offsetof(type, member); \ + int unmap_on_exit = (eb->map_token == NULL); \ err = map_extent_buffer(eb, offset, \ sizeof(((type *)0)->member), \ &map_token, &kaddr, \ - &map_start, &map_len, KM_USER0); \ + &map_start, &map_len, KM_USER1); \ if (!err) { \ __le##bits *tmp = (__le##bits *)(kaddr + offset - \ map_start); \ *tmp = cpu_to_le##bits(val); \ - unmap_extent_buffer(eb, map_token, KM_USER0); \ + if (unmap_on_exit) \ + unmap_extent_buffer(eb, map_token, KM_USER1); \ } else { \ val = cpu_to_le##bits(val); \ write_eb_member(eb, NULL, type, member, &val); \ @@ -769,7 +779,7 @@ static inline void btrfs_set_key_type(struct btrfs_key *key, u8 val) } /* struct btrfs_header */ -BTRFS_SETGET_HEADER_FUNCS(header_blocknr, struct btrfs_header, blocknr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, generation, 64); BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); @@ -817,24 +827,28 @@ static inline int btrfs_is_leaf(struct extent_buffer *eb) /* struct btrfs_root_item */ BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); -BTRFS_SETGET_FUNCS(disk_root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64); 
+BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); -BTRFS_SETGET_STACK_FUNCS(root_blocknr, struct btrfs_root_item, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 32); -BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, blocks_used, 64); -BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, block_limit, 64); +BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); /* struct btrfs_super_block */ -BTRFS_SETGET_STACK_FUNCS(super_blocknr, struct btrfs_super_block, blocknr, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); -BTRFS_SETGET_STACK_FUNCS(super_total_blocks, struct btrfs_super_block, - total_blocks, 64); -BTRFS_SETGET_STACK_FUNCS(super_blocks_used, struct btrfs_super_block, - blocks_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, + root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, + bytes_used, 64); BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, sectorsize, 32); BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, @@ -856,33 +870,33 @@ static inline unsigned long btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e) { unsigned long offset = (unsigned long)e; - offset += offsetof(struct 
btrfs_file_extent_item, disk_blocknr); + offset += offsetof(struct btrfs_file_extent_item, disk_bytenr); return offset; } static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) { - return offsetof(struct btrfs_file_extent_item, disk_blocknr) + datasize; + return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize; } static inline u32 btrfs_file_extent_inline_len(struct extent_buffer *eb, struct btrfs_item *e) { unsigned long offset; - offset = offsetof(struct btrfs_file_extent_item, disk_blocknr); + offset = offsetof(struct btrfs_file_extent_item, disk_bytenr); return btrfs_item_size(eb, e) - offset; } -BTRFS_SETGET_FUNCS(file_extent_disk_blocknr, struct btrfs_file_extent_item, - disk_blocknr, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, + disk_bytenr, 64); BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, generation, 64); -BTRFS_SETGET_FUNCS(file_extent_disk_num_blocks, struct btrfs_file_extent_item, - disk_num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, + disk_num_bytes, 64); BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, offset, 64); -BTRFS_SETGET_FUNCS(file_extent_num_blocks, struct btrfs_file_extent_item, - num_blocks, 64); +BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, + num_bytes, 64); static inline struct btrfs_root *btrfs_sb(struct super_block *sb) { @@ -906,6 +920,12 @@ static inline int btrfs_set_root_name(struct btrfs_root *root, return 0; } +static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { + if (level == 0) + return root->leafsize; + return root->nodesize; +} + /* helper function to cast into the data area of the leaf. 
*/ #define btrfs_item_ptr(leaf, slot, type) \ ((type *)(btrfs_leaf_data(leaf) + \ @@ -927,7 +947,7 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, - u64 blocknr); + u64 bytenr); struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, struct btrfs_block_group_cache *hint, u64 search_start, @@ -935,22 +955,22 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, - u64 empty_size); + struct btrfs_root *root, u32 size, + u64 hint, u64 empty_size); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 empty_size, u64 search_start, + u64 num_bytes, u64 empty_size, u64 search_start, u64 search_end, struct btrfs_key *ins, int data); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin); + *root, u64 bytenr, u64 num_bytes, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_map_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num_blocks); + u64 bytenr, u64 num_bytes); int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_free_block_groups(struct btrfs_fs_info *info); @@ -1040,12 +1060,12 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root 
*root, u64 objectid, u64 pos, u64 offset, - u64 disk_num_blocks, - u64 num_blocks); + u64 disk_num_bytes, + u64 num_bytes); int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid, - u64 blocknr, int mod); + u64 bytenr, int mod); int btrfs_csum_file_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 offset, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b86a1d779b7..fad9298c6962 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "print-tree.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -43,26 +44,25 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) #endif struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 blocknr) + u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - blocknr * root->sectorsize, - root->sectorsize, GFP_NOFS); + bytenr, blocksize, GFP_NOFS); if (eb) eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr) + u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - blocknr * root->sectorsize, - root->sectorsize, GFP_NOFS); + bytenr, blocksize, GFP_NOFS); eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -208,13 +208,13 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; -int readahead_tree_block(struct btrfs_root *root, u64 blocknr) +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct 
extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - buf = btrfs_find_create_tree_block(root, blocknr); + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, @@ -223,12 +223,13 @@ int readahead_tree_block(struct btrfs_root *root, u64 blocknr) return ret; } -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - buf = btrfs_find_create_tree_block(root, blocknr); + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, @@ -261,7 +262,7 @@ int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) return 0; } -static int __setup_root(int blocksize, +static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) @@ -269,9 +270,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->sectorsize = blocksize; - root->nodesize = blocksize; - root->leafsize = blocksize; + root->sectorsize = sectorsize; + root->nodesize = nodesize; + root->leafsize = leafsize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -291,21 +292,23 @@ static int __setup_root(int blocksize, return 0; } -static int find_and_setup_root(int blocksize, - struct btrfs_root *tree_root, +static int find_and_setup_root(struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, struct btrfs_root *root) { int ret; + u32 blocksize; - __setup_root(blocksize, root, fs_info, objectid); + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, root, fs_info, objectid); ret = 
btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); - root->node = read_tree_block(root, - btrfs_root_blocknr(&root->root_item)); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize); BUG_ON(!root->node); return 0; } @@ -318,14 +321,14 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; + u32 blocksize; int ret = 0; root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); if (location->offset == (u64)-1) { - ret = find_and_setup_root(fs_info->sb->s_blocksize, - fs_info->tree_root, fs_info, + ret = find_and_setup_root(tree_root, fs_info, location->objectid, root); if (ret) { kfree(root); @@ -334,7 +337,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, goto insert; } - __setup_root(fs_info->sb->s_blocksize, root, fs_info, + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, root, fs_info, location->objectid); path = btrfs_alloc_path(); @@ -357,8 +361,9 @@ out: kfree(root); return ERR_PTR(ret); } - root->node = read_tree_block(root, - btrfs_root_blocknr(&root->root_item)); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize); BUG_ON(!root->node); insert: root->ref_cows = 1; @@ -418,6 +423,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *open_ctree(struct super_block *sb) { + u32 sectorsize; + u32 nodesize; + u32 leafsize; + u32 blocksize; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -474,12 +483,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); 
mutex_init(&fs_info->fs_mutex); - __setup_root(sb->s_blocksize, tree_root, + __setup_root(512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, - BTRFS_SUPER_INFO_OFFSET / - sb->s_blocksize); + BTRFS_SUPER_INFO_OFFSET, + 512); if (!fs_info->sb_buffer) goto fail_iput; @@ -494,9 +503,15 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; + nodesize = btrfs_super_nodesize(disk_super); + leafsize = btrfs_super_leafsize(disk_super); + sectorsize = btrfs_super_sectorsize(disk_super); + tree_root->nodesize = nodesize; + tree_root->leafsize = leafsize; + tree_root->sectorsize = sectorsize; + i_size_write(fs_info->btree_inode, - btrfs_super_total_blocks(disk_super) << - fs_info->btree_inode->i_blkbits); + btrfs_super_total_bytes(disk_super)); if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, @@ -504,13 +519,22 @@ struct btrfs_root *open_ctree(struct super_block *sb) printk("btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } + blocksize = btrfs_level_size(tree_root, + btrfs_super_root_level(disk_super)); tree_root->node = read_tree_block(tree_root, - btrfs_super_root(disk_super)); + btrfs_super_root(disk_super), + blocksize); if (!tree_root->node) goto fail_sb_buffer; +#if 0 + btrfs_print_leaf(tree_root, tree_root->node); + err = -EIO; + goto fail_tree_root; +#endif mutex_lock(&fs_info->fs_mutex); - ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, + + ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); if (ret) { mutex_unlock(&fs_info->fs_mutex); @@ -611,11 +635,11 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(fs_info->tree_root->node); free_extent_buffer(fs_info->sb_buffer); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); - iput(fs_info->btree_inode); btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + 
truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); kfree(fs_info->extent_root); kfree(fs_info->tree_root); return 0; @@ -642,7 +666,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)buf->start, transid, root->fs_info->generation); WARN_ON(1); } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 70d9413c599f..fd4db5f810cf 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,10 +21,11 @@ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr); -int readahead_tree_block(struct btrfs_root *root, u64 blocknr); +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize); +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr); + u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb); @@ -32,7 +33,7 @@ int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 blocknr); + u64 bytenr, u32 blocksize); struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_key *location, const char *name, int namelen); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 477466d167a4..1be8f9f04a13 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -129,7 +129,7 @@ err: struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, - u64 blocknr) + u64 bytenr) { struct extent_map_tree 
*block_group_cache; struct btrfs_block_group_cache *block_group = NULL; @@ -140,7 +140,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, - blocknr, &start, &end, + bytenr, &start, &end, BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA); if (ret) { return NULL; @@ -152,7 +152,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct block_group = (struct btrfs_block_group_cache *)ptr; - if (block_group->key.objectid <= blocknr && blocknr <= + if (block_group->key.objectid <= bytenr && bytenr <= block_group->key.objectid + block_group->key.offset) return block_group; @@ -315,7 +315,7 @@ found: int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num_blocks) + u64 bytenr, u64 num_bytes) { struct btrfs_path *path; int ret; @@ -324,13 +324,14 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_extent_item *item; u32 refs; + WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - key.objectid = blocknr; + key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_blocks; + key.offset = num_bytes; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); if (ret < 0) @@ -361,8 +362,8 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, } static int lookup_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 blocknr, - u64 num_blocks, u32 *refs) + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u32 *refs) { struct btrfs_path *path; int ret; @@ -370,9 +371,10 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, struct extent_buffer *l; struct btrfs_extent_item *item; + WARN_ON(num_bytes < root->sectorsize); path = btrfs_alloc_path(); - key.objectid = blocknr; - key.offset = num_blocks; + key.objectid = bytenr; + key.offset = num_bytes; 
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); @@ -380,7 +382,7 @@ static int lookup_extent_ref(struct btrfs_trans_handle *trans, goto out; if (ret != 0) { btrfs_print_leaf(root, path->nodes[0]); - printk("failed to find block number %Lu\n", blocknr); + printk("failed to find block number %Lu\n", bytenr); BUG(); } l = path->nodes[0]; @@ -394,19 +396,19 @@ out: int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return btrfs_inc_extent_ref(trans, root, - extent_buffer_blocknr(root->node), 1); + return btrfs_inc_extent_ref(trans, root, root->node->start, + root->node->len); } int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { - u64 blocknr; + u64 bytenr; u32 nritems; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; - int leaf; + int level; int ret; int faili; int err; @@ -414,11 +416,11 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (!root->ref_cows) return 0; - leaf = btrfs_is_leaf(buf); + level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); for (i = 0; i < nritems; i++) { - if (leaf) { - u64 disk_blocknr; + if (level == 0) { + u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -427,18 +429,19 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (disk_bytenr == 0) continue; - ret = btrfs_inc_extent_ref(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(buf, fi)); + ret = btrfs_inc_extent_ref(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(buf, fi)); if (ret) { faili = i; goto fail; } } else { - blocknr 
= btrfs_node_blockptr(buf, i); - ret = btrfs_inc_extent_ref(trans, root, blocknr, 1); + bytenr = btrfs_node_blockptr(buf, i); + ret = btrfs_inc_extent_ref(trans, root, bytenr, + btrfs_level_size(root, level - 1)); if (ret) { faili = i; goto fail; @@ -449,8 +452,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, fail: WARN_ON(1); for (i =0; i < faili; i++) { - if (leaf) { - u64 disk_blocknr; + if (level == 0) { + u64 disk_bytenr; btrfs_item_key_to_cpu(buf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; @@ -459,16 +462,17 @@ fail: if (btrfs_file_extent_type(buf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - disk_blocknr = btrfs_file_extent_disk_blocknr(buf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); + if (disk_bytenr == 0) continue; - err = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(buf, + err = btrfs_free_extent(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(buf, fi), 0); BUG_ON(err); } else { - blocknr = btrfs_node_blockptr(buf, i); - err = btrfs_free_extent(trans, root, blocknr, 1, 0); + bytenr = btrfs_node_blockptr(buf, i); + err = btrfs_free_extent(trans, root, bytenr, + btrfs_level_size(root, level - 1), 0); BUG_ON(err); } } @@ -558,31 +562,31 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 blocknr, u64 num, int alloc, int mark_free, - int data) + u64 bytenr, u64 num_bytes, int alloc, + int mark_free, int data) { struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - u64 total = num; + u64 total = num_bytes; u64 old_val; - u64 block_in_group; + u64 byte_in_group; u64 start; u64 end; while(total) { - cache = btrfs_lookup_block_group(info, blocknr); + cache = btrfs_lookup_block_group(info, bytenr); if (!cache) { return -1; } - block_in_group = blocknr - 
cache->key.objectid; - WARN_ON(block_in_group > cache->key.offset); + byte_in_group = bytenr - cache->key.objectid; + WARN_ON(byte_in_group > cache->key.offset); start = cache->key.objectid; end = start + cache->key.offset - 1; set_extent_bits(&info->block_group_cache, start, end, BLOCK_GROUP_DIRTY, GFP_NOFS); old_val = btrfs_block_group_used(&cache->item); - num = min(total, cache->key.offset - block_in_group); + num_bytes = min(total, cache->key.offset - byte_in_group); if (alloc) { if (cache->data != data && old_val < (cache->key.offset >> 1)) { @@ -608,18 +612,18 @@ static int update_block_group(struct btrfs_trans_handle *trans, start, end, bit_to_set, GFP_NOFS); } - old_val += num; + old_val += num_bytes; } else { - old_val -= num; + old_val -= num_bytes; if (mark_free) { set_extent_dirty(&info->free_space_cache, - blocknr, blocknr + num - 1, + bytenr, bytenr + num_bytes - 1, GFP_NOFS); } } btrfs_set_block_group_used(&cache->item, old_val); - total -= num; - blocknr += num; + total -= num_bytes; + bytenr += num_bytes; } return 0; } @@ -701,13 +705,14 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct return 0; } -static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) +static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes, + int pending) { int err = 0; struct extent_buffer *buf; if (!pending) { - buf = btrfs_find_tree_block(root, blocknr); + buf = btrfs_find_tree_block(root, bytenr, num_bytes); if (buf) { if (btrfs_buffer_uptodate(buf)) { u64 transid = @@ -720,10 +725,11 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) free_extent_buffer(buf); } set_extent_dirty(&root->fs_info->pinned_extents, - blocknr, blocknr, GFP_NOFS); + bytenr, bytenr + num_bytes - 1, GFP_NOFS); } else { set_extent_bits(&root->fs_info->pending_del, - blocknr, blocknr, EXTENT_LOCKED, GFP_NOFS); + bytenr, bytenr + num_bytes - 1, + EXTENT_LOCKED, GFP_NOFS); } BUG_ON(err < 0); return 0; @@ 
-733,7 +739,7 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending) * remove an extent from the root, returns 0 on success */ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin, + *root, u64 bytenr, u64 num_bytes, int pin, int mark_free) { struct btrfs_path *path; @@ -745,9 +751,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root struct btrfs_extent_item *ei; u32 refs; - key.objectid = blocknr; + key.objectid = bytenr; btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); - key.offset = num_blocks; + key.offset = num_bytes; path = btrfs_alloc_path(); if (!path) @@ -768,28 +774,29 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_mark_buffer_dirty(leaf); if (refs == 0) { - u64 super_blocks_used, root_blocks_used; + u64 super_used; + u64 root_used; if (pin) { - ret = pin_down_block(root, blocknr, 0); + ret = pin_down_bytes(root, bytenr, num_bytes, 0); BUG_ON(ret); } /* block accounting for super block */ - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, - super_blocks_used - num_blocks); + super_used = btrfs_super_bytes_used(&info->super_copy); + btrfs_set_super_bytes_used(&info->super_copy, + super_used - num_bytes); /* block accounting for root item */ - root_blocks_used = btrfs_root_used(&root->root_item); + root_used = btrfs_root_used(&root->root_item); btrfs_set_root_used(&root->root_item, - root_blocks_used - num_blocks); + root_used - num_bytes); ret = btrfs_del_item(trans, extent_root, path); if (ret) { return ret; } - ret = update_block_group(trans, root, blocknr, num_blocks, 0, + ret = update_block_group(trans, root, bytenr, num_bytes, 0, mark_free, 0); BUG_ON(ret); } @@ -836,17 +843,18 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct * remove an extent from the root, returns 0 on success */ int 
btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *root, u64 blocknr, u64 num_blocks, int pin) + *root, u64 bytenr, u64 num_bytes, int pin) { struct btrfs_root *extent_root = root->fs_info->extent_root; int pending_ret; int ret; + WARN_ON(num_bytes < root->sectorsize); if (root == extent_root) { - pin_down_block(root, blocknr, 1); + pin_down_bytes(root, bytenr, num_bytes, 1); return 0; } - ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0); + ret = __free_extent(trans, root, bytenr, num_bytes, pin, pin == 0); pending_ret = del_pending_extents(trans, root->fs_info->extent_root); return ret ? ret : pending_ret; } @@ -860,8 +868,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root * Any available blocks before search_start are skipped. */ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root - *orig_root, u64 num_blocks, u64 empty_size, - u64 search_start, u64 search_end, u64 hint_block, + *orig_root, u64 num_bytes, u64 empty_size, + u64 search_start, u64 search_end, u64 hint_byte, struct btrfs_key *ins, u64 exclude_start, u64 exclude_nr, int data) { @@ -870,30 +878,29 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root int ret; u64 hole_size = 0; int slot = 0; - u64 last_block = 0; + u64 last_byte = 0; u64 orig_search_start = search_start; int start_found; struct extent_buffer *l; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; - int total_needed = num_blocks; + u64 total_needed = num_bytes; int level; struct btrfs_block_group_cache *block_group; int full_scan = 0; int wrapped = 0; - u64 cached_search_start = 0; - WARN_ON(num_blocks < 1); + WARN_ON(num_bytes < root->sectorsize); btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); level = btrfs_header_level(root->node); if (search_end == (u64)-1) - search_end = btrfs_super_total_blocks(&info->super_copy); - if (hint_block) { - block_group = 
btrfs_lookup_block_group(info, hint_block); + search_end = btrfs_super_total_bytes(&info->super_copy); + if (hint_byte) { + block_group = btrfs_lookup_block_group(info, hint_byte); block_group = btrfs_find_block_group(root, block_group, - hint_block, data, 1); + hint_byte, data, 1); } else { block_group = btrfs_find_block_group(root, trans->block_group, 0, @@ -906,7 +913,6 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root check_failed: search_start = find_search_start(root, &block_group, search_start, total_needed, data); - cached_search_start = search_start; btrfs_init_path(path); ins->objectid = search_start; @@ -958,27 +964,27 @@ check_failed: start_found = 1; goto check_pending; } - ins->objectid = last_block > search_start ? - last_block : search_start; + ins->objectid = last_byte > search_start ? + last_byte : search_start; ins->offset = search_end - ins->objectid; goto check_pending; } btrfs_item_key_to_cpu(l, &key, slot); - if (key.objectid >= search_start && key.objectid > last_block && + if (key.objectid >= search_start && key.objectid > last_byte && start_found) { - if (last_block < search_start) - last_block = search_start; - hole_size = key.objectid - last_block; - if (hole_size >= num_blocks) { - ins->objectid = last_block; + if (last_byte < search_start) + last_byte = search_start; + hole_size = key.objectid - last_byte; + if (hole_size >= num_bytes) { + ins->objectid = last_byte; ins->offset = hole_size; goto check_pending; } } if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) { if (!start_found) { - last_block = key.objectid; + last_byte = key.objectid; start_found = 1; } goto next; @@ -986,9 +992,9 @@ check_failed: start_found = 1; - last_block = key.objectid + key.offset; + last_byte = key.objectid + key.offset; - if (!full_scan && last_block >= block_group->key.objectid + + if (!full_scan && last_byte >= block_group->key.objectid + block_group->key.offset) { btrfs_release_path(root, path); search_start = 
block_group->key.objectid + @@ -1006,20 +1012,20 @@ check_pending: btrfs_release_path(root, path); BUG_ON(ins->objectid < search_start); - if (ins->objectid + num_blocks >= search_end) + if (ins->objectid + num_bytes >= search_end) goto enospc; if (test_range_bit(&info->extent_ins, ins->objectid, - ins->objectid + num_blocks -1, EXTENT_LOCKED, 0)) { - search_start = ins->objectid + num_blocks; + ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) { + search_start = ins->objectid + num_bytes; goto new_group; } if (test_range_bit(&info->pinned_extents, ins->objectid, - ins->objectid + num_blocks -1, EXTENT_DIRTY, 0)) { - search_start = ins->objectid + num_blocks; + ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) { + search_start = ins->objectid + num_bytes; goto new_group; } - if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start && + if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start && ins->objectid < exclude_start + exclude_nr)) { search_start = exclude_start + exclude_nr; goto new_group; @@ -1029,12 +1035,12 @@ check_pending: if (block_group) trans->block_group = block_group; } - ins->offset = num_blocks; + ins->offset = num_bytes; btrfs_free_path(path); return 0; new_group: - if (search_start + num_blocks >= search_end) { + if (search_start + num_bytes >= search_end) { enospc: search_start = orig_search_start; if (full_scan) { @@ -1069,12 +1075,12 @@ error: */ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner, - u64 num_blocks, u64 empty_size, u64 hint_block, + u64 num_bytes, u64 empty_size, u64 hint_byte, u64 search_end, struct btrfs_key *ins, int data) { int ret; int pending_ret; - u64 super_blocks_used, root_blocks_used; + u64 super_used, root_used; u64 search_start = 0; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; @@ -1083,9 +1089,9 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_stack_extent_refs(&extent_item, 1); 
btrfs_set_stack_extent_owner(&extent_item, owner); - WARN_ON(num_blocks < 1); - ret = find_free_extent(trans, root, num_blocks, empty_size, - search_start, search_end, hint_block, ins, + WARN_ON(num_bytes < root->sectorsize); + ret = find_free_extent(trans, root, num_bytes, empty_size, + search_start, search_end, hint_byte, ins, trans->alloc_exclude_start, trans->alloc_exclude_nr, data); BUG_ON(ret); @@ -1093,21 +1099,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, return ret; /* block accounting for super block */ - super_blocks_used = btrfs_super_blocks_used(&info->super_copy); - btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used + - num_blocks); + super_used = btrfs_super_bytes_used(&info->super_copy); + btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); /* block accounting for root item */ - root_blocks_used = btrfs_root_used(&root->root_item); - btrfs_set_root_used(&root->root_item, root_blocks_used + - num_blocks); + root_used = btrfs_root_used(&root->root_item); + btrfs_set_root_used(&root->root_item, root_used + num_bytes); clear_extent_dirty(&root->fs_info->free_space_cache, ins->objectid, ins->objectid + ins->offset - 1, GFP_NOFS); if (root == extent_root) { - BUG_ON(num_blocks != 1); set_extent_bits(&root->fs_info->extent_ins, ins->objectid, ins->objectid + ins->offset - 1, EXTENT_LOCKED, GFP_NOFS); @@ -1146,7 +1149,8 @@ update_block: * returns the tree buffer or NULL. 
*/ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 hint, + struct btrfs_root *root, + u32 blocksize, u64 hint, u64 empty_size) { struct btrfs_key ins; @@ -1154,14 +1158,15 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *buf; ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, - 1, empty_size, hint, (u64)-1, &ins, 0); + blocksize, empty_size, hint, + (u64)-1, &ins, 0); if (ret) { BUG_ON(ret > 0); return ERR_PTR(ret); } - buf = btrfs_find_create_tree_block(root, ins.objectid); + buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize); if (!buf) { - btrfs_free_extent(trans, root, ins.objectid, 1, 0); + btrfs_free_extent(trans, root, ins.objectid, blocksize, 0); return ERR_PTR(-ENOMEM); } btrfs_set_buffer_uptodate(buf); @@ -1191,7 +1196,7 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, BUG_ON(!btrfs_is_leaf(leaf)); nritems = btrfs_header_nritems(leaf); for (i = 0; i < nritems; i++) { - u64 disk_blocknr; + u64 disk_bytenr; btrfs_item_key_to_cpu(leaf, &key, i); if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) @@ -1204,11 +1209,11 @@ static int drop_leaf_ref(struct btrfs_trans_handle *trans, * FIXME make sure to insert a trans record that * repeats the snapshot del on crash */ - disk_blocknr = btrfs_file_extent_disk_blocknr(leaf, fi); - if (disk_blocknr == 0) + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + if (disk_bytenr == 0) continue; - ret = btrfs_free_extent(trans, root, disk_blocknr, - btrfs_file_extent_disk_num_blocks(leaf, fi), 0); + ret = btrfs_free_extent(trans, root, disk_bytenr, + btrfs_file_extent_disk_num_bytes(leaf, fi), 0); BUG_ON(ret); } return 0; @@ -1219,19 +1224,23 @@ static void reada_walk_down(struct btrfs_root *root, { int i; u32 nritems; - u64 blocknr; + u64 bytenr; int ret; u32 refs; + int level; + u32 blocksize; nritems = btrfs_header_nritems(node); + level = 
btrfs_header_level(node); for (i = 0; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); - ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs); + bytenr = btrfs_node_blockptr(node, i); + blocksize = btrfs_level_size(root, level - 1); + ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) continue; mutex_unlock(&root->fs_info->fs_mutex); - ret = readahead_tree_block(root, blocknr); + ret = readahead_tree_block(root, bytenr, blocksize); cond_resched(); mutex_lock(&root->fs_info->fs_mutex); if (ret) @@ -1248,15 +1257,16 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root { struct extent_buffer *next; struct extent_buffer *cur; - u64 blocknr; + u64 bytenr; + u32 blocksize; int ret; u32 refs; WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = lookup_extent_ref(trans, root, - extent_buffer_blocknr(path->nodes[*level]), - 1, &refs); + path->nodes[*level]->start, + path->nodes[*level]->len, &refs); BUG_ON(ret); if (refs > 1) goto out; @@ -1283,30 +1293,33 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(ret); break; } - blocknr = btrfs_node_blockptr(cur, path->slots[*level]); - ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); + blocksize = btrfs_level_size(root, *level - 1); + ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; - ret = btrfs_free_extent(trans, root, blocknr, 1, 1); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, 1); BUG_ON(ret); continue; } - next = btrfs_find_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); mutex_unlock(&root->fs_info->fs_mutex); - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, blocksize); mutex_lock(&root->fs_info->fs_mutex); /* we 
dropped the lock, check one more time */ - ret = lookup_extent_ref(trans, root, blocknr, 1, &refs); + ret = lookup_extent_ref(trans, root, bytenr, + blocksize, &refs); BUG_ON(ret); if (refs != 1) { path->slots[*level]++; free_extent_buffer(next); ret = btrfs_free_extent(trans, root, - blocknr, 1, 1); + bytenr, blocksize, 1); BUG_ON(ret); continue; } @@ -1321,8 +1334,8 @@ static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root out: WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); - ret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(path->nodes[*level]), 1, 1); + ret = btrfs_free_extent(trans, root, path->nodes[*level]->start, + path->nodes[*level]->len, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; *level += 1; @@ -1359,8 +1372,8 @@ static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } else { ret = btrfs_free_extent(trans, root, - extent_buffer_blocknr(path->nodes[*level]), - 1, 1); + path->nodes[*level]->start, + path->nodes[*level]->len, 1); BUG_ON(ret); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; @@ -1476,16 +1489,12 @@ int btrfs_read_block_groups(struct btrfs_root *root) struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; - u64 group_size_blocks; block_group_cache = &info->block_group_cache; - group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >> - info->sb->s_blocksize_bits; - root = info->extent_root; key.objectid = 0; - key.offset = group_size_blocks; + key.offset = BTRFS_BLOCK_GROUP_SIZE; btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); path = btrfs_alloc_path(); @@ -1532,7 +1541,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) (u64)cache); if (key.objectid >= - btrfs_super_total_blocks(&info->super_copy)) + btrfs_super_total_bytes(&info->super_copy)) break; } diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index e081558d52ff..f658703c42e6 100644 --- a/fs/btrfs/extent_map.c +++ 
b/fs/btrfs/extent_map.c @@ -1963,18 +1963,27 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i) struct page *p; if (i == 0) return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; + if (eb->last_page && eb->last_page->index == i) + return eb->last_page; + p = find_get_page(eb->first_page->mapping, i); page_cache_release(p); + eb->last_page = p; return p; } +static inline unsigned long num_extent_pages(u64 start, u64 len) +{ + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); +} struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) { - unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long num_pages = num_extent_pages(start, len); unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; @@ -1986,7 +1995,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = __builtin_return_address(0); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -1994,6 +2003,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); if (!p) { + WARN_ON(1); /* make sure the free only frees the pages we've * grabbed a reference on */ @@ -2021,8 +2031,7 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, u64 start, unsigned long len, gfp_t mask) { - unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT) + 1; + unsigned long num_pages = num_extent_pages(start, len); unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; struct extent_buffer *eb; @@ -2033,7 +2042,7 @@ struct extent_buffer 
*find_extent_buffer(struct extent_map_tree *tree, if (!eb || IS_ERR(eb)) return NULL; - eb->alloc_addr = __builtin_return_address(0); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); eb->start = start; eb->len = len; atomic_set(&eb->refs, 1); @@ -2070,8 +2079,7 @@ void free_extent_buffer(struct extent_buffer *eb) if (!atomic_dec_and_test(&eb->refs)) return; - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); if (eb->first_page) page_cache_release(eb->first_page); @@ -2094,8 +2102,7 @@ int clear_extent_buffer_dirty(struct extent_map_tree *tree, u64 end = start + eb->len - 1; set = clear_extent_dirty(tree, start, end, GFP_NOFS); - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); @@ -2145,8 +2152,7 @@ int set_extent_buffer_uptodate(struct extent_map_tree *tree, struct page *page; unsigned long num_pages; - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); @@ -2191,8 +2197,7 @@ int read_extent_buffer_pages(struct extent_map_tree *tree, return 0; } - num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT) + 1; + num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (PageUptodate(page)) { @@ -2267,14 +2272,14 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } EXPORT_SYMBOL(read_extent_buffer); -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) +static int 
__map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) { size_t offset = start & (PAGE_CACHE_SIZE - 1); char *kaddr; + struct page *p; size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; unsigned long end_i = (start_offset + start + min_len) >> @@ -2283,21 +2288,59 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, if (i != end_i) return -EINVAL; - WARN_ON(start > eb->len); + if (start >= eb->len) { + printk("bad start in map eb start %Lu len %lu caller start %lu min %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } if (i == 0) { offset = start_offset; *map_start = 0; } else { + offset = 0; *map_start = (i << PAGE_CACHE_SHIFT) - start_offset; } - kaddr = kmap_atomic(extent_buffer_page(eb, i), km); + p = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(p)); + kaddr = kmap_atomic(p, km); *token = kaddr; *map = kaddr + offset; *map_len = PAGE_CACHE_SIZE - offset; return 0; } + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + int err; + int save = 0; + if (eb->map_token) { + if (start >= eb->map_start && + start + min_len <= eb->map_start + eb->map_len) { + *token = eb->map_token; + *map = eb->kaddr; + *map_start = eb->map_start; + *map_len = eb->map_len; + return 0; + } + unmap_extent_buffer(eb, eb->map_token, km); + eb->map_token = NULL; + save = 1; + } + err = __map_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); + if (!err && save) { + eb->map_token = *token; + eb->kaddr = *map; + eb->map_start = *map_start; + eb->map_len = *map_len; + } + return err; +} EXPORT_SYMBOL(map_extent_buffer); void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) @@ -2574,7 
+2617,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, ((unsigned long)PAGE_CACHE_SIZE - 1); src_off_in_page = src_end & ((unsigned long)PAGE_CACHE_SIZE - 1); - if (src_i == 0) src_off_in_page += start_offset; if (dst_i == 0) @@ -2582,14 +2624,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, cur = min(len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); - move_pages(extent_buffer_page(dst, dst_i), extent_buffer_page(dst, src_i), dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur); - dst_end -= cur - 1; - src_end -= cur - 1; + dst_end -= cur; + src_end -= cur; len -= cur; } } diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 75dc600dc0f9..4ef8bdd68348 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -70,7 +70,12 @@ struct extent_buffer { struct list_head list; struct list_head leak_list; struct page *first_page; + struct page *last_page; unsigned long alloc_addr; + char *map_token; + char *kaddr; + unsigned long map_start; + unsigned long map_len; }; typedef struct extent_map *(get_extent_t)(struct inode *inode, @@ -147,11 +152,6 @@ static inline void extent_buffer_get(struct extent_buffer *eb) atomic_inc(&eb->refs); } -static inline u64 extent_buffer_blocknr(struct extent_buffer *eb) -{ - return eb->start / 4096; -} - int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, unsigned long start, unsigned long len); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 226f6d028c3f..9a65e97a4e28 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -27,8 +27,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, - u64 offset, u64 disk_num_blocks, - u64 num_blocks) + u64 offset, u64 disk_num_bytes, + u64 num_bytes) { int ret = 0; struct btrfs_file_extent_item *item; @@ -50,10 +50,10 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, leaf = 
path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - btrfs_set_file_extent_disk_blocknr(leaf, item, offset); - btrfs_set_file_extent_disk_num_blocks(leaf, item, disk_num_blocks); + btrfs_set_file_extent_disk_bytenr(leaf, item, offset); + btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes); btrfs_set_file_extent_offset(leaf, item, 0); - btrfs_set_file_extent_num_blocks(leaf, item, num_blocks); + btrfs_set_file_extent_num_bytes(leaf, item, num_bytes); btrfs_set_file_extent_generation(leaf, item, trans->transid); btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG); btrfs_mark_buffer_dirty(leaf); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1734ca695555..844d8807e44a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -120,9 +120,9 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); ptr = btrfs_file_extent_inline_start(ei); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page, KM_USER1); write_extent_buffer(leaf, kaddr + page_offset, ptr, size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr, KM_USER1); btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); @@ -142,11 +142,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, struct inode *inode = file->f_path.dentry->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - u64 hint_block; - u64 num_blocks; + u64 hint_byte; + u64 num_bytes; u64 start_pos; u64 end_of_last_block; u64 end_pos = pos + write_bytes; + u32 inline_size; loff_t isize = i_size_read(inode); em = alloc_extent_map(GFP_NOFS); @@ -156,11 +157,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, em->bdev = inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); - num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> - inode->i_blkbits; + num_bytes = 
(write_bytes + pos - start_pos + + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); down_read(&BTRFS_I(inode)->root->snap_sem); - end_of_last_block = start_pos + (num_blocks << inode->i_blkbits) - 1; + end_of_last_block = start_pos + num_bytes - 1; + lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -169,8 +171,8 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, goto out_unlock; } btrfs_set_trans_block_group(trans, inode); - inode->i_blocks += num_blocks << 3; - hint_block = 0; + inode->i_blocks += num_bytes >> 9; + hint_byte = 0; if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); @@ -191,11 +193,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, - &hint_block); + &hint_byte); if (err) goto failed; - hole_size >>= inode->i_blkbits; err = btrfs_insert_file_extent(trans, root, inode->i_ino, last_pos_in_file, @@ -209,8 +210,10 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, * either allocate an extent for the new bytes or setup the key * to show we are doing inline data in the extent */ + inline_size = end_pos - start_pos; if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { + inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size >= PAGE_CACHE_SIZE) { u64 last_end; for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -224,10 +227,9 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, } else { struct page *p = pages[0]; /* step one, delete the existing extents in this range */ - /* FIXME blocksize != pagesize */ err = btrfs_drop_extents(trans, root, inode, start_pos, (pos + write_bytes + root->sectorsize -1) & - 
~((u64)root->sectorsize - 1), &hint_block); + ~((u64)root->sectorsize - 1), &hint_byte); if (err) goto failed; @@ -283,7 +285,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 *hint_block) + u64 start, u64 end, u64 *hint_byte) { int ret; struct btrfs_key key; @@ -346,8 +348,7 @@ next_slot: found_type = btrfs_file_extent_type(leaf, extent); if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = key.offset + - (btrfs_file_extent_num_blocks(leaf, extent) << - inode->i_blkbits); + btrfs_file_extent_num_bytes(leaf, extent); found_extent = 1; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { struct btrfs_item *item; @@ -386,17 +387,17 @@ next_slot: if (end < extent_end && end >= key.offset) { if (found_extent) { - u64 disk_blocknr = - btrfs_file_extent_disk_blocknr(leaf,extent); - u64 disk_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, + u64 disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, extent); + u64 disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, extent); read_extent_buffer(leaf, &old, (unsigned long)extent, sizeof(old)); - if (disk_blocknr != 0) { + if (disk_bytenr != 0) { ret = btrfs_inc_extent_ref(trans, root, - disk_blocknr, disk_num_blocks); + disk_bytenr, disk_num_bytes); BUG_ON(ret); } } @@ -410,21 +411,19 @@ next_slot: keep = 1; WARN_ON(start & (root->sectorsize - 1)); if (found_extent) { - new_num = (start - key.offset) >> - inode->i_blkbits; - old_num = btrfs_file_extent_num_blocks(leaf, - extent); - *hint_block = - btrfs_file_extent_disk_blocknr(leaf, - extent); - if (btrfs_file_extent_disk_blocknr(leaf, - extent)) { + new_num = start - key.offset; + old_num = btrfs_file_extent_num_bytes(leaf, + extent); + *hint_byte = + btrfs_file_extent_disk_bytenr(leaf, + extent); + if (btrfs_file_extent_disk_bytenr(leaf, + extent)) { inode->i_blocks -= - (old_num - new_num) << 3; + (old_num 
- new_num) >> 9; } - btrfs_set_file_extent_num_blocks(leaf, - extent, - new_num); + btrfs_set_file_extent_num_bytes(leaf, extent, + new_num); btrfs_mark_buffer_dirty(leaf); } else { WARN_ON(1); @@ -432,33 +431,32 @@ next_slot: } /* delete the entire extent */ if (!keep) { - u64 disk_blocknr = 0; - u64 disk_num_blocks = 0; - u64 extent_num_blocks = 0; + u64 disk_bytenr = 0; + u64 disk_num_bytes = 0; + u64 extent_num_bytes = 0; if (found_extent) { - disk_blocknr = - btrfs_file_extent_disk_blocknr(leaf, + disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, extent); - disk_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, - extent); - extent_num_blocks = - btrfs_file_extent_num_blocks(leaf, - extent); - *hint_block = - btrfs_file_extent_disk_blocknr(leaf, + disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, extent); + extent_num_bytes = + btrfs_file_extent_num_bytes(leaf, extent); + *hint_byte = + btrfs_file_extent_disk_bytenr(leaf, + extent); } ret = btrfs_del_item(trans, root, path); /* TODO update progress marker and return */ BUG_ON(ret); btrfs_release_path(root, path); extent = NULL; - if (found_extent && disk_blocknr != 0) { - inode->i_blocks -= extent_num_blocks << 3; + if (found_extent && disk_bytenr != 0) { + inode->i_blocks -= extent_num_bytes >> 9; ret = btrfs_free_extent(trans, root, - disk_blocknr, - disk_num_blocks, 0); + disk_bytenr, + disk_num_bytes, 0); } BUG_ON(ret); @@ -491,20 +489,19 @@ next_slot: (unsigned long)extent, sizeof(old)); btrfs_set_file_extent_offset(leaf, extent, - le64_to_cpu(old.offset) + - ((end - key.offset) >> inode->i_blkbits)); - WARN_ON(le64_to_cpu(old.num_blocks) < - (extent_end - end) >> inode->i_blkbits); - btrfs_set_file_extent_num_blocks(leaf, extent, - (extent_end - end) >> inode->i_blkbits); - + le64_to_cpu(old.offset) + end - key.offset); + WARN_ON(le64_to_cpu(old.num_bytes) < + (extent_end - end)); + btrfs_set_file_extent_num_bytes(leaf, extent, + extent_end - end); btrfs_set_file_extent_type(leaf, extent, 
BTRFS_FILE_EXTENT_REG); + btrfs_mark_buffer_dirty(path->nodes[0]); - if (le64_to_cpu(old.disk_blocknr) != 0) { + if (le64_to_cpu(old.disk_bytenr) != 0) { inode->i_blocks += - btrfs_file_extent_num_blocks(leaf, - extent) << 3; + btrfs_file_extent_num_bytes(leaf, + extent) >> 9; } ret = 0; goto out; @@ -531,12 +528,9 @@ static int prepare_pages(struct btrfs_root *root, unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = file->f_path.dentry->d_inode; int err = 0; - u64 num_blocks; u64 start_pos; start_pos = pos & ~((u64)root->sectorsize - 1); - num_blocks = (write_bytes + pos - start_pos + root->sectorsize - 1) >> - inode->i_blkbits; memset(pages, 0, num_pages * sizeof(struct page *)); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fbe2836364e0..d6b3a55ed8e0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -77,19 +77,19 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_key ins; u64 alloc_hint = 0; - u64 num_blocks; + u64 num_bytes; int ret; - u64 blocksize = 1 << inode->i_blkbits; + u64 blocksize = root->sectorsize; mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); BUG_ON(!trans); - num_blocks = (end - start + blocksize) & ~(blocksize - 1); + num_bytes = (end - start + blocksize) & ~(blocksize - 1); ret = btrfs_drop_extents(trans, root, inode, - start, start + num_blocks, &alloc_hint); - num_blocks = num_blocks >> inode->i_blkbits; - ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_blocks, 0, + start, start + num_bytes, &alloc_hint); + + ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_bytes, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); @@ -186,7 +186,8 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) zeroit: printk("btrfs csum failed ino %lu off %llu\n", page->mapping->host->i_ino, (unsigned long long)start); - memset(kaddr + offset, 1, end - 
start + 1); flush_dcache_page(page); + memset(kaddr + offset, 1, end - start + 1); + flush_dcache_page(page); kunmap_atomic(kaddr, KM_IRQ0); return 0; } @@ -547,7 +548,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; - u64 extent_num_blocks = 0; + u64 extent_num_bytes = 0; u64 item_end = 0; int found_extent; int del_item; @@ -593,8 +594,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { item_end += - btrfs_file_extent_num_blocks(leaf, fi) << - inode->i_blkbits; + btrfs_file_extent_num_bytes(leaf, fi); } } if (found_type == BTRFS_CSUM_ITEM_KEY) { @@ -626,28 +626,27 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_INLINE) { u64 num_dec; - extent_start = btrfs_file_extent_disk_blocknr(leaf, fi); + extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); if (!del_item) { - u64 orig_num_blocks = - btrfs_file_extent_num_blocks(leaf, fi); - extent_num_blocks = inode->i_size - + u64 orig_num_bytes = + btrfs_file_extent_num_bytes(leaf, fi); + extent_num_bytes = inode->i_size - found_key.offset + root->sectorsize - 1; - extent_num_blocks >>= inode->i_blkbits; - btrfs_set_file_extent_num_blocks(leaf, fi, - extent_num_blocks); - num_dec = (orig_num_blocks - - extent_num_blocks) << 3; + btrfs_set_file_extent_num_bytes(leaf, fi, + extent_num_bytes); + num_dec = (orig_num_bytes - + extent_num_bytes) >> 9; if (extent_start != 0) { inode->i_blocks -= num_dec; } btrfs_mark_buffer_dirty(leaf); } else { - extent_num_blocks = - btrfs_file_extent_disk_num_blocks(leaf, - fi); + extent_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, + fi); /* FIXME blocksize != 4096 */ - num_dec = btrfs_file_extent_num_blocks(leaf, - fi) << 3; + num_dec = btrfs_file_extent_num_bytes(leaf, + fi) >> 9; if (extent_start != 0) { 
found_extent = 1; inode->i_blocks -= num_dec; @@ -664,7 +663,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); if (found_extent) { ret = btrfs_free_extent(trans, root, extent_start, - extent_num_blocks, 0); + extent_num_bytes, 0); BUG_ON(ret); } } @@ -709,7 +708,8 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, static int btrfs_truncate_page(struct address_space *mapping, loff_t from) { struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; + struct btrfs_root *root = BTRFS_I(inode)->root; + u32 blocksize = root->sectorsize; pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; @@ -719,7 +719,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; - down_read(&BTRFS_I(inode)->root->snap_sem); + down_read(&root->snap_sem); ret = -ENOMEM; page = grab_cache_page(mapping, index); if (!page) @@ -778,8 +778,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) err = btrfs_drop_extents(trans, root, inode, pos, pos + hole_size, &alloc_hint); - hole_size >>= inode->i_blkbits; - err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); btrfs_end_transaction(trans, root); @@ -1490,7 +1488,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, { int ret; int err = 0; - u64 blocknr; + u64 bytenr; u64 extent_start = 0; u64 extent_end = 0; u64 objectid = inode->i_ino; @@ -1540,10 +1538,6 @@ again: leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - - blocknr = btrfs_file_extent_disk_blocknr(leaf, item); - blocknr += btrfs_file_extent_offset(leaf, item); - /* are we inside the extent that was found? 
*/ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); found_type = btrfs_key_type(&found_key); @@ -1556,8 +1550,7 @@ again: extent_start = found_key.offset; if (found_type == BTRFS_FILE_EXTENT_REG) { extent_end = extent_start + - (btrfs_file_extent_num_blocks(leaf, item) << - inode->i_blkbits); + btrfs_file_extent_num_bytes(leaf, item); err = 0; if (start < extent_start || start >= extent_end) { em->start = start; @@ -1570,17 +1563,18 @@ again: } goto not_found_em; } - if (btrfs_file_extent_disk_blocknr(leaf, item) == 0) { + bytenr = btrfs_file_extent_disk_bytenr(leaf, item); + if (bytenr == 0) { em->start = extent_start; em->end = extent_end - 1; em->block_start = EXTENT_MAP_HOLE; em->block_end = EXTENT_MAP_HOLE; goto insert; } - em->block_start = blocknr << inode->i_blkbits; + bytenr += btrfs_file_extent_offset(leaf, item); + em->block_start = bytenr; em->block_end = em->block_start + - (btrfs_file_extent_num_blocks(leaf, item) << - inode->i_blkbits) - 1; + btrfs_file_extent_num_bytes(leaf, item) - 1; em->start = extent_start; em->end = extent_end - 1; goto insert; @@ -1592,7 +1586,8 @@ again: size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); - extent_end = extent_start | ((u64)root->sectorsize - 1); + extent_end = (extent_start + size) | + ((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { @@ -1617,8 +1612,10 @@ again: ptr = btrfs_file_extent_inline_start(item); map = kmap(page); read_extent_buffer(leaf, map + page_offset, ptr, size); + /* memset(map + page_offset + size, 0, root->sectorsize - (page_offset + size)); + */ flush_dcache_page(page); kunmap(page); set_extent_uptodate(em_tree, extent_start, @@ -1836,13 +1833,13 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) trans = btrfs_start_transaction(root, 1); BUG_ON(!trans); - leaf = btrfs_alloc_free_block(trans, root, 0, 0); + leaf = btrfs_alloc_free_block(trans, root, 
root->leafsize, 0, 0); if (IS_ERR(leaf)) return PTR_ERR(leaf); btrfs_set_header_nritems(leaf, 0); btrfs_set_header_level(leaf, 0); - btrfs_set_header_blocknr(leaf, extent_buffer_blocknr(leaf)); + btrfs_set_header_bytenr(leaf, leaf->start); btrfs_set_header_generation(leaf, trans->transid); btrfs_set_header_owner(leaf, root->root_key.objectid); write_extent_buffer(leaf, root->fs_info->fsid, @@ -1858,7 +1855,8 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) inode_item->nblocks = cpu_to_le64(1); inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - btrfs_set_root_blocknr(&root_item, extent_buffer_blocknr(leaf)); + btrfs_set_root_bytenr(&root_item, leaf->start); + btrfs_set_root_level(&root_item, 0); btrfs_set_root_refs(&root_item, 1); btrfs_set_root_used(&root_item, 0); @@ -1971,8 +1969,8 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); - btrfs_set_root_blocknr(&new_root_item, - extent_buffer_blocknr(root->node)); + btrfs_set_root_bytenr(&new_root_item, root->node->start); + btrfs_set_root_level(&new_root_item, btrfs_header_level(root->node)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index a825ce078a54..9f8696c8a8e8 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -36,7 +36,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) u32 type; printk("leaf %llu total ptrs %d free space %d\n", - (unsigned long long)btrfs_header_blocknr(l), nr, + (unsigned long long)btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l)); for (i = 0 ; i < nr ; i++) { item = btrfs_item_nr(l, i); @@ -65,8 +65,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) break; case BTRFS_ROOT_ITEM_KEY: ri = btrfs_item_ptr(l, i, struct btrfs_root_item); - printk("\t\troot data blocknr 
%llu refs %u\n", - (unsigned long long)btrfs_disk_root_blocknr(l, ri), + printk("\t\troot data bytenr %llu refs %u\n", + (unsigned long long)btrfs_disk_root_bytenr(l, ri), btrfs_disk_root_refs(l, ri)); break; case BTRFS_EXTENT_ITEM_KEY: @@ -84,12 +84,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) btrfs_file_extent_inline_len(l, item)); break; } - printk("\t\textent data disk block %llu nr %llu\n", - (unsigned long long)btrfs_file_extent_disk_blocknr(l, fi), - (unsigned long long)btrfs_file_extent_disk_num_blocks(l, fi)); + printk("\t\textent data disk bytenr %llu nr %llu\n", + (unsigned long long)btrfs_file_extent_disk_bytenr(l, fi), + (unsigned long long)btrfs_file_extent_disk_num_bytes(l, fi)); printk("\t\textent data offset %llu nr %llu\n", (unsigned long long)btrfs_file_extent_offset(l, fi), - (unsigned long long)btrfs_file_extent_num_blocks(l, fi)); + (unsigned long long)btrfs_file_extent_num_bytes(l, fi)); break; case BTRFS_BLOCK_GROUP_ITEM_KEY: bi = btrfs_item_ptr(l, i, @@ -106,16 +106,18 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) int i; u32 nr; struct btrfs_key key; + int level; if (!c) return; nr = btrfs_header_nritems(c); - if (btrfs_is_leaf(c)) { + level = btrfs_header_level(c); + if (level == 0) { btrfs_print_leaf(root, c); return; } printk("node %llu level %d total ptrs %d free spc %u\n", - (unsigned long long)btrfs_header_blocknr(c), + (unsigned long long)btrfs_header_bytenr(c), btrfs_header_level(c), nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr); for (i = 0; i < nr; i++) { @@ -129,7 +131,8 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c) } for (i = 0; i < nr; i++) { struct extent_buffer *next = read_tree_block(root, - btrfs_node_blockptr(c, i)); + btrfs_node_blockptr(c, i), + btrfs_level_size(root, level - 1)); if (btrfs_is_leaf(next) && btrfs_header_level(c) != 1) BUG(); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 39a1435c68f1..5c4370f3a5b8 100644 --- 
a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -303,10 +303,12 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_root *root = btrfs_sb(dentry->d_sb); struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + int bits = dentry->d_sb->s_blocksize_bits; buf->f_namelen = BTRFS_NAME_LEN; - buf->f_blocks = btrfs_super_total_blocks(disk_super); - buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super); + buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; + buf->f_bfree = buf->f_blocks - + (btrfs_super_bytes_used(disk_super) >> bits); buf->f_bavail = buf->f_bfree; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 9654e90eec89..0bd1fd3d29df 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -42,14 +42,15 @@ static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf) static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf) { + return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_blocks_used(&fs->super_copy)); + (unsigned long long)btrfs_super_bytes_used(&fs->super_copy)); } static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)btrfs_super_total_blocks(&fs->super_copy)); + (unsigned long long)btrfs_super_total_bytes(&fs->super_copy)); } static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 55289b71056e..60f61345a8d0 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -205,12 +205,13 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, btrfs_write_dirty_block_groups(trans, extent_root); while(1) { - old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == - extent_buffer_blocknr(extent_root->node)) + old_extent_block = 
btrfs_root_bytenr(&extent_root->root_item); + if (old_extent_block == extent_root->node->start) break; - btrfs_set_root_blocknr(&extent_root->root_item, - extent_buffer_blocknr(extent_root->node)); + btrfs_set_root_bytenr(&extent_root->root_item, + extent_root->node->start); + btrfs_set_root_level(&extent_root->root_item, + btrfs_header_level(extent_root->node)); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -284,8 +285,8 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); if (root->commit_root == root->node) { - WARN_ON(extent_buffer_blocknr(root->node) != - btrfs_root_blocknr(&root->root_item)); + WARN_ON(root->node->start != + btrfs_root_bytenr(&root->root_item)); free_extent_buffer(root->commit_root); root->commit_root = NULL; @@ -314,8 +315,10 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, root->commit_root = NULL; root->root_key.offset = root->fs_info->generation; - btrfs_set_root_blocknr(&root->root_item, - extent_buffer_blocknr(root->node)); + btrfs_set_root_bytenr(&root->root_item, + root->node->start); + btrfs_set_root_level(&root->root_item, + btrfs_header_level(root->node)); err = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); @@ -407,8 +410,8 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, struct dirty_root *dirty; struct btrfs_trans_handle *trans; unsigned long nr; - u64 num_blocks; - u64 blocks_used; + u64 num_bytes; + u64 bytes_used; int ret = 0; int err; @@ -419,7 +422,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, dirty = list_entry(list->next, struct dirty_root, list); list_del_init(&dirty->list); - num_blocks = btrfs_root_used(&dirty->root->root_item); + num_bytes = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; while(1) { @@ -446,12 +449,12 @@ static int drop_dirty_roots(struct btrfs_root *tree_root, } BUG_ON(ret); - 
num_blocks -= btrfs_root_used(&dirty->root->root_item); - blocks_used = btrfs_root_used(&root->root_item); - if (num_blocks) { + num_bytes -= btrfs_root_used(&dirty->root->root_item); + bytes_used = btrfs_root_used(&root->root_item); + if (num_bytes) { record_root_in_trans(root); btrfs_set_root_used(&root->root_item, - blocks_used - num_blocks); + bytes_used - num_bytes); } ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); if (ret) { @@ -560,7 +563,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_super_generation(&root->fs_info->super_copy, cur_trans->transid); btrfs_set_super_root(&root->fs_info->super_copy, - extent_buffer_blocknr(root->fs_info->tree_root->node)); + root->fs_info->tree_root->node->start); + btrfs_set_super_root_level(&root->fs_info->super_copy, + btrfs_header_level(root->fs_info->tree_root->node)); write_extent_buffer(root->fs_info->sb_buffer, &root->fs_info->super_copy, 0, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index daf019afa0a1..3feac2f28628 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -27,13 +27,15 @@ static void reada_defrag(struct btrfs_root *root, { int i; u32 nritems; - u64 blocknr; + u64 bytenr; + u32 blocksize; int ret; + blocksize = btrfs_level_size(root, btrfs_header_level(node) - 1); nritems = btrfs_header_nritems(node); for (i = 0; i < nritems; i++) { - blocknr = btrfs_node_blockptr(node, i); - ret = readahead_tree_block(root, blocknr); + bytenr = btrfs_node_blockptr(node, i); + ret = readahead_tree_block(root, bytenr, blocksize); if (ret) break; } @@ -46,7 +48,7 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, { struct extent_buffer *next; struct extent_buffer *cur; - u64 blocknr; + u64 bytenr; int ret = 0; int is_extent = 0; @@ -80,10 +82,11 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, break; } - blocknr = btrfs_node_blockptr(cur, path->slots[*level]); + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 
if (cache_only) { - next = btrfs_find_tree_block(root, blocknr); + next = btrfs_find_tree_block(root, bytenr, + btrfs_level_size(root, *level - 1)); /* FIXME, test for defrag */ if (!next || !btrfs_buffer_uptodate(next)) { free_extent_buffer(next); @@ -91,7 +94,8 @@ static int defrag_walk_down(struct btrfs_trans_handle *trans, continue; } } else { - next = read_tree_block(root, blocknr); + next = read_tree_block(root, bytenr, + btrfs_level_size(root, *level - 1)); } ret = btrfs_cow_block(trans, root, next, path->nodes[*level], path->slots[*level], &next); -- cgit v1.2.3 From 3326d1b07c0cb6a2ff5b835b7a2cffa54124d074 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:25 -0400 Subject: Btrfs: Allow tails larger than one page Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 24 +++++++-- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 152 ++++++++++++++++++++++++++++++++++++++++++------------- fs/btrfs/inode.c | 32 +++++++----- fs/btrfs/super.c | 2 - 5 files changed, 156 insertions(+), 56 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index bef61ee883e1..f691542dfa15 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1766,8 +1766,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root /* first try to make some room by pushing left and right */ wret = push_leaf_left(trans, root, path, data_size); - if (wret < 0) + if (wret < 0) { return wret; + } if (wret) { wret = push_leaf_right(trans, root, path, data_size); if (wret < 0) @@ -1777,8 +1778,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root /* did the pushes work? 
*/ if (btrfs_leaf_free_space(root, l) >= - sizeof(struct btrfs_item) + data_size) + sizeof(struct btrfs_item) + data_size) { return 0; + } if (!path->nodes[1]) { ret = insert_new_root(trans, root, path, 1); @@ -1822,7 +1824,11 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } mid = slot; - double_split = 1; + if (mid != nritems && + leaf_space_used(l, mid, nritems - mid) + + space_needed > BTRFS_LEAF_DATA_SIZE(root)) { + double_split = 1; + } } } else { if (leaf_space_used(l, 0, mid + 1) + space_needed > @@ -1910,8 +1916,9 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root BUG_ON(path->slots[0] < 0); - if (!double_split) + if (!double_split) { return ret; + } right = btrfs_alloc_free_block(trans, root, root->leafsize, l->start, 0); @@ -2048,7 +2055,11 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, old_data = btrfs_item_end_nr(leaf, slot); BUG_ON(slot < 0); - BUG_ON(slot >= nritems); + if (slot >= nritems) { + btrfs_print_leaf(root, leaf); + printk("slot %d too large, nritems %d\n", slot, nritems); + BUG_ON(1); + } /* * item0..itemN ... dataN.offset..dataN.size .. 
data0.size @@ -2132,6 +2143,9 @@ int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, if (btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item) + data_size) { + btrfs_print_leaf(root, leaf); + printk("not enough freespace need %u have %d\n", + data_size, btrfs_leaf_free_space(root, leaf)); BUG(); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 54c754dd9a14..18994c53106c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1137,7 +1137,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end); extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 *hint_block); + u64 start, u64 end, u64 inline_end, u64 *hint_block); /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 844d8807e44a..1af2b6534dad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -82,8 +82,9 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages) static int insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 offset, ssize_t size, - struct page *page, size_t page_offset) + u64 offset, size_t size, + struct page **pages, size_t page_offset, + int num_pages) { struct btrfs_key key; struct btrfs_path *path; @@ -91,9 +92,12 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, char *kaddr; unsigned long ptr; struct btrfs_file_extent_item *ei; + struct page *page; u32 datasize; int err = 0; int ret; + int i; + ssize_t cur_size; path = btrfs_alloc_path(); if (!path) @@ -104,25 +108,97 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans, key.objectid = inode->i_ino; key.offset = offset; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); - BUG_ON(size >= PAGE_CACHE_SIZE); - datasize = 
btrfs_file_extent_calc_inline_size(size); + datasize = btrfs_file_extent_calc_inline_size(offset + size); - ret = btrfs_insert_empty_item(trans, root, path, &key, - datasize); - if (ret) { + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) { err = ret; goto fail; } - leaf = path->nodes[0]; - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(leaf, ei, trans->transid); - btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); - ptr = btrfs_file_extent_inline_start(ei); - - kaddr = kmap_atomic(page, KM_USER1); - write_extent_buffer(leaf, kaddr + page_offset, ptr, size); - kunmap_atomic(kaddr, KM_USER1); + if (ret == 1) { + path->slots[0]--; + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, ei) != + BTRFS_FILE_EXTENT_INLINE) { + goto insert; + } + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + ret = 0; + } + if (ret == 0) { + u32 found_size; + u64 found_start; + + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, ei) != + BTRFS_FILE_EXTENT_INLINE) { + err = ret; + btrfs_print_leaf(root, leaf); + printk("found wasn't inline offset %Lu inode %lu\n", + offset, inode->i_ino); + goto fail; + } + found_start = key.offset; + found_size = btrfs_file_extent_inline_len(leaf, + btrfs_item_nr(leaf, path->slots[0])); + + if (found_size < offset + size) { + btrfs_release_path(root, path); + ret = btrfs_search_slot(trans, root, &key, path, + offset + size - found_size - + found_start, 1); + BUG_ON(ret != 0); + ret = btrfs_extend_item(trans, root, path, + offset + size - found_size - + found_start); + if (ret) { + err = ret; + goto fail; + } + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + } + } else { +insert: + btrfs_release_path(root, path); + ret = 
btrfs_insert_empty_item(trans, root, path, &key, + datasize); + if (ret) { + err = ret; + printk("got bad ret %d\n", ret); + goto fail; + } + leaf = path->nodes[0]; + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + btrfs_set_file_extent_generation(leaf, ei, trans->transid); + btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE); + } + ptr = btrfs_file_extent_inline_start(ei) + offset; + + cur_size = size; + i = 0; + while (size > 0) { + page = pages[i]; + kaddr = kmap_atomic(page, KM_USER0); + cur_size = min(PAGE_CACHE_SIZE - page_offset, size); + write_extent_buffer(leaf, kaddr + page_offset, ptr, cur_size); + kunmap_atomic(kaddr, KM_USER0); + page_offset = 0; + ptr += cur_size; + size -= cur_size; + if (i >= num_pages) { + printk("i %d num_pages %d\n", i, num_pages); + } + i++; + } btrfs_mark_buffer_dirty(leaf); fail: btrfs_free_path(path); @@ -193,6 +269,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, err = btrfs_drop_extents(trans, root, inode, last_pos_in_file, last_pos_in_file + hole_size, + last_pos_in_file, &hint_byte); if (err) goto failed; @@ -210,11 +287,12 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, * either allocate an extent for the new bytes or setup the key * to show we are doing inline data in the extent */ - inline_size = end_pos - start_pos; - if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || - inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size >= PAGE_CACHE_SIZE) { + inline_size = end_pos; + if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || + inline_size > 16384 || + inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; + for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; SetPageUptodate(p); @@ -225,22 +303,18 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, set_extent_delalloc(em_tree, start_pos, end_of_last_block, GFP_NOFS); } else { - struct page *p = pages[0]; + u64 
aligned_end; /* step one, delete the existing extents in this range */ + aligned_end = (pos + write_bytes + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); err = btrfs_drop_extents(trans, root, inode, start_pos, - (pos + write_bytes + root->sectorsize -1) & - ~((u64)root->sectorsize - 1), &hint_byte); + aligned_end, end_pos, &hint_byte); if (err) goto failed; - err = insert_inline_extent(trans, root, inode, start_pos, - end_pos - start_pos, p, 0); + end_pos - start_pos, pages, 0, + num_pages); BUG_ON(err); - em->start = start_pos; - em->end = end_pos - 1; - em->block_start = EXTENT_MAP_INLINE; - em->block_end = EXTENT_MAP_INLINE; - add_extent_mapping(em_tree, em); } if (end_pos > isize) { i_size_write(inode, end_pos); @@ -285,7 +359,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) */ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - u64 start, u64 end, u64 *hint_byte) + u64 start, u64 end, u64 inline_end, u64 *hint_byte) { int ret; struct btrfs_key key; @@ -401,8 +475,8 @@ next_slot: BUG_ON(ret); } } - WARN_ON(found_inline); - bookend = 1; + if (!found_inline) + bookend = 1; } /* truncate existing extent */ if (start > key.offset) { @@ -425,8 +499,14 @@ next_slot: btrfs_set_file_extent_num_bytes(leaf, extent, new_num); btrfs_mark_buffer_dirty(leaf); - } else { - WARN_ON(1); + } else if (end > extent_end && + key.offset < inline_end && + inline_end < extent_end) { + u32 new_size; + new_size = btrfs_file_extent_calc_inline_size( + inline_end - key.offset); + btrfs_truncate_item(trans, root, path, + new_size); } } /* delete the entire extent */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d6b3a55ed8e0..84f496c838c9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -87,7 +87,7 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) BUG_ON(!trans); num_bytes = (end - start + blocksize) & ~(blocksize - 1); ret = btrfs_drop_extents(trans, root, inode, 
- start, start + num_bytes, &alloc_hint); + start, start + num_bytes, start, &alloc_hint); ret = btrfs_alloc_extent(trans, root, inode->i_ino, num_bytes, 0, alloc_hint, (u64)-1, &ins, 1); @@ -776,7 +776,8 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, root, inode, - pos, pos + hole_size, &alloc_hint); + pos, pos + hole_size, pos, + &alloc_hint); err = btrfs_insert_file_extent(trans, root, inode->i_ino, pos, 0, 0, hole_size); @@ -1581,7 +1582,9 @@ again: } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { unsigned long ptr; char *map; - u32 size; + size_t size; + size_t extent_offset; + size_t copy_size; size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); @@ -1600,26 +1603,31 @@ again: goto not_found_em; } + extent_offset = (page->index << PAGE_CACHE_SHIFT) - + extent_start; + ptr = btrfs_file_extent_inline_start(item) + extent_offset; + map = kmap(page); + copy_size = min(PAGE_CACHE_SIZE - page_offset, + size - extent_offset); + em->block_start = EXTENT_MAP_INLINE; em->block_end = EXTENT_MAP_INLINE; - em->start = extent_start; - em->end = extent_end; + em->start = extent_start + extent_offset; + em->end = (em->start + copy_size -1) | + ((u64)root->sectorsize -1); if (!page) { goto insert; } - ptr = btrfs_file_extent_inline_start(item); - map = kmap(page); - read_extent_buffer(leaf, map + page_offset, ptr, size); + read_extent_buffer(leaf, map + page_offset, ptr, copy_size); /* - memset(map + page_offset + size, 0, - root->sectorsize - (page_offset + size)); + memset(map + page_offset + copy_size, 0, + PAGE_CACHE_SIZE - copy_size - page_offset); */ flush_dcache_page(page); kunmap(page); - set_extent_uptodate(em_tree, extent_start, - extent_end, GFP_NOFS); + set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS); goto insert; } else { printk("unkknown found_type %d\n", found_type); diff --git 
a/fs/btrfs/super.c b/fs/btrfs/super.c index 5c4370f3a5b8..f94aa1f97a0a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -280,7 +280,6 @@ error_s: error_bdev: close_bdev_excl(bdev); error: -printk("get_sb failed\n"); return error; } /* end copy & paste */ @@ -295,7 +294,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, mnt, subvol_name ? subvol_name : "default"); -printk("btrfs_get_sb returns %d\n", ret); return ret; } -- cgit v1.2.3 From 5103e947b9b7ac18ddb21a04ee3486e94c6504d7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 16 Nov 2007 11:45:54 -0500 Subject: xattr support for btrfs Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/acl.c | 66 +++++++ fs/btrfs/ctree.h | 22 ++- fs/btrfs/dir-item.c | 100 +++++++++- fs/btrfs/inode.c | 15 +- fs/btrfs/print-tree.c | 3 +- fs/btrfs/super.c | 2 + fs/btrfs/xattr.c | 497 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/xattr.h | 58 ++++++ 9 files changed, 751 insertions(+), 14 deletions(-) create mode 100644 fs/btrfs/acl.c create mode 100644 fs/btrfs/xattr.c create mode 100644 fs/btrfs/xattr.h (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 551743be5f03..d5804c5ca075 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,7 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o + extent_map.o sysfs.o struct-funcs.o xattr.o acl.o #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \ diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c new file mode 100644 index 000000000000..aee9f0657c35 --- /dev/null +++ b/fs/btrfs/acl.c @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2007 Red Hat. 
All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include +#include +#include "ctree.h" +#include "xattr.h" + +static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name, + void *value, size_t size) +{ + return btrfs_xattr_get(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, name, + value, size); +} + +static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + return btrfs_xattr_set(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, name, + value, size, flags); +} + +static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name, + void *value, size_t size) +{ + return btrfs_xattr_get(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, + name, value, size); +} + +static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + return btrfs_xattr_set(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, + name, value, size, flags); +} + +struct xattr_handler btrfs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_acl_default_get, + .set = btrfs_xattr_acl_default_set, +}; + +struct xattr_handler btrfs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .list = btrfs_xattr_generic_list, + .get = 
btrfs_xattr_acl_access_get, + .set = btrfs_xattr_acl_access_set, +}; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 030d21d7f98c..27cadae1af63 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -63,7 +63,8 @@ extern struct kmem_cache *btrfs_path_cachep; #define BTRFS_FT_FIFO 5 #define BTRFS_FT_SOCK 6 #define BTRFS_FT_SYMLINK 7 -#define BTRFS_FT_MAX 8 +#define BTRFS_FT_XATTR 8 +#define BTRFS_FT_MAX 9 /* * the key defines the order in the tree, and so it also defines (optimal) @@ -226,7 +227,7 @@ struct btrfs_inode_item { struct btrfs_dir_item { struct btrfs_disk_key location; - __le16 flags; + __le16 data_len; __le16 name_len; u8 type; } __attribute__ ((__packed__)); @@ -367,7 +368,7 @@ struct btrfs_root { * the FS */ #define BTRFS_INODE_ITEM_KEY 1 - +#define BTRFS_XATTR_ITEM_KEY 2 /* reserve 2-15 close to the inode for later flexibility */ /* @@ -621,7 +622,7 @@ static inline void btrfs_set_item_key(struct extent_buffer *eb, } /* struct btrfs_dir_item */ -BTRFS_SETGET_FUNCS(dir_flags, struct btrfs_dir_item, flags, 16); +BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); @@ -962,6 +963,15 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_dir_item *di); +int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, const char *name, + u16 name_len, const void *data, u16 data_len, + u64 dir); +struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, u16 name_len, + int mod); /* inode-map.c */ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_root *fs_root, @@ -1039,4 +1049,8 @@ int btrfs_sysfs_add_root(struct btrfs_root *root); void btrfs_sysfs_del_root(struct btrfs_root 
*root); void btrfs_sysfs_del_super(struct btrfs_fs_info *root); +/* xattr.c */ +ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); +int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode); #endif diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 514a1dc337a8..ddbe12ae0d63 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -43,8 +43,6 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle return ERR_PTR(-EEXIST); ret = btrfs_extend_item(trans, root, path, data_size); WARN_ON(ret > 0); - if (ret) - return ERR_PTR(ret); } if (ret < 0) return ERR_PTR(ret); @@ -57,6 +55,57 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle return (struct btrfs_dir_item *)ptr; } +int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, + struct btrfs_root *root, const char *name, + u16 name_len, const void *data, u16 data_len, + u64 dir) +{ + int ret = 0; + struct btrfs_path *path; + struct btrfs_dir_item *dir_item; + unsigned long name_ptr, data_ptr; + struct btrfs_key key, location; + struct btrfs_disk_key disk_key; + struct extent_buffer *leaf; + u32 data_size; + + key.objectid = dir; + btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + ret = btrfs_name_hash(name, name_len, &key.offset); + BUG_ON(ret); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + data_size = sizeof(*dir_item) + name_len + data_len; + dir_item = insert_with_overflow(trans, root, path, &key, data_size, + name, name_len); + /* + * FIXME: at some point we should handle xattr's that are larger than + * what we can fit in our leaf. We set location to NULL b/c we arent + * pointing at anything else, that will change if we store the xattr + * data in a separate inode. 
+ */ + BUG_ON(IS_ERR(dir_item)); + memset(&location, 0, sizeof(location)); + + leaf = path->nodes[0]; + btrfs_cpu_key_to_disk(&disk_key, &location); + btrfs_set_dir_item_key(leaf, dir_item, &disk_key); + btrfs_set_dir_type(leaf, dir_item, BTRFS_FT_XATTR); + btrfs_set_dir_name_len(leaf, dir_item, name_len); + btrfs_set_dir_data_len(leaf, dir_item, data_len); + name_ptr = (unsigned long)(dir_item + 1); + data_ptr = (unsigned long)((char *)name_ptr + name_len); + + write_extent_buffer(leaf, name, name_ptr, name_len); + write_extent_buffer(leaf, data, data_ptr, data_len); + btrfs_mark_buffer_dirty(path->nodes[0]); + + btrfs_free_path(path); + return ret; +} + int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, struct btrfs_key *location, u8 type) @@ -90,7 +139,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root btrfs_cpu_key_to_disk(&disk_key, location); btrfs_set_dir_item_key(leaf, dir_item, &disk_key); btrfs_set_dir_type(leaf, dir_item, type); - btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_data_len(leaf, dir_item, 0); btrfs_set_dir_name_len(leaf, dir_item, name_len); name_ptr = (unsigned long)(dir_item + 1); @@ -117,7 +166,7 @@ second_insert: btrfs_cpu_key_to_disk(&disk_key, location); btrfs_set_dir_item_key(leaf, dir_item, &disk_key); btrfs_set_dir_type(leaf, dir_item, type); - btrfs_set_dir_flags(leaf, dir_item, 0); + btrfs_set_dir_data_len(leaf, dir_item, 0); btrfs_set_dir_name_len(leaf, dir_item, name_len); name_ptr = (unsigned long)(dir_item + 1); write_extent_buffer(leaf, name, name_ptr, name_len); @@ -194,6 +243,43 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, u64 dir, + const char *name, u16 name_len, + int mod) +{ + int ret; + struct 
btrfs_key key; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + struct btrfs_key found_key; + struct extent_buffer *leaf; + + key.objectid = dir; + btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + ret = btrfs_name_hash(name, name_len, &key.offset); + BUG_ON(ret); + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) { + if (path->slots[0] == 0) + return NULL; + path->slots[0]--; + } + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != dir || + btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY || + found_key.offset != key.offset) + return NULL; + + return btrfs_match_dir_item_name(root, path, name, name_len); +} + struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) @@ -210,7 +296,8 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, total_len = btrfs_item_size_nr(leaf, path->slots[0]); while(cur < total_len) { this_len = sizeof(*dir_item) + - btrfs_dir_name_len(leaf, dir_item); + btrfs_dir_name_len(leaf, dir_item) + + btrfs_dir_data_len(leaf, dir_item); name_ptr = (unsigned long)(dir_item + 1); if (btrfs_dir_name_len(leaf, dir_item) == name_len && @@ -236,7 +323,8 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, int ret = 0; leaf = path->nodes[0]; - sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di); + sub_item_len = sizeof(*di) + btrfs_dir_name_len(leaf, di) + + btrfs_dir_data_len(leaf, di); item_len = btrfs_item_size_nr(leaf, path->slots[0]); if (sub_item_len == item_len) { ret = btrfs_del_item(trans, root, path); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d6d32465bc9a..37f8e0321af8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -839,6 +840,9 @@ void 
btrfs_delete_inode(struct inode *inode) btrfs_set_trans_block_group(trans, inode); ret = btrfs_truncate_in_trans(trans, root, inode); + if (ret) + goto no_delete_lock; + ret = btrfs_delete_xattrs(trans, root, inode); if (ret) goto no_delete_lock; ret = btrfs_free_inode(trans, root, inode); @@ -1110,7 +1114,8 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (over) goto nopos; - di_len = btrfs_dir_name_len(leaf, di) + sizeof(*di); + di_len = btrfs_dir_name_len(leaf, di) + + btrfs_dir_data_len(leaf, di) +sizeof(*di); di_cur += di_len; di = (struct btrfs_dir_item *)((char *)di + di_len); } @@ -2519,6 +2524,10 @@ static struct inode_operations btrfs_dir_inode_operations = { .symlink = btrfs_symlink, .setattr = btrfs_setattr, .mknod = btrfs_mknod, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = btrfs_listxattr, + .removexattr = generic_removexattr, }; static struct inode_operations btrfs_dir_ro_inode_operations = { @@ -2567,6 +2576,10 @@ static struct inode_operations btrfs_file_inode_operations = { .truncate = btrfs_truncate, .getattr = btrfs_getattr, .setattr = btrfs_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = btrfs_listxattr, + .removexattr = generic_removexattr, }; static struct inode_operations btrfs_special_inode_operations = { diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 9f8696c8a8e8..030324febf6c 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -58,9 +58,8 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) case BTRFS_DIR_ITEM_KEY: di = btrfs_item_ptr(l, i, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(l, di, &found_key); - printk("\t\tdir oid %llu flags %u type %u\n", + printk("\t\tdir oid %llu type %u\n", (unsigned long long)found_key.objectid, - btrfs_dir_flags(l, di), btrfs_dir_type(l, di)); break; case BTRFS_ROOT_ITEM_KEY: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 
f94aa1f97a0a..c46bc3911798 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -40,6 +40,7 @@ #include "btrfs_inode.h" #include "ioctl.h" #include "print-tree.h" +#include "xattr.h" #define BTRFS_SUPER_MAGIC 0x9123683E @@ -106,6 +107,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; + sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; tree_root = open_ctree(sb); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c new file mode 100644 index 000000000000..f4ac5e0bbad1 --- /dev/null +++ b/fs/btrfs/xattr.c @@ -0,0 +1,497 @@ +/* + * Copyright (C) 2007 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#include +#include +#include +#include +#include +#include "ctree.h" +#include "btrfs_inode.h" +#include "transaction.h" +#include "xattr.h" +#include "disk-io.h" + +static struct xattr_handler *btrfs_xattr_handler_map[] = { + [BTRFS_XATTR_INDEX_USER] = &btrfs_xattr_user_handler, + [BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &btrfs_xattr_acl_access_handler, + [BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &btrfs_xattr_acl_default_handler, + [BTRFS_XATTR_INDEX_TRUSTED] = &btrfs_xattr_trusted_handler, + [BTRFS_XATTR_INDEX_SECURITY] = &btrfs_xattr_security_handler, + [BTRFS_XATTR_INDEX_SYSTEM] = &btrfs_xattr_system_handler, +}; + +struct xattr_handler *btrfs_xattr_handlers[] = { + &btrfs_xattr_user_handler, + &btrfs_xattr_acl_access_handler, + &btrfs_xattr_acl_default_handler, + &btrfs_xattr_trusted_handler, + &btrfs_xattr_security_handler, + &btrfs_xattr_system_handler, + NULL, +}; + +/* + * @param name - the xattr name + * @return - the xattr_handler for the xattr, NULL if its not found + * + * use this with listxattr where we don't already know the type of xattr we + * have + */ +static struct xattr_handler *find_btrfs_xattr_handler(struct extent_buffer *l, + unsigned long name_ptr, + u16 name_len) +{ + struct xattr_handler *handler = NULL; + int i = 0; + + for (handler = btrfs_xattr_handlers[i]; handler != NULL; i++, + handler = btrfs_xattr_handlers[i]) { + u16 prefix_len = strlen(handler->prefix); + + if (name_len < prefix_len) + continue; + + if (memcmp_extent_buffer(l, handler->prefix, name_ptr, + prefix_len) == 0) + break; + } + + return handler; +} + +/* + * @param name_index - the index for the xattr handler + * @return the xattr_handler if we found it, NULL otherwise + * + * use this if we know the type of the xattr already + */ +static struct xattr_handler *btrfs_xattr_handler(int name_index) +{ + struct xattr_handler *handler = NULL; + + if (name_index >= 0 && + name_index < ARRAY_SIZE(btrfs_xattr_handler_map)) + handler = btrfs_xattr_handler_map[name_index]; 
+ + return handler; +} + +static inline char *get_name(const char *name, int name_index) +{ + char *ret = NULL; + struct xattr_handler *handler = btrfs_xattr_handler(name_index); + int prefix_len; + + if (!handler) + return ret; + + prefix_len = strlen(handler->prefix); + + ret = kmalloc(strlen(name) + prefix_len + 1, GFP_KERNEL); + if (!ret) + return ret; + + memcpy(ret, handler->prefix, prefix_len); + memcpy(ret+prefix_len, name, strlen(name)); + ret[prefix_len + strlen(name)] = '\0'; + + return ret; +} + +size_t btrfs_xattr_generic_list(struct inode *inode, char *list, + size_t list_size, const char *name, + size_t name_len) +{ + if (list && (name_len+1) <= list_size) { + memcpy(list, name, name_len); + list[name_len] = '\0'; + } else + return -ERANGE; + + return name_len+1; +} + +ssize_t btrfs_xattr_get(struct inode *inode, int name_index, + const char *attr_name, void *buffer, size_t size) +{ + struct btrfs_dir_item *di; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct xattr_handler *handler = btrfs_xattr_handler(name_index); + int ret = 0; + unsigned long data_ptr; + char *name; + + if (!handler) + return -EOPNOTSUPP; + + /* just in case... 
*/ + if (*attr_name == '\0') + return -EINVAL; + + name = get_name(attr_name, name_index); + if (!name) + return -ENOMEM; + + path = btrfs_alloc_path(); + if (!path) { + kfree(name); + return -ENOMEM; + } + + mutex_lock(&root->fs_info->fs_mutex); + /* lookup the xattr by name */ + di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, + strlen(name), 0); + if (!di || IS_ERR(di)) { + ret = -ENODATA; + goto out; + } + + leaf = path->nodes[0]; + /* if size is 0, that means we want the size of the attr */ + if (!size) { + ret = btrfs_dir_data_len(leaf, di); + goto out; + } + + /* now get the data out of our dir_item */ + if (btrfs_dir_data_len(leaf, di) > size) { + ret = -ERANGE; + goto out; + } + data_ptr = (unsigned long)((char *)(di + 1) + + btrfs_dir_name_len(leaf, di)); + read_extent_buffer(leaf, buffer, data_ptr, + btrfs_dir_name_len(leaf, di)); + ret = btrfs_dir_data_len(leaf, di); + +out: + mutex_unlock(&root->fs_info->fs_mutex); + kfree(name); + btrfs_free_path(path); + return ret; +} + +int btrfs_xattr_set(struct inode *inode, int name_index, + const char *attr_name, const void *value, size_t size, + int flags) +{ + struct btrfs_dir_item *di; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + struct btrfs_path *path; + struct xattr_handler *handler = btrfs_xattr_handler(name_index); + char *name; + int ret = 0, mod = 0; + + if (!handler) + return -EOPNOTSUPP; + + /* just in case... 
*/ + if (*attr_name == '\0') + return -EINVAL; + + name = get_name(attr_name, name_index); + if (!name) + return -ENOMEM; + + path = btrfs_alloc_path(); + if (!path) { + kfree(name); + return -ENOMEM; + } + + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + /* first lets see if we already have this xattr */ + di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name, + strlen(name), -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } + + /* ok we already have this xattr, lets remove it */ + if (di) { + /* if we want create only exit */ + if (flags & XATTR_CREATE) { + ret = -EEXIST; + goto out; + } + + ret = btrfs_delete_one_dir_name(trans, root, path, di); + if (ret) + goto out; + btrfs_release_path(root, path); + + /* if we don't have a value then we are removing the xattr */ + if (!value) { + mod = 1; + goto out; + } + } else if (flags & XATTR_REPLACE) { + /* we couldn't find the attr to replace, so error out */ + ret = -ENODATA; + goto out; + } + + /* ok we have to create a completely new xattr */ + ret = btrfs_insert_xattr_item(trans, root, name, strlen(name), + value, size, inode->i_ino); + if (ret) + goto out; + mod = 1; + +out: + if (mod) { + inode->i_ctime = CURRENT_TIME; + ret = btrfs_update_inode(trans, root, inode); + } + + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + kfree(name); + btrfs_free_path(path); + + return ret; +} + +ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct btrfs_key key, found_key; + struct inode *inode = dentry->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_path *path; + struct btrfs_item *item; + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct xattr_handler *handler; + int ret = 0, slot, advance; + size_t total_size = 0, size_left = size, written; + unsigned long name_ptr; + char *name; + u32 nritems; + + /* + * ok we want 
all objects associated with this id. + * NOTE: we set key.offset = 0; because we want to start with the + * first xattr that we find and walk forward + */ + key.objectid = inode->i_ino; + btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + key.offset = 0; + + path = btrfs_alloc_path(); + path->reada = 2; + if (!path) + return -ENOMEM; + + mutex_lock(&root->fs_info->fs_mutex); + + /* search for our xattrs */ + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto err; + ret = 0; + advance = 0; + while (1) { + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + slot = path->slots[0]; + + /* this is where we start walking through the path */ + if (advance || slot >= nritems) { + /* + * if we've reached the last slot in this leaf we need + * to go to the next leaf and reset everything + */ + if (slot >= nritems-1) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + slot = path->slots[0]; + } else { + /* + * just walking through the slots on this leaf + */ + slot++; + path->slots[0]++; + } + } + advance = 1; + + item = btrfs_item_nr(leaf, slot); + btrfs_item_key_to_cpu(leaf, &found_key, slot); + + /* check to make sure this item is what we want */ + if (found_key.objectid != key.objectid) + break; + if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY) + break; + + di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); + + total_size += btrfs_dir_name_len(leaf, di)+1; + + /* we are just looking for how big our buffer needs to be */ + if (!size) + continue; + + /* find our handler for this xattr */ + name_ptr = (unsigned long)(di + 1); + handler = find_btrfs_xattr_handler(leaf, name_ptr, + btrfs_dir_name_len(leaf, di)); + if (!handler) { + printk(KERN_ERR "btrfs: unsupported xattr found\n"); + continue; + } + + name = kmalloc(btrfs_dir_name_len(leaf, di), GFP_KERNEL); + read_extent_buffer(leaf, name, name_ptr, + btrfs_dir_name_len(leaf, di)); + + /* call the 
list function associated with this xattr */ + written = handler->list(inode, buffer, size_left, name, + btrfs_dir_name_len(leaf, di)); + kfree(name); + + if (written < 0) { + ret = -ERANGE; + break; + } + + size_left -= written; + buffer += written; + } + ret = total_size; + +err: + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_free_path(path); + + return ret; +} + +/* + * delete all the xattrs associated with the inode. fs_mutex should be + * held when we come into here + */ +int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode) +{ + struct btrfs_path *path; + struct btrfs_key key, found_key; + struct btrfs_item *item; + struct extent_buffer *leaf; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = inode->i_ino; + btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY); + key.offset = (u64)-1; + + while(1) { + /* look for our next xattr */ + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + + if (path->slots[0] == 0) + break; + + path->slots[0]--; + leaf = path->nodes[0]; + item = btrfs_item_nr(leaf, path->slots[0]); + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + if (found_key.objectid != key.objectid) + break; + if (btrfs_key_type(&found_key) != BTRFS_XATTR_ITEM_KEY) + break; + + ret = btrfs_del_item(trans, root, path); + BUG_ON(ret); + btrfs_release_path(root, path); + } + ret = 0; +out: + btrfs_free_path(path); + + return ret; +} + +/* + * Handler functions + */ +#define BTRFS_XATTR_SETGET_FUNCS(name, index) \ +static int btrfs_xattr_##name##_get(struct inode *inode, \ + const char *name, void *value, \ + size_t size) \ +{ \ + return btrfs_xattr_get(inode, index, name, value, size); \ +} \ +static int btrfs_xattr_##name##_set(struct inode *inode, \ + const char *name, const void *value,\ + size_t size, int flags) \ +{ \ + return btrfs_xattr_set(inode, index, name, value, size, flags); \ +} \ + 
+BTRFS_XATTR_SETGET_FUNCS(security, BTRFS_XATTR_INDEX_SECURITY); +BTRFS_XATTR_SETGET_FUNCS(system, BTRFS_XATTR_INDEX_SYSTEM); +BTRFS_XATTR_SETGET_FUNCS(user, BTRFS_XATTR_INDEX_USER); +BTRFS_XATTR_SETGET_FUNCS(trusted, BTRFS_XATTR_INDEX_TRUSTED); + +struct xattr_handler btrfs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_security_get, + .set = btrfs_xattr_security_set, +}; + +struct xattr_handler btrfs_xattr_system_handler = { + .prefix = XATTR_SYSTEM_PREFIX, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_system_get, + .set = btrfs_xattr_system_set, +}; + +struct xattr_handler btrfs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_user_get, + .set = btrfs_xattr_user_set, +}; + +struct xattr_handler btrfs_xattr_trusted_handler = { + .prefix = XATTR_USER_PREFIX, + .list = btrfs_xattr_generic_list, + .get = btrfs_xattr_trusted_get, + .set = btrfs_xattr_trusted_set, +}; diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h new file mode 100644 index 000000000000..b2e47e3f2442 --- /dev/null +++ b/fs/btrfs/xattr.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2007 Red Hat. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +#ifndef __XATTR__ +#define __XATTR__ + +#include +#include "ctree.h" + +/* Name indexes */ +enum { + BTRFS_XATTR_INDEX_USER, + BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, + BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, + BTRFS_XATTR_INDEX_TRUSTED, + BTRFS_XATTR_INDEX_SECURITY, + BTRFS_XATTR_INDEX_SYSTEM, + BTRFS_XATTR_INDEX_END, +}; + +extern struct xattr_handler btrfs_xattr_user_handler; +extern struct xattr_handler btrfs_xattr_trusted_handler; +extern struct xattr_handler btrfs_xattr_acl_access_handler; +extern struct xattr_handler btrfs_xattr_acl_default_handler; +extern struct xattr_handler btrfs_xattr_security_handler; +extern struct xattr_handler btrfs_xattr_system_handler; + +extern struct xattr_handler *btrfs_xattr_handlers[]; + +ssize_t btrfs_xattr_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t size); +int btrfs_xattr_set(struct inode *inode, int name_index, const char *name, + const void *value, size_t size, int flags); + +/* + * the only reason this is public is for acl.c. 
There may be a point where + * acl.c doesn't need it, and if thats the case we need to remove it and make + * it static in xattr.c + */ +size_t btrfs_xattr_generic_list(struct inode *inode, char *list, + size_t list_size, const char *name, + size_t name_len); +#endif /* __XATTR__ */ -- cgit v1.2.3 From 2f4cbe6442d3be7b5e4cf1607a5ab33995fe2d25 Mon Sep 17 00:00:00 2001 From: Wyatt Banks Date: Mon, 19 Nov 2007 10:22:33 -0500 Subject: Btrfs: Return value checking in module init Signed-off-by: Chris Mason --- fs/btrfs/extent_map.c | 16 ++++++++++++++-- fs/btrfs/extent_map.h | 2 +- fs/btrfs/super.c | 21 ++++++++++++++++++--- 3 files changed, 33 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b3ff63c4bf2a..f91f28efdb59 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -42,18 +42,30 @@ struct extent_page_data { struct extent_map_tree *tree; get_extent_t *get_extent; }; - -void __init extent_map_init(void) +int __init extent_map_init(void) { extent_map_cache = btrfs_cache_create("extent_map", sizeof(struct extent_map), 0, NULL); + if (!extent_map_cache) + return -ENOMEM; extent_state_cache = btrfs_cache_create("extent_state", sizeof(struct extent_state), 0, NULL); + if (!extent_state_cache) + goto free_map_cache; extent_buffer_cache = btrfs_cache_create("extent_buffers", sizeof(struct extent_buffer), 0, NULL); + if (!extent_buffer_cache) + goto free_state_cache; + return 0; + +free_state_cache: + kmem_cache_destroy(extent_state_cache); +free_map_cache: + kmem_cache_destroy(extent_map_cache); + return -ENOMEM; } void __exit extent_map_exit(void) diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index fbd23a72f53b..24ddc8c93461 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -110,7 +110,7 @@ struct extent_map *alloc_extent_map(gfp_t mask); void free_extent_map(struct extent_map *em); int extent_read_full_page(struct extent_map_tree *tree, struct page 
*page, get_extent_t *get_extent); -void __init extent_map_init(void); +int __init extent_map_init(void); void __exit extent_map_exit(void); int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c46bc3911798..ffa02872c4d6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -347,9 +347,24 @@ static int __init init_btrfs_fs(void) btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) - return err; - extent_map_init(); - return register_filesystem(&btrfs_fs_type); + goto free_transaction_sys; + err = extent_map_init(); + if (err) + goto free_cachep; + + err = register_filesystem(&btrfs_fs_type); + if (err) + goto free_extent_map; + return 0; + +free_extent_map: + extent_map_exit(); +free_cachep: + btrfs_destroy_cachep(); +free_transaction_sys: + btrfs_exit_transaction_sys(); + btrfs_exit_sysfs(); + return err; } static void __exit exit_btrfs_fs(void) -- cgit v1.2.3 From b6cda9bcb4df7544c67fc3548a53bc1607d59f46 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 14 Dec 2007 15:30:32 -0500 Subject: Btrfs: Add mount -o nodatasum to turn of file data checksumming Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 14 ++++++++------ fs/btrfs/disk-io.c | 1 + fs/btrfs/inode.c | 11 ++++++++++- fs/btrfs/super.c | 17 +++++++++++++++-- 4 files changed, 34 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 052555ced44d..5e255cabfd10 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -321,6 +321,7 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; + unsigned long mount_opt; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; @@ -429,6 +430,13 @@ struct btrfs_root { */ #define BTRFS_STRING_ITEM_KEY 253 +#define BTRFS_MOUNT_NODATASUM 0x1 + +#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) +#define btrfs_set_opt(o, opt) ((o) |= 
BTRFS_MOUNT_##opt) +#define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ + BTRFS_MOUNT_##opt) + /* some macros to generate set/get funcs for the struct fields. This * assumes there is a lefoo_to_cpu for every type, so lets make a simple * one for u8: @@ -906,12 +914,6 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { ((unsigned long)(btrfs_leaf_data(leaf) + \ btrfs_item_offset_nr(leaf, slot))) -/* mount option defines and helpers */ -#define BTRFS_MOUNT_SUBVOL 0x000001 -#define btrfs_clear_opt(o, opt) o &= ~BTRFS_MOUNT_##opt -#define btrfs_set_opt(o, opt) o |= BTRFS_MOUNT_##opt -#define btrfs_test_opt(sb, opt) (BTRFS_SB(sb)->s_mount_opt & \ - BTRFS_MOUNT_##opt) /* extent-tree.c */ int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60a30da6af00..4338b00c2a25 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -568,6 +568,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->sb = sb; + fs_info->mount_opt = 0; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e535c50e669b..11885cb114e2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -116,10 +116,13 @@ int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; char *kaddr; - int ret; + int ret = 0; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; size_t offset = start - page_start; + if (btrfs_test_opt(root, NODATASUM)) + return 0; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -143,6 +146,9 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) struct btrfs_path *path = NULL; u32 csum; + if 
(btrfs_test_opt(root, NODATASUM)) + return 0; + mutex_lock(&root->fs_info->fs_mutex); path = btrfs_alloc_path(); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); @@ -176,6 +182,9 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) u32 csum = ~(u32)0; unsigned long flags; + if (btrfs_test_opt(root, NODATASUM)) + return 0; + ret = get_state_private(em_tree, start, &private); local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ffa02872c4d6..ad4f280ec777 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -61,11 +61,12 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_err, + Opt_subvol, Opt_nodatasum, Opt_err, }; static match_table_t tokens = { {Opt_subvol, "subvol=%s"}, + {Opt_nodatasum, "nodatasum"}, {Opt_err, NULL} }; @@ -74,7 +75,12 @@ static int parse_options (char * options, char **subvol_name) { char * p; + struct btrfs_fs_info *info = NULL; substring_t args[MAX_OPT_ARGS]; + + if (root) + info = root->fs_info; + if (!options) return 1; @@ -86,7 +92,12 @@ static int parse_options (char * options, token = match_token(p, tokens, args); switch (token) { case Opt_subvol: - *subvol_name = match_strdup(&args[0]); + if (subvol_name) + *subvol_name = match_strdup(&args[0]); + break; + case Opt_nodatasum: + if (root) + btrfs_set_opt(info->mount_opt, NODATASUM); break; default: return 0; @@ -143,6 +154,8 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) goto fail_close; } + parse_options((char *)data, tree_root, NULL); + /* this does the super kobj at the same time */ err = btrfs_sysfs_add_super(tree_root->fs_info); if (err) -- cgit v1.2.3 From be20aa9dbadc8c06283784ee12bbc0d97dea3418 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Dec 2007 20:14:01 -0500 Subject: Btrfs: Add mount option to turn off data cow A number of workloads do not require copy on write data or checksumming. 
mount -o nodatasum to disable checksums and -o nodatacow to disable both copy on write and checksumming. In nodatacow mode, copy on write is still performed when a given extent is under snapshot. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 53 ++++++++++++++++++++- fs/btrfs/ctree.h | 8 ++++ fs/btrfs/extent-tree.c | 101 ++++++++++++++++++++++++++++++++++++++-- fs/btrfs/inode.c | 122 +++++++++++++++++++++++++++++++++++++++++++------ fs/btrfs/super.c | 34 +++++++++++--- 5 files changed, 293 insertions(+), 25 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index fd8233e05cf4..585f279d1112 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -70,7 +70,58 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) memset(p, 0, sizeof(*p)); } -static int __btrfs_cow_block(struct btrfs_trans_handle *trans, +int btrfs_copy_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer **cow_ret, u64 new_root_objectid) +{ + struct extent_buffer *cow; + u32 nritems; + int ret = 0; + int level; + struct btrfs_key first_key; + struct btrfs_root new_root; + + memcpy(&new_root, root, sizeof(new_root)); + new_root.root_key.objectid = new_root_objectid; + + WARN_ON(root->ref_cows && trans->transid != + root->fs_info->running_transaction->transid); + WARN_ON(root->ref_cows && trans->transid != root->last_trans); + + level = btrfs_header_level(buf); + nritems = btrfs_header_nritems(buf); + if (nritems) { + if (level == 0) + btrfs_item_key_to_cpu(buf, &first_key, 0); + else + btrfs_node_key_to_cpu(buf, &first_key, 0); + } else { + first_key.objectid = 0; + } + cow = __btrfs_alloc_free_block(trans, &new_root, buf->len, + new_root_objectid, + trans->transid, first_key.objectid, + level, buf->start, 0); + if (IS_ERR(cow)) + return PTR_ERR(cow); + + copy_extent_buffer(cow, buf, 0, 0, cow->len); + btrfs_set_header_bytenr(cow, cow->start); + 
btrfs_set_header_generation(cow, trans->transid); + btrfs_set_header_owner(cow, new_root_objectid); + + WARN_ON(btrfs_header_generation(buf) > trans->transid); + ret = btrfs_inc_ref(trans, &new_root, buf); + if (ret) + return ret; + + btrfs_mark_buffer_dirty(cow); + *cow_ret = cow; + return 0; +} + +int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5e255cabfd10..b51b021fff85 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -431,6 +431,7 @@ struct btrfs_root { #define BTRFS_STRING_ITEM_KEY 253 #define BTRFS_MOUNT_NODATASUM 0x1 +#define BTRFS_MOUNT_NODATACOW 0x2 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -915,6 +916,9 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { btrfs_item_offset_nr(leaf, slot))) /* extent-tree.c */ +u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, + struct btrfs_path *count_path, + u64 first_extent); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); @@ -974,6 +978,10 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, struct extent_buffer **cow_ret); +int btrfs_copy_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer **cow_ret, u64 new_root_objectid); int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size); int btrfs_truncate_item(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 00414836e963..1412d556313f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ 
-679,6 +679,104 @@ out: return 0; } +u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, + struct btrfs_path *count_path, + u64 first_extent) +{ + struct btrfs_root *extent_root = root->fs_info->extent_root; + struct btrfs_path *path; + u64 bytenr; + u64 found_objectid; + u64 root_objectid = 0; + u32 total_count = 0; + u32 cur_count; + u32 refs; + u32 nritems; + int ret; + struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *l; + struct btrfs_extent_item *item; + struct btrfs_extent_ref *ref_item; + int level = -1; + + path = btrfs_alloc_path(); +again: + if (level == -1) + bytenr = first_extent; + else + bytenr = count_path->nodes[level]->start; + + cur_count = 0; + key.objectid = bytenr; + key.offset = 0; + + btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + + l = path->nodes[0]; + btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + + if (found_key.objectid != bytenr || + found_key.type != BTRFS_EXTENT_ITEM_KEY) { + goto out; + } + + item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); + refs = btrfs_extent_refs(l, item); + while (1) { + nritems = btrfs_header_nritems(l); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret == 0) + continue; + break; + } + btrfs_item_key_to_cpu(l, &found_key, path->slots[0]); + if (found_key.objectid != bytenr) + break; + if (found_key.type != BTRFS_EXTENT_REF_KEY) { + path->slots[0]++; + continue; + } + + cur_count++; + ref_item = btrfs_item_ptr(l, path->slots[0], + struct btrfs_extent_ref); + found_objectid = btrfs_ref_root(l, ref_item); + + if (found_objectid != root_objectid) + total_count++; + + if (total_count > 1) + goto out; + + if (root_objectid == 0) + root_objectid = found_objectid; + + path->slots[0]++; + } + if (cur_count == 0) { + total_count = 0; + goto out; + } + if (total_count > 1) + goto out; + if (level >= 0 && 
root->node == count_path->nodes[level]) + goto out; + level++; + btrfs_release_path(root, path); + goto again; + +out: + btrfs_free_path(path); + return total_count; + +} + int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 owner_objectid) { @@ -1127,9 +1225,6 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root if (!path) return -ENOMEM; - if (ref_generation && owner_objectid == 0 && root_objectid == 3) { -//printk("drop backref root %Lu gen %Lu byte %Lu\n", root_objectid, ref_generation, bytenr ); - } ret = lookup_extent_backref(trans, extent_root, path, bytenr, root_objectid, ref_generation, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 11885cb114e2..91f3fc43e2a9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -72,21 +72,22 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; -static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +static int cow_file_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - struct btrfs_key ins; u64 alloc_hint = 0; u64 num_bytes; - int ret; u64 blocksize = root->sectorsize; + struct btrfs_key ins; + int ret; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); BUG_ON(!trans); + btrfs_set_trans_block_group(trans, inode); + num_bytes = (end - start + blocksize) & ~(blocksize - 1); + num_bytes = max(blocksize, num_bytes); ret = btrfs_drop_extents(trans, root, inode, start, start + num_bytes, start, &alloc_hint); @@ -106,6 +107,101 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) ins.offset); out: btrfs_end_transaction(trans, root); + return ret; +} + +static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) +{ + u64 extent_start; + u64 extent_end; + u64 bytenr; + u64 cow_end; + struct btrfs_root 
*root = BTRFS_I(inode)->root; + struct extent_buffer *leaf; + int found_type; + struct btrfs_path *path; + struct btrfs_file_extent_item *item; + int ret; + int err; + struct btrfs_key found_key; + + path = btrfs_alloc_path(); + BUG_ON(!path); +again: + ret = btrfs_lookup_file_extent(NULL, root, path, + inode->i_ino, start, 0); + if (ret < 0) { + btrfs_free_path(path); + return ret; + } + + cow_end = end; + if (ret != 0) { + if (path->slots[0] == 0) + goto not_found; + path->slots[0]--; + } + + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + /* are we inside the extent that was found? */ + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + found_type = btrfs_key_type(&found_key); + if (found_key.objectid != inode->i_ino || + found_type != BTRFS_EXTENT_DATA_KEY) { + goto not_found; + } + + found_type = btrfs_file_extent_type(leaf, item); + extent_start = found_key.offset; + if (found_type == BTRFS_FILE_EXTENT_REG) { + extent_end = extent_start + + btrfs_file_extent_num_bytes(leaf, item); + err = 0; + + if (start < extent_start || start >= extent_end) + goto not_found; + + cow_end = min(end, extent_end - 1); + bytenr = btrfs_file_extent_disk_bytenr(leaf, item); + if (bytenr == 0) + goto not_found; + + bytenr += btrfs_file_extent_offset(leaf, item); + if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) { + goto not_found; + } + + start = extent_end; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + goto not_found; + } +loop: + if (start > end) { + btrfs_free_path(path); + return 0; + } + btrfs_release_path(root, path); + goto again; + +not_found: + cow_file_range(inode, start, cow_end); + start = cow_end + 1; + goto loop; +} + +static int run_delalloc_range(struct inode *inode, u64 start, u64 end) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + int ret; + + mutex_lock(&root->fs_info->fs_mutex); + if (btrfs_test_opt(root, NODATACOW)) + ret = run_delalloc_nocow(inode, start, end); + 
else + ret = cow_file_range(inode, start, end); mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -1907,9 +2003,6 @@ int btrfs_commit_write(struct file *file, struct page *page, btrfs_cow_one_page(inode, page, PAGE_CACHE_SIZE); - set_page_extent_mapped(page); - set_page_dirty(page); - if (pos > inode->i_size) { i_size_write(inode, pos); mark_inode_dirty(inode); @@ -2078,13 +2171,18 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) key.objectid = objectid; key.offset = 1; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + extent_buffer_get(root->node); btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp); free_extent_buffer(tmp); - btrfs_set_root_bytenr(&new_root_item, root->node->start); - btrfs_set_root_level(&new_root_item, btrfs_header_level(root->node)); + + btrfs_copy_root(trans, root, root->node, &tmp, objectid); + + btrfs_set_root_bytenr(&new_root_item, tmp->start); + btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp)); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, &new_root_item); + free_extent_buffer(tmp); if (ret) goto fail; @@ -2106,10 +2204,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (ret) goto fail; - - ret = btrfs_inc_root_ref(trans, root, objectid); - if (ret) - goto fail; fail: nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ad4f280ec777..2116728d1f98 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -61,12 +61,13 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_nodatasum, Opt_err, + Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_err, }; static match_table_t tokens = { {Opt_subvol, "subvol=%s"}, {Opt_nodatasum, "nodatasum"}, + {Opt_nodatacow, "nodatacow"}, {Opt_err, NULL} }; @@ -78,12 +79,20 @@ static int parse_options (char * options, struct btrfs_fs_info *info = NULL; substring_t args[MAX_OPT_ARGS]; - if (root) - info = 
root->fs_info; - if (!options) return 1; + /* + * strsep changes the string, duplicate it because parse_options + * gets called twice + */ + options = kstrdup(options, GFP_NOFS); + if (!options) + return -ENOMEM; + + if (root) + info = root->fs_info; + while ((p = strsep (&options, ",")) != NULL) { int token; if (!*p) @@ -92,17 +101,28 @@ static int parse_options (char * options, token = match_token(p, tokens, args); switch (token) { case Opt_subvol: - if (subvol_name) + if (subvol_name) { *subvol_name = match_strdup(&args[0]); + } break; case Opt_nodatasum: - if (root) + if (info) { + printk("btrfs: setting nodatacsum\n"); btrfs_set_opt(info->mount_opt, NODATASUM); + } + break; + case Opt_nodatacow: + if (info) { + printk("btrfs: setting nodatacow\n"); + btrfs_set_opt(info->mount_opt, NODATACOW); + btrfs_set_opt(info->mount_opt, NODATASUM); + } break; default: - return 0; + break; } } + kfree(options); return 1; } -- cgit v1.2.3 From c59f8951d48c5eb000926935f3ab063d8181d1ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Dec 2007 20:14:04 -0500 Subject: Btrfs: Add mount option to enforce a max extent size Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/inode.c | 28 ++++++++++++++++++---------- fs/btrfs/super.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 65 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b51b021fff85..32b24460ec82 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -322,6 +322,7 @@ struct btrfs_fs_info { u64 generation; u64 last_trans_committed; unsigned long mount_opt; + u64 max_extent; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 278564178993..7213012c27d5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -569,6 +569,7 @@ struct btrfs_root *open_ctree(struct 
super_block *sb) fs_info->extent_root = extent_root; fs_info->sb = sb; fs_info->mount_opt = 0; + fs_info->max_extent = (u64)-1; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 91f3fc43e2a9..686dd03f34f2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -78,6 +78,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; u64 alloc_hint = 0; u64 num_bytes; + u64 cur_alloc_size; u64 blocksize = root->sectorsize; struct btrfs_key ins; int ret; @@ -94,17 +95,24 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) if (alloc_hint == EXTENT_MAP_INLINE) goto out; - ret = btrfs_alloc_extent(trans, root, num_bytes, - root->root_key.objectid, trans->transid, - inode->i_ino, start, 0, - alloc_hint, (u64)-1, &ins, 1); - if (ret) { - WARN_ON(1); - goto out; + while(num_bytes > 0) { + cur_alloc_size = min(num_bytes, root->fs_info->max_extent); + ret = btrfs_alloc_extent(trans, root, cur_alloc_size, + root->root_key.objectid, + trans->transid, + inode->i_ino, start, 0, + alloc_hint, (u64)-1, &ins, 1); + if (ret) { + WARN_ON(1); + goto out; + } + ret = btrfs_insert_file_extent(trans, root, inode->i_ino, + start, ins.objectid, ins.offset, + ins.offset); + num_bytes -= cur_alloc_size; + alloc_hint = ins.objectid + ins.offset; + start += cur_alloc_size; } - ret = btrfs_insert_file_extent(trans, root, inode->i_ino, - start, ins.objectid, ins.offset, - ins.offset); out: btrfs_end_transaction(trans, root); return ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2116728d1f98..b7323914bf5a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -61,16 +62,42 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_err, + 
Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_err, }; static match_table_t tokens = { {Opt_subvol, "subvol=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, + {Opt_max_extent, "max_extent=%s"}, {Opt_err, NULL} }; +static unsigned long parse_size(char *str) +{ + unsigned long res; + int mult = 1; + char *end; + char last; + + res = simple_strtoul(str, &end, 10); + + last = end[0]; + if (isalpha(last)) { + last = tolower(last); + switch (last) { + case 'g': + mult *= 1024; + case 'm': + mult *= 1024; + case 'k': + mult *= 1024; + } + res = res * mult; + } + return res; +} + static int parse_options (char * options, struct btrfs_root *root, char **subvol_name) @@ -118,6 +145,21 @@ static int parse_options (char * options, btrfs_set_opt(info->mount_opt, NODATASUM); } break; + case Opt_max_extent: + if (info) { + char *num = match_strdup(&args[0]); + if (num) { + info->max_extent = parse_size(num); + kfree(num); + + info->max_extent = max_t(u64, + info->max_extent, + root->sectorsize); + printk("btrfs: max_extent at %Lu\n", + info->max_extent); + } + } + break; default: break; } @@ -329,6 +371,8 @@ static int btrfs_get_sb(struct file_system_type *fs_type, ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, btrfs_fill_super, mnt, subvol_name ? 
subvol_name : "default"); + if (subvol_name) + kfree(subvol_name); return ret; } -- cgit v1.2.3 From 6da6abae027e2dbc59bca5f4168b0760f25068c7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 18 Dec 2007 16:15:09 -0500 Subject: Btrfs: Back port to 2.6.18-el kernels Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 13 ++++ fs/btrfs/disk-io.c | 5 +- fs/btrfs/extent_map.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/file.c | 15 +++-- fs/btrfs/inode.c | 21 +++++-- fs/btrfs/super.c | 1 + fs/btrfs/sysfs.c | 23 ++++++- fs/btrfs/transaction.c | 8 +++ fs/btrfs/transaction.h | 5 ++ 9 files changed, 239 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 32b24460ec82..2559f5d5d00a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -19,6 +19,7 @@ #ifndef __BTRFS__ #define __BTRFS__ +#include #include #include #include @@ -334,7 +335,11 @@ struct btrfs_fs_info { struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct work_struct trans_work; +#else struct delayed_work trans_work; +#endif struct kobject super_kobj; struct completion kobj_unregister; int do_barriers; @@ -916,6 +921,14 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { ((unsigned long)(btrfs_leaf_data(leaf) + \ btrfs_item_offset_nr(leaf, slot))) +static inline struct dentry *fdentry(struct file *file) { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + return file->f_dentry; +#else + return file->f_path.dentry; +#endif +} + /* extent-tree.c */ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, struct btrfs_path *count_path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7213012c27d5..ebb2db624fdd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -593,8 +593,11 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->closing = 0; 
fs_info->total_pinned = 0; - +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); +#else INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); +#endif BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 8326a18413ff..0d1e59a86e49 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1585,8 +1585,18 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, static int submit_one_bio(int rw, struct bio *bio) { + u64 maxsector; int ret = 0; + bio_get(bio); + + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector < bio->bi_sector) { + printk("sector too large max %Lu got %llu\n", maxsector, + (unsigned long long)bio->bi_sector); + WARN_ON(1); + } + submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; @@ -1678,8 +1688,12 @@ static int __extent_read_full_page(struct extent_map_tree *tree, while (cur <= end) { if (cur >= last_byte) { + char *userpage; iosize = PAGE_CACHE_SIZE - page_offset; - zero_user_page(page, page_offset, iosize, KM_USER0); + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); @@ -1707,7 +1721,12 @@ static int __extent_read_full_page(struct extent_map_tree *tree, /* we've found a hole, just zero and go on */ if (block_start == EXTENT_MAP_HOLE) { - zero_user_page(page, page_offset, iosize, KM_USER0); + char *userpage; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, GFP_NOFS); unlock_extent(tree, cur, cur + 
iosize - 1, GFP_NOFS); @@ -1804,9 +1823,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, } if (page->index == end_index) { + char *userpage; + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); - zero_user_page(page, offset, - PAGE_CACHE_SIZE - offset, KM_USER0); + + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); } set_page_extent_mapped(page); @@ -1921,6 +1945,129 @@ done: return 0; } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + +/* Taken directly from 2.6.23 for 2.6.18 back port */ +typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, + void *data); + +/** + * write_cache_pages - walk the list of dirty pages of the given address space + * and write all of them. + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * @writepage: function called for each page + * @data: data passed to writepage function + * + * If a page is already under I/O, write_cache_pages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. 
+ */ +static int write_cache_pages(struct address_space *mapping, + struct writeback_control *wbc, writepage_t writepage, + void *data) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc, data); + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { + unlock_page(page); + ret = 0; + } + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + 
pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} +#endif + int extent_write_full_page(struct extent_map_tree *tree, struct page *page, get_extent_t *get_extent, struct writeback_control *wbc) @@ -1945,18 +2092,20 @@ int extent_write_full_page(struct extent_map_tree *tree, struct page *page, ret = __extent_writepage(page, wbc, &epd); write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); - if (epd.bio) + if (epd.bio) { submit_one_bio(WRITE, epd.bio); + } return ret; } EXPORT_SYMBOL(extent_write_full_page); + int extent_writepages(struct extent_map_tree *tree, struct address_space *mapping, get_extent_t *get_extent, struct writeback_control *wbc) { - int ret; + int ret = 0; struct extent_page_data epd = { .bio = NULL, .tree = tree, @@ -1964,8 +2113,9 @@ int extent_writepages(struct extent_map_tree *tree, }; ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); - if (epd.bio) + if (epd.bio) { submit_one_bio(WRITE, epd.bio); + } return ret; } EXPORT_SYMBOL(extent_writepages); @@ -2106,7 +2256,9 @@ int extent_prepare_write(struct extent_map_tree *tree, flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); } - if (!isnew && !PageUptodate(page) && + if ((em->block_start != EXTENT_MAP_HOLE && + em->block_start != EXTENT_MAP_INLINE) && + !isnew && !PageUptodate(page) && (block_off_end > to || block_off_start < from) && !test_range_bit(tree, block_start, cur_end, EXTENT_UPTODATE, 1)) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 62fcd79d8ab3..461b09663fed 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -231,7 +231,7 @@ static int dirty_and_release_pages(struct btrfs_trans_handle *trans, { int err = 0; int i; - struct inode *inode = 
file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct extent_map *em; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 hint_byte; @@ -652,7 +652,7 @@ static int prepare_pages(struct btrfs_root *root, { int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; int err = 0; u64 start_pos; @@ -666,7 +666,11 @@ static int prepare_pages(struct btrfs_root *root, err = -ENOMEM; BUG_ON(1); } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + ClearPageDirty(pages[i]); +#else cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); +#endif wait_on_page_writeback(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); @@ -682,7 +686,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ssize_t num_written = 0; ssize_t err = 0; int ret = 0; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct page **pages = NULL; int nrptrs; @@ -707,7 +711,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out; if (count == 0) goto out; - err = remove_suid(file->f_path.dentry); + err = remove_suid(fdentry(file)); if (err) goto out; file_update_time(file); @@ -862,6 +866,9 @@ struct file_operations btrfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .splice_read = generic_file_splice_read, +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + .sendfile = generic_file_sendfile, +#endif .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 686dd03f34f2..6a7d9160df27 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -532,7 +532,11 @@ err: dir->i_size -= name_len * 2; dir->i_mtime = dir->i_ctime = CURRENT_TIME; btrfs_update_inode(trans, root, dir); +#if 
LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + dentry->d_inode->i_nlink--; +#else drop_nlink(dentry->d_inode); +#endif ret = btrfs_update_inode(trans, root, dentry->d_inode); dir->i_sb->s_dirt = 1; } @@ -1139,7 +1143,7 @@ static unsigned char btrfs_filetype_table[] = { static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = filp->f_dentry->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_item *item; struct btrfs_dir_item *di; @@ -1554,7 +1558,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + inode->i_nlink++; +#else inc_nlink(inode); +#endif mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); @@ -1825,6 +1833,9 @@ insert: if (ret == -EEXIST) { free_extent_map(em); em = NULL; + if (0 && failed_insert == 1) { + btrfs_drop_extent_cache(inode, start, end); + } failed_insert++; if (failed_insert > 5) { printk("failing to insert %Lu %Lu\n", start, end); @@ -1942,7 +1953,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = fdentry(vma->vm_file)->d_inode; unsigned long end; loff_t size; int ret = -EINVAL; @@ -2243,7 +2254,7 @@ static unsigned long force_ra(struct address_space *mapping, } int btrfs_defrag_file(struct file *file) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct page *page; unsigned long last_index; @@ -2329,7 +2340,7 @@ static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) static int btrfs_ioctl_defrag(struct file *file) { - struct inode *inode = 
file->f_path.dentry->d_inode; + struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; switch (inode->i_mode & S_IFMT) { @@ -2350,7 +2361,7 @@ static int btrfs_ioctl_defrag(struct file *file) long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct btrfs_root *root = BTRFS_I(file->f_path.dentry->d_inode)->root; + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; switch (cmd) { case BTRFS_IOC_SNAP_CREATE: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b7323914bf5a..7cab680418a1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 1671c731800e..cd673ca9bb98 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -189,12 +189,29 @@ static struct kset btrfs_kset; int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) { int error; + char *name; + char c; + int len = strlen(fs->sb->s_id) + 1; + int i; + + name = kmalloc(len, GFP_NOFS); + if (!name) { + error = -ENOMEM; + goto fail; + } + + for (i = 0; i < len; i++) { + c = fs->sb->s_id[i]; + if (c == '/' || c == '\\') + c = '!'; + name[i] = c; + } + name[len] = '\0'; fs->super_kobj.kset = &btrfs_kset; fs->super_kobj.ktype = &btrfs_super_ktype; - error = kobject_set_name(&fs->super_kobj, "%s", - fs->sb->s_id); + error = kobject_set_name(&fs->super_kobj, "%s", name); if (error) goto fail; @@ -202,9 +219,11 @@ int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) if (error) goto fail; + kfree(name); return 0; fail: + kfree(name); printk(KERN_ERR "btrfs: sysfs creation for super failed\n"); return error; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0229e69bd2ff..1ad611b9f61b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -614,12 +614,20 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) } return 0; } +#if 
LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +void btrfs_transaction_cleaner(void *p) +#else void btrfs_transaction_cleaner(struct work_struct *work) +#endif { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct btrfs_fs_info *fs_info = p; +#else struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info, trans_work.work); +#endif struct btrfs_root *root = fs_info->tree_root; struct btrfs_transaction *cur; struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ae39fcfc169a..eef840bca91e 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -73,7 +73,12 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +void btrfs_transaction_cleaner(void *p); +#else void btrfs_transaction_cleaner(struct work_struct *work); +#endif + void btrfs_transaction_flush_work(struct btrfs_root *root); void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); void btrfs_init_transaction_sys(void); -- cgit v1.2.3 From edbd8d4efe4ddaf29a175ae504e2c9a05a96ebee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:24 -0500 Subject: Btrfs: Support for online FS resize (grow and shrink) Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 ++ fs/btrfs/disk-io.c | 32 +++- fs/btrfs/disk-io.h | 2 + fs/btrfs/extent-tree.c | 480 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/inode.c | 105 ++++++++++- fs/btrfs/ioctl.h | 2 + fs/btrfs/root-tree.c | 5 + fs/btrfs/super.c | 7 +- 8 files changed, 629 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 10129cc6656f..b83a1bec346f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -508,6 +508,8 @@ BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); 
BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item, used, 64); +BTRFS_SETGET_FUNCS(disk_block_group_flags, struct btrfs_block_group_item, + flags, 8); /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); @@ -960,6 +962,9 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, int level, u64 hint, u64 empty_size); +int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 new_size); +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 bytenr, @@ -1117,6 +1122,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 isize); /* inode.c */ +unsigned long btrfs_force_ra(struct address_space *mapping, + struct file_ra_state *ra, struct file *file, + pgoff_t offset, pgoff_t last_index); int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); @@ -1162,4 +1170,6 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *root); ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); +/* super.c */ +u64 btrfs_parse_size(char *str); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eebb4fb65c61..de026d9d9b2b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -471,13 +471,17 @@ insert: return root; } -struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location, - const char *name, int namelen) +struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) { struct btrfs_root *root; int ret; + if (location->objectid == 
BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) + return fs_info->extent_root; + root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); if (root) @@ -494,6 +498,23 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid, root); + BUG_ON(ret); + + return root; +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location, + const char *name, int namelen) +{ + struct btrfs_root *root; + int ret; + + root = btrfs_read_fs_root_no_name(fs_info, location); + if (!root) + return NULL; ret = btrfs_set_root_name(root, name, namelen); if (ret) { @@ -509,11 +530,6 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } - - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid, root); - BUG_ON(ret); - return root; } #if 0 diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 33d2ccfd74d8..8c3cfd02901f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -39,6 +39,8 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, const char *name, int namelen); struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_key *location); +struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location); int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, u64 device_id, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1412d556313f..de0fb0743cf9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -18,6 +18,7 @@ #include #include +#include #include "hash.h" #include "ctree.h" #include "disk-io.h" @@ -1622,6 +1623,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int pending_ret; u64 
super_used, root_used; u64 search_start = 0; + u64 new_hint; struct btrfs_fs_info *info = root->fs_info; struct btrfs_root *extent_root = info->extent_root; struct btrfs_extent_item extent_item; @@ -1629,6 +1631,10 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, btrfs_set_stack_extent_refs(&extent_item, 1); + new_hint = max(hint_byte, 16ULL * 1024 * 1024 * 1024); + if (new_hint < btrfs_super_total_bytes(&info->super_copy)) + hint_byte = new_hint; + WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, @@ -2100,6 +2106,480 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } +static int relocate_inode_pages(struct inode *inode, u64 start, u64 len) +{ + u64 page_start; + u64 page_end; + u64 delalloc_start; + u64 existing_delalloc; + unsigned long last_index; + unsigned long first_index; + unsigned long i; + struct page *page; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct file_ra_state ra; + + mutex_lock(&inode->i_mutex); + first_index = start >> PAGE_CACHE_SHIFT; + last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; + + memset(&ra, 0, sizeof(ra)); + file_ra_state_init(&ra, inode->i_mapping); + btrfs_force_ra(inode->i_mapping, &ra, NULL, first_index, last_index); + + for (i = first_index; i <= last_index; i++) { + page = grab_cache_page(inode->i_mapping, i); + if (!page) + goto out_unlock; + if (!PageUptodate(page)) { + btrfs_readpage(NULL, page); + lock_page(page); + if (!PageUptodate(page)) { + unlock_page(page); + page_cache_release(page); + goto out_unlock; + } + } + page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_end = page_start + PAGE_CACHE_SIZE - 1; + + lock_extent(em_tree, page_start, page_end, GFP_NOFS); + + delalloc_start = page_start; + existing_delalloc = + count_range_bits(&BTRFS_I(inode)->extent_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, 
EXTENT_DELALLOC); + + set_extent_delalloc(em_tree, page_start, + page_end, GFP_NOFS); + + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - + existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); + + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + } + +out_unlock: + mutex_unlock(&inode->i_mutex); + return 0; +} + +static int relocate_one_reference(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key, + u64 ref_root, u64 ref_gen, u64 ref_objectid, + u64 ref_offset) +{ + struct inode *inode; + struct btrfs_root *found_root; + struct btrfs_key root_location; + int ret; + + root_location.objectid = ref_root; + if (ref_gen == 0) + root_location.offset = 0; + else + root_location.offset = (u64)-1; + root_location.type = BTRFS_ROOT_ITEM_KEY; + + found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, + &root_location); + BUG_ON(!found_root); + + if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { + mutex_unlock(&extent_root->fs_info->fs_mutex); + inode = btrfs_iget_locked(extent_root->fs_info->sb, + ref_objectid, found_root); + if (inode->i_state & I_NEW) { + /* the inode and parent dir are two different roots */ + BTRFS_I(inode)->root = found_root; + BTRFS_I(inode)->location.objectid = ref_objectid; + BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; + BTRFS_I(inode)->location.offset = 0; + btrfs_read_locked_inode(inode); + unlock_new_inode(inode); + + } + /* this can happen if the reference is not against + * the latest version of the tree root + */ + if (is_bad_inode(inode)) { + mutex_lock(&extent_root->fs_info->fs_mutex); + goto out; + } + relocate_inode_pages(inode, ref_offset, extent_key->offset); + /* FIXME, data=ordered will help get rid of this */ + filemap_fdatawrite(inode->i_mapping); + iput(inode); + mutex_lock(&extent_root->fs_info->fs_mutex); + } else { + struct 
btrfs_trans_handle *trans; + struct btrfs_key found_key; + struct extent_buffer *eb; + int level; + int i; + + trans = btrfs_start_transaction(found_root, 1); + eb = read_tree_block(found_root, extent_key->objectid, + extent_key->offset); + level = btrfs_header_level(eb); + + if (level == 0) + btrfs_item_key_to_cpu(eb, &found_key, 0); + else + btrfs_node_key_to_cpu(eb, &found_key, 0); + + free_extent_buffer(eb); + + path->lowest_level = level; + path->reada = 0; + ret = btrfs_search_slot(trans, found_root, &found_key, path, + 0, 1); + path->lowest_level = 0; + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + if (!path->nodes[i]) + break; + free_extent_buffer(path->nodes[i]); + path->nodes[i] = NULL; + } + btrfs_release_path(found_root, path); + btrfs_end_transaction(trans, found_root); + } + +out: + return 0; +} + +static int relocate_one_extent(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) +{ + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_extent_ref *ref; + struct extent_buffer *leaf; + u64 ref_root; + u64 ref_gen; + u64 ref_objectid; + u64 ref_offset; + u32 nritems; + u32 item_size; + int ret = 0; + + key.objectid = extent_key->objectid; + key.type = BTRFS_EXTENT_REF_KEY; + key.offset = 0; + + while(1) { + ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); + + BUG_ON(ret == 0); + + if (ret < 0) + goto out; + + ret = 0; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] == nritems) + goto out; + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != extent_key->objectid) + break; + + if (found_key.type != BTRFS_EXTENT_REF_KEY) + break; + + key.offset = found_key.offset + 1; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + ref_root = btrfs_ref_root(leaf, ref); + ref_gen = btrfs_ref_generation(leaf, ref); + ref_objectid = 
btrfs_ref_objectid(leaf, ref); + ref_offset = btrfs_ref_offset(leaf, ref); + btrfs_release_path(extent_root, path); + + ret = relocate_one_reference(extent_root, path, + extent_key, ref_root, ref_gen, + ref_objectid, ref_offset); + if (ret) + goto out; + } + ret = 0; +out: + btrfs_release_path(extent_root, path); + return ret; +} + +static int find_overlapping_extent(struct btrfs_root *root, + struct btrfs_path *path, u64 new_size) +{ + struct btrfs_key found_key; + struct extent_buffer *leaf; + int ret; + + while(1) { + if (path->slots[0] == 0) { + ret = btrfs_prev_leaf(root, path); + if (ret == 1) { + return 1; + } + if (ret < 0) + return ret; + } else { + path->slots[0]--; + } + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.type == BTRFS_EXTENT_ITEM_KEY) { + if (found_key.objectid + found_key.offset > new_size) + return 0; + else + return 1; + } + } + return 1; +} + +int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = root->fs_info->tree_root; + struct btrfs_path *path; + u64 cur_byte; + u64 total_found; + u64 ptr; + struct btrfs_fs_info *info = root->fs_info; + struct extent_map_tree *block_group_cache; + struct btrfs_key key; + struct btrfs_key found_key = { 0, 0, 0 }; + struct extent_buffer *leaf; + u32 nritems; + int ret; + int slot; + + btrfs_set_super_total_bytes(&info->super_copy, new_size); + block_group_cache = &info->block_group_cache; + path = btrfs_alloc_path(); + root = root->fs_info->extent_root; + +again: + total_found = 0; + key.objectid = new_size; + cur_byte = key.objectid; + key.offset = 0; + key.type = 0; + while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; +next: + leaf = path->nodes[0]; + if (key.objectid == new_size - 1) { + ret = find_overlapping_extent(root, path, new_size); + if (ret != 0) { + btrfs_release_path(root, path); + ret = btrfs_search_slot(NULL, 
root, &key, + path, 0, 0); + if (ret < 0) + goto out; + } + } + nritems = btrfs_header_nritems(leaf); + ret = 0; + slot = path->slots[0]; + if (slot < nritems) + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (slot == nritems || + btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY) { + path->slots[0]++; + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret == 1) { + ret = 0; + break; + } + } + goto next; + } + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (found_key.objectid + found_key.offset <= cur_byte) + continue; + total_found++; + cur_byte = found_key.objectid + found_key.offset; + key.objectid = cur_byte; + btrfs_release_path(root, path); + ret = relocate_one_extent(root, path, &found_key); + } + + btrfs_release_path(root, path); + + if (total_found > 0) { + trans = btrfs_start_transaction(tree_root, 1); + btrfs_commit_transaction(trans, tree_root); + + mutex_unlock(&root->fs_info->fs_mutex); + btrfs_clean_old_snapshots(tree_root); + mutex_lock(&root->fs_info->fs_mutex); + + trans = btrfs_start_transaction(tree_root, 1); + btrfs_commit_transaction(trans, tree_root); + goto again; + } + + trans = btrfs_start_transaction(root, 1); + key.objectid = new_size; + key.offset = 0; + key.type = 0; + while(1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; +bg_next: + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + ret = 0; + slot = path->slots[0]; + if (slot < nritems) + btrfs_item_key_to_cpu(leaf, &found_key, slot); + if (slot == nritems || + btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) { + if (slot < nritems) { + printk("shrinker found key %Lu %u %Lu\n", + found_key.objectid, found_key.type, + found_key.offset); + path->slots[0]++; + } + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + break; + if (ret == 1) { + ret = 0; + break; + } + } + goto bg_next; + } + btrfs_item_key_to_cpu(leaf, 
&found_key, slot); + ret = get_state_private(&info->block_group_cache, + found_key.objectid, &ptr); + if (!ret) + kfree((void *)(unsigned long)ptr); + + clear_extent_bits(&info->block_group_cache, found_key.objectid, + found_key.objectid + found_key.offset - 1, + (unsigned int)-1, GFP_NOFS); + + key.objectid = found_key.objectid + 1; + btrfs_del_item(trans, root, path); + btrfs_release_path(root, path); + } + clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1, + GFP_NOFS); + btrfs_commit_transaction(trans, root); +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 new_size) +{ + struct btrfs_path *path; + u64 nr = 0; + u64 cur_byte; + u64 old_size; + struct btrfs_block_group_cache *cache; + struct btrfs_block_group_item *item; + struct btrfs_fs_info *info = root->fs_info; + struct extent_map_tree *block_group_cache; + struct btrfs_key key; + struct extent_buffer *leaf; + int ret; + int bit; + + old_size = btrfs_super_total_bytes(&info->super_copy); + block_group_cache = &info->block_group_cache; + + root = info->extent_root; + + cache = btrfs_lookup_block_group(root->fs_info, old_size - 1); + + cur_byte = cache->key.objectid + cache->key.offset; + if (cur_byte >= new_size) + goto set_size; + + key.offset = BTRFS_BLOCK_GROUP_SIZE; + btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + while(cur_byte < new_size) { + key.objectid = cur_byte; + ret = btrfs_insert_empty_item(trans, root, path, &key, + sizeof(struct btrfs_block_group_item)); + BUG_ON(ret); + leaf = path->nodes[0]; + item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_block_group_item); + + btrfs_set_disk_block_group_used(leaf, item, 0); + if (nr % 3) { + btrfs_set_disk_block_group_flags(leaf, item, + BTRFS_BLOCK_GROUP_DATA); + } else { + btrfs_set_disk_block_group_flags(leaf, item, 0); + } + nr++; + + cache = 
kmalloc(sizeof(*cache), GFP_NOFS); + BUG_ON(!cache); + + read_extent_buffer(leaf, &cache->item, (unsigned long)item, + sizeof(cache->item)); + + memcpy(&cache->key, &key, sizeof(key)); + cache->cached = 0; + cache->pinned = 0; + cur_byte = key.objectid + key.offset; + btrfs_release_path(root, path); + + if (cache->item.flags & BTRFS_BLOCK_GROUP_DATA) { + bit = BLOCK_GROUP_DATA; + cache->data = BTRFS_BLOCK_GROUP_DATA; + } else { + bit = BLOCK_GROUP_METADATA; + cache->data = 0; + } + + /* use EXTENT_LOCKED to prevent merging */ + set_extent_bits(block_group_cache, key.objectid, + key.objectid + key.offset - 1, + bit | EXTENT_LOCKED, GFP_NOFS); + set_state_private(block_group_cache, key.objectid, + (unsigned long)cache); + } + btrfs_free_path(path); +set_size: + btrfs_set_super_total_bytes(&info->super_copy, new_size); + return 0; +} + int btrfs_read_block_groups(struct btrfs_root *root) { struct btrfs_path *path; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5f5b7b89b144..f6a20112d9e7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2346,7 +2346,7 @@ fail_unlock: return ret; } -static unsigned long force_ra(struct address_space *mapping, +unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index) { @@ -2372,6 +2372,8 @@ int btrfs_defrag_file(struct file *file) { unsigned long ra_index = 0; u64 page_start; u64 page_end; + u64 delalloc_start; + u64 existing_delalloc; unsigned long i; int ret; @@ -2385,8 +2387,9 @@ int btrfs_defrag_file(struct file *file) { last_index = inode->i_size >> PAGE_CACHE_SHIFT; for (i = 0; i <= last_index; i++) { if (i == ra_index) { - ra_index = force_ra(inode->i_mapping, &file->f_ra, - file, ra_index, last_index); + ra_index = btrfs_force_ra(inode->i_mapping, + &file->f_ra, + file, ra_index, last_index); } page = grab_cache_page(inode->i_mapping, i); if (!page) @@ -2404,8 +2407,19 @@ int btrfs_defrag_file(struct file *file) { page_end = 
page_start + PAGE_CACHE_SIZE - 1; lock_extent(em_tree, page_start, page_end, GFP_NOFS); + delalloc_start = page_start; + existing_delalloc = + count_range_bits(&BTRFS_I(inode)->extent_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); set_extent_delalloc(em_tree, page_start, page_end, GFP_NOFS); + + spin_lock(&root->fs_info->delalloc_lock); + root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - + existing_delalloc; + spin_unlock(&root->fs_info->delalloc_lock); + unlock_extent(em_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); @@ -2418,6 +2432,89 @@ out_unlock: return 0; } +static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) +{ + u64 new_size; + u64 old_size; + struct btrfs_ioctl_vol_args *vol_args; + struct btrfs_trans_handle *trans; + char *sizestr; + int ret = 0; + int namelen; + int mod = 0; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + namelen = strlen(vol_args->name); + if (namelen > BTRFS_VOL_NAME_MAX) { + ret = -EINVAL; + goto out; + } + + sizestr = vol_args->name; + if (!strcmp(sizestr, "max")) + new_size = root->fs_info->sb->s_bdev->bd_inode->i_size; + else { + if (sizestr[0] == '-') { + mod = -1; + sizestr++; + } else if (sizestr[0] == '+') { + mod = 1; + sizestr++; + } + new_size = btrfs_parse_size(sizestr); + if (new_size == 0) { + ret = -EINVAL; + goto out; + } + } + + mutex_lock(&root->fs_info->fs_mutex); + old_size = btrfs_super_total_bytes(&root->fs_info->super_copy); + + if (mod < 0) { + if (new_size > old_size) { + ret = -EINVAL; + goto out_unlock; + } + new_size = old_size - new_size; + } else if (mod > 0) { + new_size = old_size + new_size; + } + + if (new_size < 256 * 1024 * 1024) { + ret = -EINVAL; + goto out_unlock; + } + if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) { + ret = -EFBIG; + goto out_unlock; + } + new_size = 
(new_size / root->sectorsize) * root->sectorsize; + +printk("new size is %Lu\n", new_size); + if (new_size > old_size) { + trans = btrfs_start_transaction(root, 1); + ret = btrfs_grow_extent_tree(trans, root, new_size); + btrfs_commit_transaction(trans, root); + } else { + ret = btrfs_shrink_extent_tree(root, new_size); + } + +out_unlock: + mutex_unlock(&root->fs_info->fs_mutex); +out: + kfree(vol_args); + return ret; +} + static int btrfs_ioctl_snap_create(struct btrfs_root *root, void __user *arg) { struct btrfs_ioctl_vol_args *vol_args; @@ -2510,6 +2607,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create(root, (void __user *)arg); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); + case BTRFS_IOC_RESIZE: + return btrfs_ioctl_resize(root, (void __user *)arg); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 8bc47dec2865..8c6290665d49 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -30,4 +30,6 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 88bcdd33f56e..e2b9b86af696 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -179,6 +179,11 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_search_slot(trans, root, key, path, -1, 1); if (ret < 0) goto out; + if (ret) { +btrfs_print_leaf(root, path->nodes[0]); +printk("failed to del %Lu %u %Lu\n", key->objectid, key->type, key->offset); + + } BUG_ON(ret != 0); leaf = path->nodes[0]; ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7cab680418a1..3bbd7f665694 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -74,9 +74,9 @@ static match_table_t tokens = { {Opt_err, NULL} }; 
-static unsigned long parse_size(char *str) +u64 btrfs_parse_size(char *str) { - unsigned long res; + u64 res; int mult = 1; char *end; char last; @@ -150,7 +150,8 @@ static int parse_options (char * options, if (info) { char *num = match_strdup(&args[0]); if (num) { - info->max_extent = parse_size(num); + info->max_extent = + btrfs_parse_size(num); kfree(num); info->max_extent = max_t(u64, -- cgit v1.2.3 From 8f662a76c6af8eb367fa519e9bb9766040d9cea8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 2 Jan 2008 10:01:11 -0500 Subject: Btrfs: Add readahead to the online shrinker, and a mount -o alloc_start= for testing Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 9 ++++++--- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 9 +++++---- fs/btrfs/inode.c | 2 +- fs/btrfs/super.c | 16 +++++++++++++++- 5 files changed, 28 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2b3ffa707eae..8fa92a2d9819 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2654,9 +2654,9 @@ int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { + u64 bytenr; int slot; int level = 1; - u64 bytenr; struct extent_buffer *c; struct extent_buffer *next = NULL; @@ -2687,11 +2687,14 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) level--; c = path->nodes[level]; free_extent_buffer(c); + slot = btrfs_header_nritems(next); + if (slot != 0) + slot--; path->nodes[level] = next; - path->slots[level] = 0; + path->slots[level] = slot; if (!level) break; - next = read_tree_block(root, btrfs_node_blockptr(next, 0), + next = read_tree_block(root, btrfs_node_blockptr(next, slot), btrfs_level_size(root, level - 1)); } return 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b83a1bec346f..d6e5c19969b0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -324,6 +324,7 @@ struct btrfs_fs_info { u64 
last_trans_committed; unsigned long mount_opt; u64 max_extent; + u64 alloc_start; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; struct extent_buffer *sb_buffer; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a421069e998b..d6243c2afc73 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1630,11 +1630,11 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; btrfs_set_stack_extent_refs(&extent_item, 1); -#if 0 - new_hint = max(hint_byte, 16ULL * 1024 * 1024 * 1024); + + new_hint = max(hint_byte, root->fs_info->alloc_start); if (new_hint < btrfs_super_total_bytes(&info->super_copy)) hint_byte = new_hint; -#endif + WARN_ON(num_bytes < root->sectorsize); ret = find_free_extent(trans, root, num_bytes, empty_size, search_start, search_end, hint_byte, ins, @@ -2239,7 +2239,7 @@ static int relocate_one_reference(struct btrfs_root *extent_root, free_extent_buffer(eb); path->lowest_level = level; - path->reada = 0; + path->reada = 2; ret = btrfs_search_slot(trans, found_root, &found_key, path, 0, 1); path->lowest_level = 0; @@ -2372,6 +2372,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) block_group_cache = &info->block_group_cache; path = btrfs_alloc_path(); root = root->fs_info->extent_root; + path->reada = 2; again: total_found = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3c7bf3bddcad..d6de46961887 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2058,7 +2058,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); - mutex_lock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->fs_mutex); if (ret) goto out; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3bbd7f665694..f1b56eb77b1d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -63,7 +63,8 @@ static void btrfs_put_super (struct super_block * 
sb) } enum { - Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_err, + Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, + Opt_alloc_start, Opt_err, }; static match_table_t tokens = { @@ -71,6 +72,7 @@ static match_table_t tokens = { {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, {Opt_max_extent, "max_extent=%s"}, + {Opt_alloc_start, "alloc_start=%s"}, {Opt_err, NULL} }; @@ -162,6 +164,18 @@ static int parse_options (char * options, } } break; + case Opt_alloc_start: + if (info) { + char *num = match_strdup(&args[0]); + if (num) { + info->alloc_start = + btrfs_parse_size(num); + kfree(num); + printk("btrfs: allocations start at " + "%Lu\n", info->alloc_start); + } + } + break; default: break; } -- cgit v1.2.3 From 21ad10cf3e9c1ef42e725e5c3a593c49f779a16b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Jan 2008 09:23:21 -0500 Subject: Btrfs: Add flush barriers on commit Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 +++-- fs/btrfs/disk-io.c | 5 +++++ fs/btrfs/super.c | 9 ++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7eda51542d17..0c0edacbc14c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -440,8 +440,9 @@ struct btrfs_root { */ #define BTRFS_STRING_ITEM_KEY 253 -#define BTRFS_MOUNT_NODATASUM 0x1 -#define BTRFS_MOUNT_NODATACOW 0x2 +#define BTRFS_MOUNT_NODATASUM (1 << 0) +#define BTRFS_MOUNT_NODATACOW (1 << 1) +#define BTRFS_MOUNT_NOBARRIER (1 << 2) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0940a39ff07..a481b970608c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -762,10 +762,15 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; struct extent_buffer *super = root->fs_info->sb_buffer; struct inode *btree_inode = 
root->fs_info->btree_inode; + struct super_block *sb = root->fs_info->sb; + if (!btrfs_test_opt(root, NOBARRIER)) + blkdev_issue_flush(sb->s_bdev, NULL); set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, super->start, super->len); + if (!btrfs_test_opt(root, NOBARRIER)) + blkdev_issue_flush(sb->s_bdev, NULL); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f1b56eb77b1d..9ab4086d463c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -64,13 +64,14 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, - Opt_alloc_start, Opt_err, + Opt_alloc_start, Opt_nobarrier, Opt_err, }; static match_table_t tokens = { {Opt_subvol, "subvol=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, + {Opt_nobarrier, "nobarrier"}, {Opt_max_extent, "max_extent=%s"}, {Opt_alloc_start, "alloc_start=%s"}, {Opt_err, NULL} @@ -148,6 +149,12 @@ static int parse_options (char * options, btrfs_set_opt(info->mount_opt, NODATASUM); } break; + case Opt_nobarrier: + if (info) { + printk("btrfs: turning off barriers\n"); + btrfs_set_opt(info->mount_opt, NOBARRIER); + } + break; case Opt_max_extent: if (info) { char *num = match_strdup(&args[0]); -- cgit v1.2.3 From 61295eb8665e723e77af91d0a1e655a4bd28344f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Jan 2008 16:24:38 -0500 Subject: Btrfs: Add drop inode func to avoid data=ordered deadlock Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/inode.c | 19 +++++++++++++++++++ fs/btrfs/super.c | 1 + 3 files changed, 21 insertions(+) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f65b258344d4..a2c2d6d82c5e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1144,6 +1144,7 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int btrfs_page_mkwrite(struct vm_area_struct *vma, struct 
page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); +void btrfs_drop_inode(struct inode *inode); void btrfs_read_locked_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, int wait); void btrfs_dirty_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5260b4708158..e53d20331646 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -990,6 +990,25 @@ out: fail: return err; } + +void btrfs_drop_inode(struct inode *inode) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(inode)->root; + + if (!BTRFS_I(inode)->ordered_trans) { + generic_drop_inode(inode); + return; + } + /* nasty, but it prevents a deadlock with data=ordered by preventing + * a commit until after this inode is done + */ + trans = btrfs_start_transaction(root, 1); + generic_drop_inode(inode); + /* note, the inode is now untouchable */ + btrfs_end_transaction(trans, root); +} + void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9ab4086d463c..4deea393ca94 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -425,6 +425,7 @@ static struct file_system_type btrfs_fs_type = { static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, + .drop_inode = btrfs_drop_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, .write_super = btrfs_write_super, -- cgit v1.2.3 From 2da98f003f4788b0a72c5f87bc55b061f65f30fa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Jan 2008 11:44:43 -0500 Subject: Btrfs: Run igrab on data=ordered inodes to prevent deadlocks during writeout Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 32 ++++++++++++++++++++++++++------ fs/btrfs/ordered-data.c | 10 ++++++++-- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 1 + 5 files changed, 37 insertions(+), 10 deletions(-) (limited to 
'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1e19f2d86334..fa65fe027e45 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1145,7 +1145,7 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); -void btrfs_drop_inode(struct inode *inode); +void btrfs_put_inode(struct inode *inode); void btrfs_read_locked_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, int wait); void btrfs_dirty_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f83f88ca8ac7..c1ac0bcbb462 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -601,6 +601,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) { struct btrfs_root *root; struct btrfs_trans_handle *trans; + struct inode *inode = dentry->d_inode; int ret; unsigned long nr = 0; @@ -617,6 +618,18 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_trans(trans, root, dir, dentry); nr = trans->blocks_used; + if (inode->i_nlink == 0) { + int found; + /* if the inode isn't linked anywhere, + * we don't need to worry about + * data=ordered + */ + found = btrfs_del_ordered_inode(inode); + if (found == 1) { + atomic_dec(&inode->i_count); + } + } + btrfs_end_transaction(trans, root); fail: mutex_unlock(&root->fs_info->fs_mutex); @@ -993,15 +1006,22 @@ fail: return err; } -void btrfs_drop_inode(struct inode *inode) +void btrfs_put_inode(struct inode *inode) { - if (!BTRFS_I(inode)->ordered_trans || inode->i_nlink) { - generic_drop_inode(inode); + int ret; + + if (!BTRFS_I(inode)->ordered_trans) { + return; + } + + if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || + mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)) return; + + ret = btrfs_del_ordered_inode(inode); + if (ret == 1) { + 
atomic_dec(&inode->i_count); } - /* FIXME, make sure this delete actually ends up in the transaction */ - btrfs_del_ordered_inode(inode); - generic_drop_inode(inode); } void btrfs_delete_inode(struct inode *inode) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b56011baa17c..cba2b623d02e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -153,6 +153,8 @@ int btrfs_add_ordered_inode(struct inode *inode) write_unlock(&tree->lock); if (node) kfree(entry); + else + igrab(inode); return 0; } @@ -221,6 +223,7 @@ int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, } static int __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, + struct inode *inode, u64 root_objectid, u64 objectid) { struct tree_entry *entry; @@ -234,6 +237,7 @@ static int __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, return 0; } rb_erase(node, &tree->tree); + BTRFS_I(inode)->ordered_trans = 0; write_unlock(&tree->lock); entry = rb_entry(node, struct tree_entry, rb_node); kfree(entry); @@ -244,14 +248,16 @@ int btrfs_del_ordered_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 root_objectid = root->root_key.objectid; + int ret = 0; spin_lock(&root->fs_info->new_trans_lock); if (root->fs_info->running_transaction) { struct btrfs_ordered_inode_tree *tree; tree = &root->fs_info->running_transaction->ordered_inode_tree; - __btrfs_del_ordered_inode(tree, root_objectid, inode->i_ino); + ret = __btrfs_del_ordered_inode(tree, inode, root_objectid, + inode->i_ino); } spin_unlock(&root->fs_info->new_trans_lock); - return 0; + return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4deea393ca94..e506de3168bc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -425,7 +425,7 @@ static struct file_system_type btrfs_fs_type = { static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, - .drop_inode = btrfs_drop_inode, + .put_inode = 
btrfs_put_inode, .put_super = btrfs_put_super, .read_inode = btrfs_read_locked_inode, .write_super = btrfs_write_super, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index a3205808ab2b..08f7a188dc3e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -521,6 +521,7 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, if (inode) { if (S_ISREG(inode->i_mode)) filemap_write_and_wait(inode->i_mapping); + atomic_dec(&inode->i_count); iput(inode); } mutex_lock(&root->fs_info->fs_mutex); -- cgit v1.2.3 From e18e4809b10e6c9efb5fe10c1ddcb4ebb690d517 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Jan 2008 10:54:22 -0500 Subject: Btrfs: Add mount -o ssd, which includes optimizations for seek free storage Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/disk-io.c | 3 +++ fs/btrfs/extent-tree.c | 31 ++++++++++++++++++++++++++++++- fs/btrfs/super.c | 9 ++++++++- fs/btrfs/transaction.c | 1 + fs/btrfs/tree-defrag.c | 3 +++ 6 files changed, 47 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fa65fe027e45..7a588ba2b747 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -351,6 +351,7 @@ struct btrfs_fs_info { spinlock_t delalloc_lock; spinlock_t new_trans_lock; u64 delalloc_bytes; + u64 last_alloc; }; /* * in ram representation of the tree. 
extent_root is used for all allocations @@ -444,6 +445,7 @@ struct btrfs_root { #define BTRFS_MOUNT_NODATASUM (1 << 0) #define BTRFS_MOUNT_NODATACOW (1 << 1) #define BTRFS_MOUNT_NOBARRIER (1 << 2) +#define BTRFS_MOUNT_SSD (1 << 3) #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 731a534f81f5..5d1f9bca2712 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -193,6 +193,7 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1); + btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", @@ -676,6 +677,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->closing = 0; fs_info->total_pinned = 0; + fs_info->last_alloc = 0; + #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2c569b4d59d4..b69a46691a96 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1431,6 +1431,19 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_MIXED; } + /* for SSD, cluster allocations together as much as possible */ + if (btrfs_test_opt(root, SSD)) { + if (!data) { + if (root->fs_info->last_alloc) + hint_byte = root->fs_info->last_alloc; + else { + hint_byte = hint_byte & + ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1); + empty_size += 16 * 1024 * 1024; + } + } + } + search_end = min(search_end, btrfs_super_total_bytes(&info->super_copy)); if (hint_byte) { @@ -1456,6 +1469,19 @@ check_failed: } search_start = find_search_start(root, &block_group, search_start, total_needed, data); + + if (!data 
&& btrfs_test_opt(root, SSD) && info->last_alloc && + search_start != info->last_alloc) { + info->last_alloc = 0; + if (!empty_size) { + empty_size += 16 * 1024 * 1024; + total_needed += empty_size; + } + search_start = find_search_start(root, &block_group, + search_start, total_needed, + data); + } + search_start = stripe_align(root, search_start); cached_start = search_start; btrfs_init_path(path); @@ -1610,6 +1636,8 @@ enospc: error: btrfs_release_path(root, path); btrfs_free_path(path); + if (btrfs_test_opt(root, SSD) && !ret && !data) + info->last_alloc = ins->objectid + ins->offset; return ret; } /* @@ -1778,7 +1806,8 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; - btrfs_set_buffer_defrag(buf); + if (!btrfs_test_opt(root, SSD)) + btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e506de3168bc..b6982f0133cf 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -64,7 +64,7 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, - Opt_alloc_start, Opt_nobarrier, Opt_err, + Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, }; static match_table_t tokens = { @@ -74,6 +74,7 @@ static match_table_t tokens = { {Opt_nobarrier, "nobarrier"}, {Opt_max_extent, "max_extent=%s"}, {Opt_alloc_start, "alloc_start=%s"}, + {Opt_ssd, "ssd"}, {Opt_err, NULL} }; @@ -149,6 +150,12 @@ static int parse_options (char * options, btrfs_set_opt(info->mount_opt, NODATASUM); } break; + case Opt_ssd: + if (info) { + printk("btrfs: use ssd allocation scheme\n"); + btrfs_set_opt(info->mount_opt, SSD); + } + break; case Opt_nobarrier: if (info) { printk("btrfs: turning off barriers\n"); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index b6bbfc179c29..163c01a24498 100644 --- a/fs/btrfs/transaction.c +++ 
b/fs/btrfs/transaction.c @@ -57,6 +57,7 @@ static int join_transaction(struct btrfs_root *root) BUG_ON(!cur_trans); root->fs_info->generation++; root->fs_info->running_transaction = cur_trans; + root->fs_info->last_alloc = 0; cur_trans->num_writers = 1; cur_trans->num_joined = 0; cur_trans->transid = root->fs_info->generation; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 5c58630dce03..5935cbd8f2b8 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -179,6 +179,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, if (root->ref_cows == 0 && !is_extent) goto out; + if (btrfs_test_opt(root, SSD)) + goto out; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; -- cgit v1.2.3 From ed0dab6b86cee64e9072107f65fa40629f162b38 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 22 Jan 2008 12:46:56 -0500 Subject: Btrfs: Add basic lockfs calls Signed-off-by: Chris Mason --- fs/btrfs/super.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b6982f0133cf..8b52c69fda2e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -429,6 +429,17 @@ static struct file_system_type btrfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +static void btrfs_write_super_lockfs(struct super_block *sb) +{ + struct btrfs_root *root = btrfs_sb(sb); + btrfs_transaction_flush_work(root); +} + +static void btrfs_unlockfs(struct super_block *sb) +{ + struct btrfs_root *root = btrfs_sb(sb); + btrfs_transaction_queue_work(root, HZ * 30); +} static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, @@ -442,8 +453,9 @@ static struct super_operations btrfs_super_ops = { .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .statfs = btrfs_statfs, + .write_super_lockfs = btrfs_write_super_lockfs, + .unlockfs = btrfs_unlockfs, }; - static int __init init_btrfs_fs(void) { int err; -- cgit 
v1.2.3 From d1310b2e0cd98eb1348553e69b73827b436dca7b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jan 2008 16:13:08 -0500 Subject: Btrfs: Split the extent_map code into two parts There is now extent_map for mapping offsets in the file to disk and extent_io for state tracking, IO submission and extent_bufers. The new extent_map code shifts from [start,end] pairs to [start,len], and pushes the locking out into the caller. This allows a few performance optimizations and is easier to use. A number of extent_map usage bugs were fixed, mostly with failing to remove extent_map entries when changing the file. Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 3 +- fs/btrfs/btrfs_inode.h | 2 + fs/btrfs/ctree.h | 15 +- fs/btrfs/disk-io.c | 123 +- fs/btrfs/extent-tree.c | 43 +- fs/btrfs/extent_io.c | 3089 +++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent_io.h | 193 +++ fs/btrfs/extent_map.c | 3119 ++---------------------------------------------- fs/btrfs/extent_map.h | 214 +--- fs/btrfs/file.c | 29 +- fs/btrfs/inode.c | 175 +-- fs/btrfs/super.c | 10 +- fs/btrfs/transaction.c | 8 +- fs/btrfs/transaction.h | 2 +- 14 files changed, 3612 insertions(+), 3413 deletions(-) create mode 100644 fs/btrfs/extent_io.c create mode 100644 fs/btrfs/extent_io.h (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 72cc3e16741f..9c61609f3714 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -5,7 +5,8 @@ obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ - extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o + extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ + extent_io.o ifeq ($(CONFIG_FS_POSIX_ACL),y) btrfs-y += acl.o diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 9fc99883c70a..9a0647dc5a0b 100644 --- a/fs/btrfs/btrfs_inode.h +++ 
b/fs/btrfs/btrfs_inode.h @@ -20,6 +20,7 @@ #define __BTRFS_I__ #include "extent_map.h" +#include "extent_io.h" /* in memory btrfs inode */ struct btrfs_inode { @@ -27,6 +28,7 @@ struct btrfs_inode { struct btrfs_block_group_cache *block_group; struct btrfs_key location; struct extent_map_tree extent_tree; + struct extent_io_tree io_tree; struct inode vfs_inode; u64 ordered_trans; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b9f2975b55c9..6c65473e0fe3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -27,6 +27,7 @@ #include #include #include "bit-radix.h" +#include "extent_io.h" #include "extent_map.h" struct btrfs_trans_handle; @@ -314,11 +315,11 @@ struct btrfs_fs_info { struct btrfs_root *tree_root; struct radix_tree_root fs_roots_radix; - struct extent_map_tree free_space_cache; - struct extent_map_tree block_group_cache; - struct extent_map_tree pinned_extents; - struct extent_map_tree pending_del; - struct extent_map_tree extent_ins; + struct extent_io_tree free_space_cache; + struct extent_io_tree block_group_cache; + struct extent_io_tree pinned_extents; + struct extent_io_tree pending_del; + struct extent_io_tree extent_ins; u64 generation; u64 last_trans_committed; @@ -956,7 +957,7 @@ u32 btrfs_count_snapshots_in_path(struct btrfs_root *root, u64 first_extent); int btrfs_extent_post_op(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy); +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr); @@ -1001,7 +1002,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root u64 owner_objectid, u64 owner_offset, int pin); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_map_tree *unpin); + struct extent_io_tree *unpin); int btrfs_inc_extent_ref(struct btrfs_trans_handle 
*trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5d1f9bca2712..4c4ebea0b2a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -43,14 +43,14 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) } #endif -static struct extent_map_ops btree_extent_map_ops; +static struct extent_io_ops btree_extent_io_ops; struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, GFP_NOFS); return eb; } @@ -61,13 +61,13 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, NULL, GFP_NOFS); return eb; } struct extent_map *btree_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; @@ -75,7 +75,9 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, int ret; again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); if (em) { goto out; } @@ -85,11 +87,14 @@ again: goto out; } em->start = 0; - em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->len = i_size_read(inode); em->block_start = 0; - em->block_end = em->end; em->bdev = inode->i_sb->s_bdev; + + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret == -EEXIST) { 
free_extent_map(em); em = NULL; @@ -175,13 +180,13 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; int found_level; unsigned long len; struct extent_buffer *eb; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; if (page->private == EXTENT_PAGE_PRIVATE) goto out; @@ -230,16 +235,16 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btree_get_extent, wbc); } static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; @@ -264,18 +269,20 @@ static int btree_writepages(struct address_space *mapping, int btree_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btree_get_extent); } static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = 
&BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -286,8 +293,8 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btree_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); } @@ -331,7 +338,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 0); free_extent_buffer(buf); return ret; @@ -342,40 +349,39 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_map_tree *extent_tree; + struct extent_io_tree *io_tree; u64 end; int ret; - extent_tree = &BTRFS_I(btree_inode)->extent_tree; + io_tree = &BTRFS_I(btree_inode)->io_tree; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 0, 1); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); if (buf->flags & EXTENT_CSUM) return buf; end = buf->start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; return buf; } - lock_extent(extent_tree, buf->start, end, GFP_NOFS); + lock_extent(io_tree, buf->start, end, GFP_NOFS); - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, 
buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; goto out_unlock; } ret = csum_tree_block(root, buf, 1); - set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); + set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; out_unlock: - unlock_extent(extent_tree, buf->start, end, GFP_NOFS); + unlock_extent(io_tree, buf->start, end, GFP_NOFS); return buf; } @@ -385,7 +391,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -394,7 +400,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf) { struct inode *btree_inode = root->fs_info->btree_inode; - wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -659,20 +665,23 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; - extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); - BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + GFP_NOFS); + + BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; - extent_map_tree_init(&fs_info->free_space_cache, + extent_io_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->block_group_cache, + extent_io_tree_init(&fs_info->block_group_cache, 
fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pinned_extents, + extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pending_del, + extent_io_tree_init(&fs_info->pending_del, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->extent_ins, + extent_io_tree_init(&fs_info->extent_ins, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -787,7 +796,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root if (!btrfs_test_opt(root, NOBARRIER)) blkdev_issue_flush(sb->s_bdev, NULL); - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, super->start, super->len); if (!btrfs_test_opt(root, NOBARRIER)) @@ -864,12 +873,12 @@ int close_ctree(struct btrfs_root *root) filemap_write_and_wait(fs_info->btree_inode->i_mapping); - extent_map_tree_empty_lru(&fs_info->free_space_cache); - extent_map_tree_empty_lru(&fs_info->block_group_cache); - extent_map_tree_empty_lru(&fs_info->pinned_extents); - extent_map_tree_empty_lru(&fs_info->pending_del); - extent_map_tree_empty_lru(&fs_info->extent_ins); - extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); + extent_io_tree_empty_lru(&fs_info->free_space_cache); + extent_io_tree_empty_lru(&fs_info->block_group_cache); + extent_io_tree_empty_lru(&fs_info->pinned_extents); + extent_io_tree_empty_lru(&fs_info->pending_del); + extent_io_tree_empty_lru(&fs_info->extent_ins); + extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); @@ -892,13 +901,13 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return 
extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -914,7 +923,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) transid, root->fs_info->generation); WARN_ON(1); } - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } void btrfs_throttle(struct btrfs_root *root) @@ -941,7 +950,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -949,7 +958,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -958,7 +967,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); } @@ -966,7 +975,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = 
BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, 0); } @@ -975,7 +984,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -984,7 +993,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -993,10 +1002,10 @@ int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); } -static struct extent_map_ops btree_extent_map_ops = { +static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, }; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b69a46691a96..1cf125ab7822 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -63,7 +63,7 @@ static int cache_block_group(struct btrfs_root *root, int ret; struct btrfs_key key; struct extent_buffer *leaf; - struct extent_map_tree *free_space_cache; + struct extent_io_tree *free_space_cache; int slot; u64 last = 0; 
u64 hole_size; @@ -158,7 +158,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct btrfs_fs_info *info, u64 bytenr) { - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *block_group = NULL; u64 ptr; u64 start; @@ -281,7 +281,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, int data, int owner) { struct btrfs_block_group_cache *cache; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *found_group = NULL; struct btrfs_fs_info *info = root->fs_info; u64 used; @@ -951,7 +951,7 @@ fail: int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_block_group_cache *cache; int ret; int err = 0; @@ -1107,12 +1107,12 @@ static int update_pinned_extents(struct btrfs_root *root, return 0; } -int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) +int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) { u64 last = 0; u64 start; u64 end; - struct extent_map_tree *pinned_extents = &root->fs_info->pinned_extents; + struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; int ret; while(1) { @@ -1128,12 +1128,12 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_map_tree *copy) int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_map_tree *unpin) + struct extent_io_tree *unpin) { u64 start; u64 end; int ret; - struct extent_map_tree *free_space_cache; + struct extent_io_tree *free_space_cache; free_space_cache = &root->fs_info->free_space_cache; while(1) { @@ -1329,8 +1329,8 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct int err = 0; u64 start; u64 end; - struct extent_map_tree *pending_del; - struct 
extent_map_tree *pinned_extents; + struct extent_io_tree *pending_del; + struct extent_io_tree *pinned_extents; pending_del = &extent_root->fs_info->pending_del; pinned_extents = &extent_root->fs_info->pinned_extents; @@ -1802,7 +1802,7 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->extent_tree, + set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; @@ -2166,7 +2166,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, unsigned long i; struct page *page; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2195,15 +2195,14 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; - existing_delalloc = - count_range_bits(&BTRFS_I(inode)->extent_tree, - &delalloc_start, page_end, - PAGE_CACHE_SIZE, EXTENT_DELALLOC); + existing_delalloc = count_range_bits(io_tree, + &delalloc_start, page_end, + PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(em_tree, page_start, + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -2211,7 +2210,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, 
page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2379,7 +2378,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size) u64 cur_byte; u64 total_found; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; @@ -2561,7 +2560,7 @@ int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *cache; struct btrfs_block_group_item *item; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct extent_buffer *leaf; int ret; @@ -2645,7 +2644,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) int bit; struct btrfs_block_group_cache *cache; struct btrfs_fs_info *info = root->fs_info; - struct extent_map_tree *block_group_cache; + struct extent_io_tree *block_group_cache; struct btrfs_key key; struct btrfs_key found_key; struct extent_buffer *leaf; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c new file mode 100644 index 000000000000..15cc158a0498 --- /dev/null +++ b/fs/btrfs/extent_io.c @@ -0,0 +1,3089 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "extent_io.h" +#include "extent_map.h" + +/* temporary define until extent_map moves out of btrfs */ +struct kmem_cache *btrfs_cache_create(const char *name, size_t size, + unsigned long extra_flags, + void (*ctor)(void *, struct kmem_cache *, + unsigned long)); + +static struct kmem_cache *extent_state_cache; +static struct kmem_cache *extent_buffer_cache; + +static LIST_HEAD(buffers); +static LIST_HEAD(states); + +static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; +#define BUFFER_LRU_MAX 64 + +struct tree_entry { + u64 start; + u64 end; + int in_tree; + 
struct rb_node rb_node; +}; + +struct extent_page_data { + struct bio *bio; + struct extent_io_tree *tree; + get_extent_t *get_extent; +}; + +int __init extent_io_init(void) +{ + extent_state_cache = btrfs_cache_create("extent_state", + sizeof(struct extent_state), 0, + NULL); + if (!extent_state_cache) + return -ENOMEM; + + extent_buffer_cache = btrfs_cache_create("extent_buffers", + sizeof(struct extent_buffer), 0, + NULL); + if (!extent_buffer_cache) + goto free_state_cache; + return 0; + +free_state_cache: + kmem_cache_destroy(extent_state_cache); + return -ENOMEM; +} + +void extent_io_exit(void) +{ + struct extent_state *state; + + while (!list_empty(&states)) { + state = list_entry(states.next, struct extent_state, list); + printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); + list_del(&state->list); + kmem_cache_free(extent_state_cache, state); + + } + + if (extent_state_cache) + kmem_cache_destroy(extent_state_cache); + if (extent_buffer_cache) + kmem_cache_destroy(extent_buffer_cache); +} + +void extent_io_tree_init(struct extent_io_tree *tree, + struct address_space *mapping, gfp_t mask) +{ + tree->state.rb_node = NULL; + tree->ops = NULL; + tree->dirty_bytes = 0; + rwlock_init(&tree->lock); + spin_lock_init(&tree->lru_lock); + tree->mapping = mapping; + INIT_LIST_HEAD(&tree->buffer_lru); + tree->lru_size = 0; +} +EXPORT_SYMBOL(extent_io_tree_init); + +void extent_io_tree_empty_lru(struct extent_io_tree *tree) +{ + struct extent_buffer *eb; + while(!list_empty(&tree->buffer_lru)) { + eb = list_entry(tree->buffer_lru.next, struct extent_buffer, + lru); + list_del_init(&eb->lru); + free_extent_buffer(eb); + } +} +EXPORT_SYMBOL(extent_io_tree_empty_lru); + +struct extent_state *alloc_extent_state(gfp_t mask) +{ + struct extent_state *state; + unsigned long flags; + + state = kmem_cache_alloc(extent_state_cache, mask); + if (!state || IS_ERR(state)) + return 
state; + state->state = 0; + state->in_tree = 0; + state->private = 0; + + spin_lock_irqsave(&state_lock, flags); + list_add(&state->list, &states); + spin_unlock_irqrestore(&state_lock, flags); + + atomic_set(&state->refs, 1); + init_waitqueue_head(&state->wq); + return state; +} +EXPORT_SYMBOL(alloc_extent_state); + +void free_extent_state(struct extent_state *state) +{ + unsigned long flags; + if (!state) + return; + if (atomic_dec_and_test(&state->refs)) { + WARN_ON(state->in_tree); + spin_lock_irqsave(&state_lock, flags); + list_del(&state->list); + spin_unlock_irqrestore(&state_lock, flags); + kmem_cache_free(extent_state_cache, state); + } +} +EXPORT_SYMBOL(free_extent_state); + +static struct rb_node *tree_insert(struct rb_root *root, u64 offset, + struct rb_node *node) +{ + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct tree_entry *entry; + + while(*p) { + parent = *p; + entry = rb_entry(parent, struct tree_entry, rb_node); + + if (offset < entry->start) + p = &(*p)->rb_left; + else if (offset > entry->end) + p = &(*p)->rb_right; + else + return parent; + } + + entry = rb_entry(node, struct tree_entry, rb_node); + entry->in_tree = 1; + rb_link_node(node, parent, p); + rb_insert_color(node, root); + return NULL; +} + +static struct rb_node *__tree_search(struct rb_root *root, u64 offset, + struct rb_node **prev_ret, + struct rb_node **next_ret) +{ + struct rb_node * n = root->rb_node; + struct rb_node *prev = NULL; + struct rb_node *orig_prev = NULL; + struct tree_entry *entry; + struct tree_entry *prev_entry = NULL; + + while(n) { + entry = rb_entry(n, struct tree_entry, rb_node); + prev = n; + prev_entry = entry; + + if (offset < entry->start) + n = n->rb_left; + else if (offset > entry->end) + n = n->rb_right; + else + return n; + } + + if (prev_ret) { + orig_prev = prev; + while(prev && offset > prev_entry->end) { + prev = rb_next(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *prev_ret = prev; 
+ prev = orig_prev; + } + + if (next_ret) { + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + while(prev && offset < prev_entry->start) { + prev = rb_prev(prev); + prev_entry = rb_entry(prev, struct tree_entry, rb_node); + } + *next_ret = prev; + } + return NULL; +} + +static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) +{ + struct rb_node *prev; + struct rb_node *ret; + ret = __tree_search(root, offset, &prev, NULL); + if (!ret) + return prev; + return ret; +} + +/* + * utility function to look for merge candidates inside a given range. + * Any extents with matching state are merged together into a single + * extent in the tree. Extents with EXTENT_IO in their state field + * are not merged because the end_io handlers need to be able to do + * operations on them without sleeping (or doing allocations/splits). + * + * This should be called with the tree lock held. + */ +static int merge_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + struct extent_state *other; + struct rb_node *other_node; + + if (state->state & EXTENT_IOBITS) + return 0; + + other_node = rb_prev(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->end == state->start - 1 && + other->state == state->state) { + state->start = other->start; + other->in_tree = 0; + rb_erase(&other->rb_node, &tree->state); + free_extent_state(other); + } + } + other_node = rb_next(&state->rb_node); + if (other_node) { + other = rb_entry(other_node, struct extent_state, rb_node); + if (other->start == state->end + 1 && + other->state == state->state) { + other->start = state->start; + state->in_tree = 0; + rb_erase(&state->rb_node, &tree->state); + free_extent_state(state); + } + } + return 0; +} + +/* + * insert an extent_state struct into the tree. 'bits' are set on the + * struct before it is inserted. 
+ * + * This may return -EEXIST if the extent is already there, in which case the + * state struct is freed. + * + * The tree lock is not taken internally. This is a utility function and + * probably isn't what you want to call (see set/clear_extent_bit). + */ +static int insert_state(struct extent_io_tree *tree, + struct extent_state *state, u64 start, u64 end, + int bits) +{ + struct rb_node *node; + + if (end < start) { + printk("end < start %Lu %Lu\n", end, start); + WARN_ON(1); + } + if (bits & EXTENT_DIRTY) + tree->dirty_bytes += end - start + 1; + state->state |= bits; + state->start = start; + state->end = end; + node = tree_insert(&tree->state, end, &state->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); + free_extent_state(state); + return -EEXIST; + } + merge_state(tree, state); + return 0; +} + +/* + * split a given extent state struct in two, inserting the preallocated + * struct 'prealloc' as the newly created second half. 'split' indicates an + * offset inside 'orig' where it should be split. + * + * Before calling, + * the tree has 'orig' at [orig->start, orig->end]. After calling, there + * are two extent state structs in the tree: + * prealloc: [orig->start, split - 1] + * orig: [ split, orig->end ] + * + * The tree locks are not taken by this function. They need to be held + * by the caller. 
+ */ +static int split_state(struct extent_io_tree *tree, struct extent_state *orig, + struct extent_state *prealloc, u64 split) +{ + struct rb_node *node; + prealloc->start = orig->start; + prealloc->end = split - 1; + prealloc->state = orig->state; + orig->start = split; + + node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); + if (node) { + struct extent_state *found; + found = rb_entry(node, struct extent_state, rb_node); + printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); + free_extent_state(prealloc); + return -EEXIST; + } + return 0; +} + +/* + * utility function to clear some bits in an extent state struct. + * it will optionally wake up any one waiting on this state (wake == 1), or + * forcibly remove the state from the tree (delete == 1). + * + * If no bits are set on the state struct after clearing things, the + * struct is freed and removed from the tree + */ +static int clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, int bits, int wake, + int delete) +{ + int ret = state->state & bits; + + if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + WARN_ON(range > tree->dirty_bytes); + tree->dirty_bytes -= range; + } + state->state &= ~bits; + if (wake) + wake_up(&state->wq); + if (delete || state->state == 0) { + if (state->in_tree) { + rb_erase(&state->rb_node, &tree->state); + state->in_tree = 0; + free_extent_state(state); + } else { + WARN_ON(1); + } + } else { + merge_state(tree, state); + } + return ret; +} + +/* + * clear some bits on a range in the tree. This may require splitting + * or inserting elements in the tree, so the gfp mask is used to + * indicate which allocations or sleeping are allowed. + * + * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove + * the given range from the tree regardless of state (ie for truncate). + * + * the range [start, end] is inclusive. 
+ * + * This takes the tree lock, and returns < 0 on error, > 0 if any of the + * bits were already set, or zero if none of the bits were already set. + */ +int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int wake, int delete, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + unsigned long flags; + int err; + int set = 0; + +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irqsave(&tree->lock, flags); + /* + * this search will find the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + goto out; + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > end) + goto out; + WARN_ON(state->end < start); + + /* + * | ---- desired range ---- | + * | state | or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip + * bits on second half. + * + * If the extent we found extends past our range, we + * just split and search again. It'll get split again + * the next time though. + * + * If the extent we found is inside our range, we clear + * the desired bit on it. 
+ */ + + if (state->start < start) { + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, + wake, delete); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and clear the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + if (wake) + wake_up(&state->wq); + set |= clear_state_bit(tree, prealloc, bits, + wake, delete); + prealloc = NULL; + goto out; + } + + start = state->end + 1; + set |= clear_state_bit(tree, state, bits, wake, delete); + goto search_again; + +out: + write_unlock_irqrestore(&tree->lock, flags); + if (prealloc) + free_extent_state(prealloc); + + return set; + +search_again: + if (start > end) + goto out; + write_unlock_irqrestore(&tree->lock, flags); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(clear_extent_bit); + +static int wait_on_state(struct extent_io_tree *tree, + struct extent_state *state) +{ + DEFINE_WAIT(wait); + prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); + read_unlock_irq(&tree->lock); + schedule(); + read_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + return 0; +} + +/* + * waits for one or more bits to clear on a range in the state tree. + * The range [start, end] is inclusive. 
+ * The tree lock is taken by this function + */ +int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) +{ + struct extent_state *state; + struct rb_node *node; + + read_lock_irq(&tree->lock); +again: + while (1) { + /* + * this search will find all the extents that end after + * our range starts + */ + node = tree_search(&tree->state, start); + if (!node) + break; + + state = rb_entry(node, struct extent_state, rb_node); + + if (state->start > end) + goto out; + + if (state->state & bits) { + start = state->start; + atomic_inc(&state->refs); + wait_on_state(tree, state); + free_extent_state(state); + goto again; + } + start = state->end + 1; + + if (start > end) + break; + + if (need_resched()) { + read_unlock_irq(&tree->lock); + cond_resched(); + read_lock_irq(&tree->lock); + } + } +out: + read_unlock_irq(&tree->lock); + return 0; +} +EXPORT_SYMBOL(wait_extent_bit); + +static void set_state_bits(struct extent_io_tree *tree, + struct extent_state *state, + int bits) +{ + if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { + u64 range = state->end - state->start + 1; + tree->dirty_bytes += range; + } + state->state |= bits; +} + +/* + * set some bits on a range in the tree. This may require allocations + * or sleeping, so the gfp mask is used to indicate what is allowed. + * + * If 'exclusive' == 1, this will fail with -EEXIST if some part of the + * range already has the desired bits set. The start of the existing + * range is returned in failed_start in this case. + * + * [start, end] is inclusive + * This takes the tree lock. 
+ */ +int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, + int exclusive, u64 *failed_start, gfp_t mask) +{ + struct extent_state *state; + struct extent_state *prealloc = NULL; + struct rb_node *node; + unsigned long flags; + int err = 0; + int set; + u64 last_start; + u64 last_end; +again: + if (!prealloc && (mask & __GFP_WAIT)) { + prealloc = alloc_extent_state(mask); + if (!prealloc) + return -ENOMEM; + } + + write_lock_irqsave(&tree->lock, flags); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node) { + err = insert_state(tree, prealloc, start, end, bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + goto out; + } + + state = rb_entry(node, struct extent_state, rb_node); + last_start = state->start; + last_end = state->end; + + /* + * | ---- desired range ---- | + * | state | + * + * Just lock what we found and keep going + */ + if (state->start == start && state->end <= end) { + set = state->state & bits; + if (set && exclusive) { + *failed_start = state->start; + err = -EEXIST; + goto out; + } + set_state_bits(tree, state, bits); + start = state->end + 1; + merge_state(tree, state); + goto search_again; + } + + /* + * | ---- desired range ---- | + * | state | + * or + * | ------------- state -------------- | + * + * We need to split the extent we found, and may flip bits on + * second half. + * + * If the extent we found extends past our + * range, we just split and search again. It'll get split + * again the next time though. + * + * If the extent we found is inside our range, we set the + * desired bit on it. 
+ */ + if (state->start < start) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, start); + BUG_ON(err == -EEXIST); + prealloc = NULL; + if (err) + goto out; + if (state->end <= end) { + set_state_bits(tree, state, bits); + start = state->end + 1; + merge_state(tree, state); + } else { + start = state->start; + } + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | or | state | + * + * There's a hole, we need to insert something in it and + * ignore the extent we found. + */ + if (state->start > start) { + u64 this_end; + if (end < last_start) + this_end = end; + else + this_end = last_start -1; + err = insert_state(tree, prealloc, start, this_end, + bits); + prealloc = NULL; + BUG_ON(err == -EEXIST); + if (err) + goto out; + start = this_end + 1; + goto search_again; + } + /* + * | ---- desired range ---- | + * | state | + * We need to split the extent, and set the bit + * on the first half + */ + if (state->start <= end && state->end > end) { + set = state->state & bits; + if (exclusive && set) { + *failed_start = start; + err = -EEXIST; + goto out; + } + err = split_state(tree, state, prealloc, end + 1); + BUG_ON(err == -EEXIST); + + set_state_bits(tree, prealloc, bits); + merge_state(tree, prealloc); + prealloc = NULL; + goto out; + } + + goto search_again; + +out: + write_unlock_irqrestore(&tree->lock, flags); + if (prealloc) + free_extent_state(prealloc); + + return err; + +search_again: + if (start > end) + goto out; + write_unlock_irqrestore(&tree->lock, flags); + if (mask & __GFP_WAIT) + cond_resched(); + goto again; +} +EXPORT_SYMBOL(set_extent_bit); + +/* wrappers around set/clear extent bit */ +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_dirty); + +int set_extent_bits(struct 
extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return set_extent_bit(tree, start, end, bits, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_bits); + +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, bits, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_bits); + +int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_delalloc); + +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_dirty); + +int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_new); + +int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_new); + +int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, + mask); +} +EXPORT_SYMBOL(set_extent_uptodate); + +int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); +} +EXPORT_SYMBOL(clear_extent_uptodate); + +int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, + 0, NULL, mask); +} +EXPORT_SYMBOL(set_extent_writeback); + +int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, 
EXTENT_WRITEBACK, 1, 0, mask); +} +EXPORT_SYMBOL(clear_extent_writeback); + +int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) +{ + return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); +} +EXPORT_SYMBOL(wait_on_extent_writeback); + +/* + * locks a range in ascending order, waiting for any locked regions + * it hits on the way. [start,end] are inclusive, and this will sleep. + */ +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) +{ + int err; + u64 failed_start; + while (1) { + err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, + &failed_start, mask); + if (err == -EEXIST && (mask & __GFP_WAIT)) { + wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); + start = failed_start; + } else { + break; + } + WARN_ON(start > end); + } + return err; +} +EXPORT_SYMBOL(lock_extent); + +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); +} +EXPORT_SYMBOL(unlock_extent); + +/* + * helper function to set pages and extents in the tree dirty + */ +int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + } + set_extent_dirty(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_dirty); + +/* + * helper function to set both pages and extents in the tree writeback + */ +int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + BUG_ON(!page); + set_page_writeback(page); 
+ page_cache_release(page); + index++; + } + set_extent_writeback(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(set_range_writeback); + +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 1; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->end >= start && (state->state & bits)) { + *start_ret = state->start; + *end_ret = state->end; + ret = 0; + break; + } + node = rb_next(node); + if (!node) + break; + } +out: + read_unlock_irq(&tree->lock); + return ret; +} +EXPORT_SYMBOL(find_first_extent_bit); + +u64 find_lock_delalloc_range(struct extent_io_tree *tree, + u64 *start, u64 *end, u64 max_bytes) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 found = 0; + u64 total_bytes = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. 
+ */ +search_again: + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + *end = (u64)-1; + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (found && state->start != cur_start) { + goto out; + } + if (!(state->state & EXTENT_DELALLOC)) { + if (!found) + *end = state->end; + goto out; + } + if (!found) { + struct extent_state *prev_state; + struct rb_node *prev_node = node; + while(1) { + prev_node = rb_prev(prev_node); + if (!prev_node) + break; + prev_state = rb_entry(prev_node, + struct extent_state, + rb_node); + if (!(prev_state->state & EXTENT_DELALLOC)) + break; + state = prev_state; + node = prev_node; + } + } + if (state->state & EXTENT_LOCKED) { + DEFINE_WAIT(wait); + atomic_inc(&state->refs); + prepare_to_wait(&state->wq, &wait, + TASK_UNINTERRUPTIBLE); + write_unlock_irq(&tree->lock); + schedule(); + write_lock_irq(&tree->lock); + finish_wait(&state->wq, &wait); + free_extent_state(state); + goto search_again; + } + state->state |= EXTENT_LOCKED; + if (!found) + *start = state->start; + found++; + *end = state->end; + cur_start = state->end + 1; + node = rb_next(node); + if (!node) + break; + total_bytes += state->end - state->start + 1; + if (total_bytes >= max_bytes) + break; + } +out: + write_unlock_irq(&tree->lock); + return found; +} + +u64 count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 search_end, u64 max_bytes, + unsigned long bits) +{ + struct rb_node *node; + struct extent_state *state; + u64 cur_start = *start; + u64 total_bytes = 0; + int found = 0; + + if (search_end <= cur_start) { + printk("search_end %Lu start %Lu\n", search_end, cur_start); + WARN_ON(1); + return 0; + } + + write_lock_irq(&tree->lock); + if (cur_start == 0 && bits == EXTENT_DIRTY) { + total_bytes = tree->dirty_bytes; + goto out; + } + /* + * this search will find all the extents that end after + * our range starts. 
+ */ + node = tree_search(&tree->state, cur_start); + if (!node || IS_ERR(node)) { + goto out; + } + + while(1) { + state = rb_entry(node, struct extent_state, rb_node); + if (state->start > search_end) + break; + if (state->end >= cur_start && (state->state & bits)) { + total_bytes += min(search_end, state->end) + 1 - + max(cur_start, state->start); + if (total_bytes >= max_bytes) + break; + if (!found) { + *start = state->start; + found = 1; + } + } + node = rb_next(node); + if (!node) + break; + } +out: + write_unlock_irq(&tree->lock); + return total_bytes; +} +/* + * helper function to lock both pages and extents in the tree. + * pages must be locked first. + */ +int lock_range(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + int err; + + while (index <= end_index) { + page = grab_cache_page(tree->mapping, index); + if (!page) { + err = -ENOMEM; + goto failed; + } + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto failed; + } + index++; + } + lock_extent(tree, start, end, GFP_NOFS); + return 0; + +failed: + /* + * we failed above in getting the page at 'index', so we undo here + * up to but not including the page at 'index' + */ + end_index = index; + index = start >> PAGE_CACHE_SHIFT; + while (index < end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + return err; +} +EXPORT_SYMBOL(lock_range); + +/* + * helper function to unlock both pages and extents in the tree. 
+ */ +int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(tree->mapping, index); + unlock_page(page); + page_cache_release(page); + index++; + } + unlock_extent(tree, start, end, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(unlock_range); + +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + write_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + state->private = private; +out: + write_unlock_irq(&tree->lock); + return ret; +} + +int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) +{ + struct rb_node *node; + struct extent_state *state; + int ret = 0; + + read_lock_irq(&tree->lock); + /* + * this search will find all the extents that end after + * our range starts. + */ + node = tree_search(&tree->state, start); + if (!node || IS_ERR(node)) { + ret = -ENOENT; + goto out; + } + state = rb_entry(node, struct extent_state, rb_node); + if (state->start != start) { + ret = -ENOENT; + goto out; + } + *private = state->private; +out: + read_unlock_irq(&tree->lock); + return ret; +} + +/* + * searches a range in the state tree for a given mask. + * If 'filled' == 1, this returns 1 only if ever extent in the tree + * has the bits set. Otherwise, 1 is returned if any bit in the + * range is found set. 
+ */ +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled) +{ + struct extent_state *state = NULL; + struct rb_node *node; + int bitset = 0; + unsigned long flags; + + read_lock_irqsave(&tree->lock, flags); + node = tree_search(&tree->state, start); + while (node && start <= end) { + state = rb_entry(node, struct extent_state, rb_node); + + if (filled && state->start > start) { + bitset = 0; + break; + } + + if (state->start > end) + break; + + if (state->state & bits) { + bitset = 1; + if (!filled) + break; + } else if (filled) { + bitset = 0; + break; + } + start = state->end + 1; + if (start > end) + break; + node = rb_next(node); + if (!node) { + if (filled) + bitset = 0; + break; + } + } + read_unlock_irqrestore(&tree->lock, flags); + return bitset; +} +EXPORT_SYMBOL(test_range_bit); + +/* + * helper function to set a given page up to date if all the + * extents in the tree for that page are up to date + */ +static int check_page_uptodate(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) + SetPageUptodate(page); + return 0; +} + +/* + * helper function to unlock a page if all the extents in the tree + * for that page are unlocked + */ +static int check_page_locked(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) + unlock_page(page); + return 0; +} + +/* + * helper function to end page writeback if all the extents + * in the tree for that page are done with writeback + */ +static int check_page_writeback(struct extent_io_tree *tree, + struct page *page) +{ + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) + 
end_page_writeback(page); + return 0; +} + +/* lots and lots of room for performance fixes in the end_bio funcs */ + +/* + * after a writepage IO is done, we need to: + * clear the uptodate bits on error + * clear the writeback bits in the extent tree for this IO + * end_page_writeback if the page has no more pending IO + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_writepage(struct bio *bio, int err) +#else +static int end_bio_extent_writepage(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, GFP_ATOMIC); + ClearPageUptodate(page); + SetPageError(page); + } + clear_extent_writeback(tree, start, end, GFP_ATOMIC); + + if (whole_page) + end_page_writeback(page); + else + check_page_writeback(tree, page); + if (tree->ops && tree->ops->writepage_end_io_hook) + tree->ops->writepage_end_io_hook(page, start, end); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +/* + * after a readpage IO is done, we need to: + * clear the uptodate bits on error + * set the uptodate bits if things worked + * set the page up to date if all extents 
in the tree are uptodate + * clear the lock bit in the extent tree + * unlock the page if there are no other extents locked for it + * + * Scheduling is not allowed, so the extent state tree is expected + * to have one and only one object corresponding to this IO. + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_readpage(struct bio *bio, int err) +#else +static int end_bio_extent_readpage(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + int whole_page; + int ret; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) + whole_page = 1; + else + whole_page = 0; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { + ret = tree->ops->readpage_end_io_hook(page, start, end); + if (ret) + uptodate = 0; + } + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + if (whole_page) + SetPageUptodate(page); + else + check_page_uptodate(tree, page); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + if (whole_page) + unlock_page(page); + else + check_page_locked(tree, page); + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +/* + * IO done from prepare_write is pretty simple, we just unlock + * the structs in the extent tree when done, and set the uptodate bits + * as appropriate. 
+ */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_bio_extent_preparewrite(struct bio *bio, int err) +#else +static int end_bio_extent_preparewrite(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct extent_io_tree *tree = bio->bi_private; + u64 start; + u64 end; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + do { + struct page *page = bvec->bv_page; + start = ((u64)page->index << PAGE_CACHE_SHIFT) + + bvec->bv_offset; + end = start + bvec->bv_len - 1; + + if (--bvec >= bio->bi_io_vec) + prefetchw(&bvec->bv_page->flags); + + if (uptodate) { + set_extent_uptodate(tree, start, end, GFP_ATOMIC); + } else { + ClearPageUptodate(page); + SetPageError(page); + } + + unlock_extent(tree, start, end, GFP_ATOMIC); + + } while (bvec >= bio->bi_io_vec); + + bio_put(bio); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + +static struct bio * +extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, + gfp_t gfp_flags) +{ + struct bio *bio; + + bio = bio_alloc(gfp_flags, nr_vecs); + + if (bio == NULL && (current->flags & PF_MEMALLOC)) { + while (!bio && (nr_vecs /= 2)) + bio = bio_alloc(gfp_flags, nr_vecs); + } + + if (bio) { + bio->bi_bdev = bdev; + bio->bi_sector = first_sector; + } + return bio; +} + +static int submit_one_bio(int rw, struct bio *bio) +{ + u64 maxsector; + int ret = 0; + + bio_get(bio); + + maxsector = bio->bi_bdev->bd_inode->i_size >> 9; + if (maxsector < bio->bi_sector) { + printk("sector too large max %Lu got %llu\n", maxsector, + (unsigned long long)bio->bi_sector); + WARN_ON(1); + } + + submit_bio(rw, bio); + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + bio_put(bio); + return ret; +} + +static int submit_extent_page(int rw, struct extent_io_tree *tree, + struct page *page, sector_t sector, 
+ size_t size, unsigned long offset, + struct block_device *bdev, + struct bio **bio_ret, + unsigned long max_pages, + bio_end_io_t end_io_func) +{ + int ret = 0; + struct bio *bio; + int nr; + + if (bio_ret && *bio_ret) { + bio = *bio_ret; + if (bio->bi_sector + (bio->bi_size >> 9) != sector || + bio_add_page(bio, page, size, offset) < size) { + ret = submit_one_bio(rw, bio); + bio = NULL; + } else { + return 0; + } + } + nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); + bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); + if (!bio) { + printk("failed to allocate bio nr %d\n", nr); + } + bio_add_page(bio, page, size, offset); + bio->bi_end_io = end_io_func; + bio->bi_private = tree; + if (bio_ret) { + *bio_ret = bio; + } else { + ret = submit_one_bio(rw, bio); + } + + return ret; +} + +void set_page_extent_mapped(struct page *page) +{ + if (!PagePrivate(page)) { + SetPagePrivate(page); + WARN_ON(!page->mapping->a_ops->invalidatepage); + set_page_private(page, EXTENT_PAGE_PRIVATE); + page_cache_get(page); + } +} + +void set_page_extent_head(struct page *page, unsigned long len) +{ + set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); +} + +/* + * basic readpage implementation. 
Locked extent state structs are inserted + * into the tree that are removed when the IO is done (by the end_io + * handlers) + */ +static int __extent_read_full_page(struct extent_io_tree *tree, + struct page *page, + get_extent_t *get_extent, + struct bio **bio) +{ + struct inode *inode = page->mapping->host; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 cur_end; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t iosize; + size_t blocksize = inode->i_sb->s_blocksize; + + set_page_extent_mapped(page); + + end = page_end; + lock_extent(tree, start, end, GFP_NOFS); + + while (cur <= end) { + if (cur >= last_byte) { + char *userpage; + iosize = PAGE_CACHE_SIZE - page_offset; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + break; + } + em = get_extent(inode, page, page_offset, cur, + end - cur + 1, 0); + if (IS_ERR(em) || !em) { + SetPageError(page); + unlock_extent(tree, cur, end, GFP_NOFS); + break; + } + + extent_offset = cur - em->start; + BUG_ON(extent_map_end(em) <= cur); + BUG_ON(end < cur); + + iosize = min(extent_map_end(em) - cur, end - cur + 1); + cur_end = min(extent_map_end(em) - 1, end); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + /* we've found a hole, just zero and go on */ + if (block_start == EXTENT_MAP_HOLE) { + char *userpage; + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + page_offset, 0, iosize); + 
flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + + set_extent_uptodate(tree, cur, cur + iosize - 1, + GFP_NOFS); + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + /* the get_extent function already copied into the page */ + if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { + unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + ret = 0; + if (tree->ops && tree->ops->readpage_io_hook) { + ret = tree->ops->readpage_io_hook(page, cur, + cur + iosize - 1); + } + if (!ret) { + unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + nr -= page->index; + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, + bdev, bio, nr, + end_bio_extent_readpage); + } + if (ret) + SetPageError(page); + cur = cur + iosize; + page_offset += iosize; + nr++; + } + if (!nr) { + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + } + return 0; +} + +int extent_read_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent) +{ + struct bio *bio = NULL; + int ret; + + ret = __extent_read_full_page(tree, page, get_extent, &bio); + if (bio) + submit_one_bio(READ, bio); + return ret; +} +EXPORT_SYMBOL(extent_read_full_page); + +/* + * the writepage semantics are similar to regular writepage. extent + * records are inserted to lock ranges in the tree, and as dirty areas + * are found, they are marked writeback. 
Then the lock bits are removed + * and the end_io handler clears the writeback ranges + */ +static int __extent_writepage(struct page *page, struct writeback_control *wbc, + void *data) +{ + struct inode *inode = page->mapping->host; + struct extent_page_data *epd = data; + struct extent_io_tree *tree = epd->tree; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 delalloc_start; + u64 page_end = start + PAGE_CACHE_SIZE - 1; + u64 end; + u64 cur = start; + u64 extent_offset; + u64 last_byte = i_size_read(inode); + u64 block_start; + u64 iosize; + sector_t sector; + struct extent_map *em; + struct block_device *bdev; + int ret; + int nr = 0; + size_t page_offset = 0; + size_t blocksize; + loff_t i_size = i_size_read(inode); + unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; + u64 nr_delalloc; + u64 delalloc_end; + + WARN_ON(!PageLocked(page)); + if (page->index > end_index) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; + } + + if (page->index == end_index) { + char *userpage; + + size_t offset = i_size & (PAGE_CACHE_SIZE - 1); + + userpage = kmap_atomic(page, KM_USER0); + memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(userpage, KM_USER0); + } + + set_page_extent_mapped(page); + + delalloc_start = start; + delalloc_end = 0; + while(delalloc_end < page_end) { + nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, + &delalloc_end, + 128 * 1024 * 1024); + if (nr_delalloc == 0) { + delalloc_start = delalloc_end + 1; + continue; + } + tree->ops->fill_delalloc(inode, delalloc_start, + delalloc_end); + clear_extent_bit(tree, delalloc_start, + delalloc_end, + EXTENT_LOCKED | EXTENT_DELALLOC, + 1, 0, GFP_NOFS); + delalloc_start = delalloc_end + 1; + } + lock_extent(tree, start, page_end, GFP_NOFS); + + end = page_end; + if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { + printk("found delalloc bits after lock_extent\n"); + } + + if (last_byte <= 
start) { + clear_extent_dirty(tree, start, page_end, GFP_NOFS); + goto done; + } + + set_extent_uptodate(tree, start, page_end, GFP_NOFS); + blocksize = inode->i_sb->s_blocksize; + + while (cur <= end) { + if (cur >= last_byte) { + clear_extent_dirty(tree, cur, page_end, GFP_NOFS); + break; + } + em = epd->get_extent(inode, page, page_offset, cur, + end - cur + 1, 1); + if (IS_ERR(em) || !em) { + SetPageError(page); + break; + } + + extent_offset = cur - em->start; + BUG_ON(extent_map_end(em) <= cur); + BUG_ON(end < cur); + iosize = min(extent_map_end(em) - cur, end - cur + 1); + iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + sector = (em->block_start + extent_offset) >> 9; + bdev = em->bdev; + block_start = em->block_start; + free_extent_map(em); + em = NULL; + + if (block_start == EXTENT_MAP_HOLE || + block_start == EXTENT_MAP_INLINE) { + clear_extent_dirty(tree, cur, + cur + iosize - 1, GFP_NOFS); + cur = cur + iosize; + page_offset += iosize; + continue; + } + + /* leave this out until we have a page_mkwrite call */ + if (0 && !test_range_bit(tree, cur, cur + iosize - 1, + EXTENT_DIRTY, 0)) { + cur = cur + iosize; + page_offset += iosize; + continue; + } + clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); + if (tree->ops && tree->ops->writepage_io_hook) { + ret = tree->ops->writepage_io_hook(page, cur, + cur + iosize - 1); + } else { + ret = 0; + } + if (ret) + SetPageError(page); + else { + unsigned long max_nr = end_index + 1; + set_range_writeback(tree, cur, cur + iosize - 1); + if (!PageWriteback(page)) { + printk("warning page %lu not writeback, " + "cur %llu end %llu\n", page->index, + (unsigned long long)cur, + (unsigned long long)end); + } + + ret = submit_extent_page(WRITE, tree, page, sector, + iosize, page_offset, bdev, + &epd->bio, max_nr, + end_bio_extent_writepage); + if (ret) + SetPageError(page); + } + cur = cur + iosize; + page_offset += iosize; + nr++; + } +done: + if (nr == 0) { + /* make sure the mapping tag for page 
dirty gets cleared */ + set_page_writeback(page); + end_page_writeback(page); + } + unlock_extent(tree, start, page_end, GFP_NOFS); + unlock_page(page); + return 0; +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + +/* Taken directly from 2.6.23 for 2.6.18 back port */ +typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, + void *data); + +/** + * write_cache_pages - walk the list of dirty pages of the given address space + * and write all of them. + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * @writepage: function called for each page + * @data: data passed to writepage function + * + * If a page is already under I/O, write_cache_pages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. 
+ */ +static int write_cache_pages(struct address_space *mapping, + struct writeback_control *wbc, writepage_t writepage, + void *data) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc, data); + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { + unlock_page(page); + ret = 0; + } + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + 
pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} +#endif + +int extent_write_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret; + struct address_space *mapping = page->mapping; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + struct writeback_control wbc_writepages = { + .bdi = wbc->bdi, + .sync_mode = WB_SYNC_NONE, + .older_than_this = NULL, + .nr_to_write = 64, + .range_start = page_offset(page) + PAGE_CACHE_SIZE, + .range_end = (loff_t)-1, + }; + + + ret = __extent_writepage(page, wbc, &epd); + + write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); + if (epd.bio) { + submit_one_bio(WRITE, epd.bio); + } + return ret; +} +EXPORT_SYMBOL(extent_write_full_page); + + +int extent_writepages(struct extent_io_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc) +{ + int ret = 0; + struct extent_page_data epd = { + .bio = NULL, + .tree = tree, + .get_extent = get_extent, + }; + + ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); + if (epd.bio) { + submit_one_bio(WRITE, epd.bio); + } + return ret; +} +EXPORT_SYMBOL(extent_writepages); + +int extent_readpages(struct extent_io_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent) +{ + struct bio *bio = NULL; + unsigned page_idx; + struct pagevec pvec; + + pagevec_init(&pvec, 0); + for (page_idx = 0; page_idx < nr_pages; page_idx++) { + struct page *page = list_entry(pages->prev, struct page, lru); + + prefetchw(&page->flags); + list_del(&page->lru); + /* 
+ * what we want to do here is call add_to_page_cache_lru, + * but that isn't exported, so we reproduce it here + */ + if (!add_to_page_cache(page, mapping, + page->index, GFP_KERNEL)) { + + /* open coding of lru_cache_add, also not exported */ + page_cache_get(page); + if (!pagevec_add(&pvec, page)) + __pagevec_lru_add(&pvec); + __extent_read_full_page(tree, page, get_extent, &bio); + } + page_cache_release(page); + } + if (pagevec_count(&pvec)) + __pagevec_lru_add(&pvec); + BUG_ON(!list_empty(pages)); + if (bio) + submit_one_bio(READ, bio); + return 0; +} +EXPORT_SYMBOL(extent_readpages); + +/* + * basic invalidatepage code, this waits on any locked or writeback + * ranges corresponding to the page, and then deletes any extent state + * records from the tree + */ +int extent_invalidatepage(struct extent_io_tree *tree, + struct page *page, unsigned long offset) +{ + u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); + u64 end = start + PAGE_CACHE_SIZE - 1; + size_t blocksize = page->mapping->host->i_sb->s_blocksize; + + start += (offset + blocksize -1) & ~(blocksize - 1); + if (start > end) + return 0; + + lock_extent(tree, start, end, GFP_NOFS); + wait_on_extent_writeback(tree, start, end); + clear_extent_bit(tree, start, end, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, + 1, 1, GFP_NOFS); + return 0; +} +EXPORT_SYMBOL(extent_invalidatepage); + +/* + * simple commit_write call, set_range_dirty is used to mark both + * the pages and the extent records as dirty + */ +int extent_commit_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to) +{ + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + set_page_extent_mapped(page); + set_page_dirty(page); + + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); + } + return 0; +} +EXPORT_SYMBOL(extent_commit_write); + +int extent_prepare_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned 
from, unsigned to, get_extent_t *get_extent) +{ + u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + u64 block_start; + u64 orig_block_start; + u64 block_end; + u64 cur_end; + struct extent_map *em; + unsigned blocksize = 1 << inode->i_blkbits; + size_t page_offset = 0; + size_t block_off_start; + size_t block_off_end; + int err = 0; + int iocount = 0; + int ret = 0; + int isnew; + + set_page_extent_mapped(page); + + block_start = (page_start + from) & ~((u64)blocksize - 1); + block_end = (page_start + to - 1) | (blocksize - 1); + orig_block_start = block_start; + + lock_extent(tree, page_start, page_end, GFP_NOFS); + while(block_start <= block_end) { + em = get_extent(inode, page, page_offset, block_start, + block_end - block_start + 1, 1); + if (IS_ERR(em) || !em) { + goto err; + } + cur_end = min(block_end, extent_map_end(em) - 1); + block_off_start = block_start & (PAGE_CACHE_SIZE - 1); + block_off_end = block_off_start + blocksize; + isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); + + if (!PageUptodate(page) && isnew && + (block_off_end > to || block_off_start < from)) { + void *kaddr; + + kaddr = kmap_atomic(page, KM_USER0); + if (block_off_end > to) + memset(kaddr + to, 0, block_off_end - to); + if (block_off_start < from) + memset(kaddr + block_off_start, 0, + from - block_off_start); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + if ((em->block_start != EXTENT_MAP_HOLE && + em->block_start != EXTENT_MAP_INLINE) && + !isnew && !PageUptodate(page) && + (block_off_end > to || block_off_start < from) && + !test_range_bit(tree, block_start, cur_end, + EXTENT_UPTODATE, 1)) { + u64 sector; + u64 extent_offset = block_start - em->start; + size_t iosize; + sector = (em->block_start + extent_offset) >> 9; + iosize = (cur_end - block_start + blocksize) & + ~((u64)blocksize - 1); + /* + * we've already got the extent locked, but we + * need to split the state such that our 
end_bio + * handler can clear the lock. + */ + set_extent_bit(tree, block_start, + block_start + iosize - 1, + EXTENT_LOCKED, 0, NULL, GFP_NOFS); + ret = submit_extent_page(READ, tree, page, + sector, iosize, page_offset, em->bdev, + NULL, 1, + end_bio_extent_preparewrite); + iocount++; + block_start = block_start + iosize; + } else { + set_extent_uptodate(tree, block_start, cur_end, + GFP_NOFS); + unlock_extent(tree, block_start, cur_end, GFP_NOFS); + block_start = cur_end + 1; + } + page_offset = block_start & (PAGE_CACHE_SIZE - 1); + free_extent_map(em); + } + if (iocount) { + wait_extent_bit(tree, orig_block_start, + block_end, EXTENT_LOCKED); + } + check_page_uptodate(tree, page); +err: + /* FIXME, zero out newly allocated blocks on error */ + return err; +} +EXPORT_SYMBOL(extent_prepare_write); + +/* + * a helper for releasepage. As long as there are no locked extents + * in the range corresponding to the page, both state records and extent + * map records are removed + */ +int try_release_extent_mapping(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page) +{ + struct extent_map *em; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 end = start + PAGE_CACHE_SIZE - 1; + u64 orig_start = start; + int ret = 1; + + while (start <= end) { + spin_lock(&map->lock); + em = lookup_extent_mapping(map, start, end); + if (!em || IS_ERR(em)) { + spin_unlock(&map->lock); + break; + } + if (!test_range_bit(tree, em->start, extent_map_end(em) - 1, + EXTENT_LOCKED, 0)) { + remove_extent_mapping(map, em); + /* once for the rb tree */ + free_extent_map(em); + } + start = extent_map_end(em); + spin_unlock(&map->lock); + + /* once for us */ + free_extent_map(em); + } + if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) + ret = 0; + else + clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, + 1, 1, GFP_NOFS); + return ret; +} +EXPORT_SYMBOL(try_release_extent_mapping); + +sector_t extent_bmap(struct address_space *mapping, sector_t 
iblock, + get_extent_t *get_extent) +{ + struct inode *inode = mapping->host; + u64 start = iblock << inode->i_blkbits; + sector_t sector = 0; + struct extent_map *em; + + em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0); + if (!em || IS_ERR(em)) + return 0; + + if (em->block_start == EXTENT_MAP_INLINE || + em->block_start == EXTENT_MAP_HOLE) + goto out; + + sector = (em->block_start + start - em->start) >> inode->i_blkbits; +printk("bmap finds %Lu %Lu block %Lu\n", em->start, em->len, em->block_start); +out: + free_extent_map(em); + return sector; +} + +static int add_lru(struct extent_io_tree *tree, struct extent_buffer *eb) +{ + if (list_empty(&eb->lru)) { + extent_buffer_get(eb); + list_add(&eb->lru, &tree->buffer_lru); + tree->lru_size++; + if (tree->lru_size >= BUFFER_LRU_MAX) { + struct extent_buffer *rm; + rm = list_entry(tree->buffer_lru.prev, + struct extent_buffer, lru); + tree->lru_size--; + list_del_init(&rm->lru); + free_extent_buffer(rm); + } + } else + list_move(&eb->lru, &tree->buffer_lru); + return 0; +} +static struct extent_buffer *find_lru(struct extent_io_tree *tree, + u64 start, unsigned long len) +{ + struct list_head *lru = &tree->buffer_lru; + struct list_head *cur = lru->next; + struct extent_buffer *eb; + + if (list_empty(lru)) + return NULL; + + do { + eb = list_entry(cur, struct extent_buffer, lru); + if (eb->start == start && eb->len == len) { + extent_buffer_get(eb); + return eb; + } + cur = cur->next; + } while (cur != lru); + return NULL; +} + +static inline unsigned long num_extent_pages(u64 start, u64 len) +{ + return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + (start >> PAGE_CACHE_SHIFT); +} + +static inline struct page *extent_buffer_page(struct extent_buffer *eb, + unsigned long i) +{ + struct page *p; + struct address_space *mapping; + + if (i == 0) + return eb->first_page; + i += eb->start >> PAGE_CACHE_SHIFT; + mapping = eb->first_page->mapping; + read_lock_irq(&mapping->tree_lock); + p = 
radix_tree_lookup(&mapping->page_tree, i); + read_unlock_irq(&mapping->tree_lock); + return p; +} + +static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, + unsigned long len, + gfp_t mask) +{ + struct extent_buffer *eb = NULL; + + spin_lock(&tree->lru_lock); + eb = find_lru(tree, start, len); + spin_unlock(&tree->lru_lock); + if (eb) { + return eb; + } + + eb = kmem_cache_zalloc(extent_buffer_cache, mask); + INIT_LIST_HEAD(&eb->lru); + eb->start = start; + eb->len = len; + atomic_set(&eb->refs, 1); + + return eb; +} + +static void __free_extent_buffer(struct extent_buffer *eb) +{ + kmem_cache_free(extent_buffer_cache, eb); +} + +struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + struct page *page0, + gfp_t mask) +{ + unsigned long num_pages = num_extent_pages(start, len); + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 1; + + eb = __alloc_extent_buffer(tree, start, len, mask); + if (!eb || IS_ERR(eb)) + return NULL; + + if (eb->flags & EXTENT_BUFFER_FILLED) + goto lru_add; + + if (page0) { + eb->first_page = page0; + i = 1; + index++; + page_cache_get(page0); + mark_page_accessed(page0); + set_page_extent_mapped(page0); + WARN_ON(!PageUptodate(page0)); + set_page_extent_head(page0, len); + } else { + i = 0; + } + for (; i < num_pages; i++, index++) { + p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); + if (!p) { + WARN_ON(1); + goto fail; + } + set_page_extent_mapped(p); + mark_page_accessed(p); + if (i == 0) { + eb->first_page = p; + set_page_extent_head(p, len); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); 
+ spin_unlock(&tree->lru_lock); + return eb; + +fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 1; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); + __free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(alloc_extent_buffer); + +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + gfp_t mask) +{ + unsigned long num_pages = num_extent_pages(start, len); + unsigned long i; + unsigned long index = start >> PAGE_CACHE_SHIFT; + struct extent_buffer *eb; + struct page *p; + struct address_space *mapping = tree->mapping; + int uptodate = 1; + + eb = __alloc_extent_buffer(tree, start, len, mask); + if (!eb || IS_ERR(eb)) + return NULL; + + if (eb->flags & EXTENT_BUFFER_FILLED) + goto lru_add; + + for (i = 0; i < num_pages; i++, index++) { + p = find_lock_page(mapping, index); + if (!p) { + goto fail; + } + set_page_extent_mapped(p); + mark_page_accessed(p); + + if (i == 0) { + eb->first_page = p; + set_page_extent_head(p, len); + } else { + set_page_private(p, EXTENT_PAGE_PRIVATE); + } + + if (!PageUptodate(p)) + uptodate = 0; + unlock_page(p); + } + if (uptodate) + eb->flags |= EXTENT_UPTODATE; + eb->flags |= EXTENT_BUFFER_FILLED; + +lru_add: + spin_lock(&tree->lru_lock); + add_lru(tree, eb); + spin_unlock(&tree->lru_lock); + return eb; +fail: + spin_lock(&tree->lru_lock); + list_del_init(&eb->lru); + spin_unlock(&tree->lru_lock); + if (!atomic_dec_and_test(&eb->refs)) + return NULL; + for (index = 1; index < i; index++) { + page_cache_release(extent_buffer_page(eb, index)); + } + if (i > 0) + page_cache_release(extent_buffer_page(eb, 0)); + __free_extent_buffer(eb); + return NULL; +} +EXPORT_SYMBOL(find_extent_buffer); + +void free_extent_buffer(struct extent_buffer *eb) +{ + unsigned long i; + unsigned long 
num_pages; + + if (!eb) + return; + + if (!atomic_dec_and_test(&eb->refs)) + return; + + WARN_ON(!list_empty(&eb->lru)); + num_pages = num_extent_pages(eb->start, eb->len); + + for (i = 1; i < num_pages; i++) { + page_cache_release(extent_buffer_page(eb, i)); + } + page_cache_release(extent_buffer_page(eb, 0)); + __free_extent_buffer(eb); +} +EXPORT_SYMBOL(free_extent_buffer); + +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + int set; + unsigned long i; + unsigned long num_pages; + struct page *page; + + u64 start = eb->start; + u64 end = start + eb->len - 1; + + set = clear_extent_dirty(tree, start, end, GFP_NOFS); + num_pages = num_extent_pages(eb->start, eb->len); + + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + lock_page(page); + if (i == 0) + set_page_extent_head(page, eb->len); + else + set_page_private(page, EXTENT_PAGE_PRIVATE); + + /* + * if we're on the last page or the first page and the + * block isn't aligned on a page boundary, do extra checks + * to make sure we don't clean page that is partially dirty + */ + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { + start = (u64)page->index << PAGE_CACHE_SHIFT; + end = start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(tree, start, end, + EXTENT_DIRTY, 0)) { + unlock_page(page); + continue; + } + } + clear_page_dirty_for_io(page); + write_lock_irq(&page->mapping->tree_lock); + if (!PageDirty(page)) { + radix_tree_tag_clear(&page->mapping->page_tree, + page_index(page), + PAGECACHE_TAG_DIRTY); + } + write_unlock_irq(&page->mapping->tree_lock); + unlock_page(page); + } + return 0; +} +EXPORT_SYMBOL(clear_extent_buffer_dirty); + +int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + return wait_on_extent_writeback(tree, eb->start, + eb->start + eb->len - 1); +} +EXPORT_SYMBOL(wait_on_extent_buffer_writeback); + 
+int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = 0; i < num_pages; i++) { + struct page *page = extent_buffer_page(eb, i); + /* writepage may need to do something special for the + * first page, we have to make sure page->private is + * properly set. releasepage may drop page->private + * on us if the page isn't already dirty. + */ + if (i == 0) { + lock_page(page); + set_page_extent_head(page, eb->len); + } else if (PagePrivate(page) && + page->private != EXTENT_PAGE_PRIVATE) { + lock_page(page); + set_page_extent_mapped(page); + unlock_page(page); + } + __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); + if (i == 0) + unlock_page(page); + } + return set_extent_dirty(tree, eb->start, + eb->start + eb->len - 1, GFP_NOFS); +} +EXPORT_SYMBOL(set_extent_buffer_dirty); + +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + unsigned long i; + struct page *page; + unsigned long num_pages; + + num_pages = num_extent_pages(eb->start, eb->len); + + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + for (i = 0; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || + ((i == num_pages - 1) && + ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { + check_page_uptodate(tree, page); + continue; + } + SetPageUptodate(page); + } + return 0; +} +EXPORT_SYMBOL(set_extent_buffer_uptodate); + +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb) +{ + if (eb->flags & EXTENT_UPTODATE) + return 1; + return test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1); +} +EXPORT_SYMBOL(extent_buffer_uptodate); + +int read_extent_buffer_pages(struct extent_io_tree *tree, + struct extent_buffer *eb, + u64 start, + int wait) +{ + unsigned long i; + unsigned long 
start_i; + struct page *page; + int err; + int ret = 0; + unsigned long num_pages; + + if (eb->flags & EXTENT_UPTODATE) + return 0; + + if (0 && test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1)) { + return 0; + } + + if (start) { + WARN_ON(start < eb->start); + start_i = (start >> PAGE_CACHE_SHIFT) - + (eb->start >> PAGE_CACHE_SHIFT); + } else { + start_i = 0; + } + + num_pages = num_extent_pages(eb->start, eb->len); + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + if (PageUptodate(page)) { + continue; + } + if (!wait) { + if (TestSetPageLocked(page)) { + continue; + } + } else { + lock_page(page); + } + if (!PageUptodate(page)) { + err = page->mapping->a_ops->readpage(NULL, page); + if (err) { + ret = err; + } + } else { + unlock_page(page); + } + } + + if (ret || !wait) { + return ret; + } + + for (i = start_i; i < num_pages; i++) { + page = extent_buffer_page(eb, i); + wait_on_page_locked(page); + if (!PageUptodate(page)) { + ret = -EIO; + } + } + if (!ret) + eb->flags |= EXTENT_UPTODATE; + return ret; +} +EXPORT_SYMBOL(read_extent_buffer_pages); + +void read_extent_buffer(struct extent_buffer *eb, void *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *dst = (char *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long num_pages = num_extent_pages(eb->start, eb->len); + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + if (!PageUptodate(page)) { + printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); + WARN_ON(1); + } + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + kaddr = kmap_atomic(page, 
KM_USER1); + memcpy(dst, kaddr + offset, cur); + kunmap_atomic(kaddr, KM_USER1); + + dst += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(read_extent_buffer); + +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + size_t offset = start & (PAGE_CACHE_SIZE - 1); + char *kaddr; + struct page *p; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + unsigned long end_i = (start_offset + start + min_len - 1) >> + PAGE_CACHE_SHIFT; + + if (i != end_i) + return -EINVAL; + + if (i == 0) { + offset = start_offset; + *map_start = 0; + } else { + offset = 0; + *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; + } + if (start + min_len > eb->len) { +printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); + WARN_ON(1); + } + + p = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(p)); + kaddr = kmap_atomic(p, km); + *token = kaddr; + *map = kaddr + offset; + *map_len = PAGE_CACHE_SIZE - offset; + return 0; +} +EXPORT_SYMBOL(map_private_extent_buffer); + +int map_extent_buffer(struct extent_buffer *eb, unsigned long start, + unsigned long min_len, + char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km) +{ + int err; + int save = 0; + if (eb->map_token) { + unmap_extent_buffer(eb, eb->map_token, km); + eb->map_token = NULL; + save = 1; + } + err = map_private_extent_buffer(eb, start, min_len, token, map, + map_start, map_len, km); + if (!err && save) { + eb->map_token = *token; + eb->kaddr = *map; + eb->map_start = *map_start; + eb->map_len = *map_len; + } + return err; +} +EXPORT_SYMBOL(map_extent_buffer); + +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) +{ + kunmap_atomic(token, km); +} +EXPORT_SYMBOL(unmap_extent_buffer); + +int 
memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *ptr = (char *)ptrv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + + kaddr = kmap_atomic(page, KM_USER0); + ret = memcmp(ptr, kaddr + offset, cur); + kunmap_atomic(kaddr, KM_USER0); + if (ret) + break; + + ptr += cur; + len -= cur; + offset = 0; + i++; + } + return ret; +} +EXPORT_SYMBOL(memcmp_extent_buffer); + +void write_extent_buffer(struct extent_buffer *eb, const void *srcv, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char *src = (char *)srcv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + kaddr = kmap_atomic(page, KM_USER1); + memcpy(kaddr + offset, src, cur); + kunmap_atomic(kaddr, KM_USER1); + + src += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(write_extent_buffer); + +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) 
>> PAGE_CACHE_SHIFT; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(eb, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, PAGE_CACHE_SIZE - offset); + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, c, cur); + kunmap_atomic(kaddr, KM_USER0); + + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(memset_extent_buffer); + +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len) +{ + u64 dst_len = dst->len; + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + + WARN_ON(src->len != dst_len); + + offset = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + while(len > 0) { + page = extent_buffer_page(dst, i); + WARN_ON(!PageUptodate(page)); + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); + + kaddr = kmap_atomic(page, KM_USER0); + read_extent_buffer(src, kaddr + offset, src_offset, cur); + kunmap_atomic(kaddr, KM_USER0); + + src_offset += cur; + len -= cur; + offset = 0; + i++; + } +} +EXPORT_SYMBOL(copy_extent_buffer); + +static void move_pages(struct page *dst_page, struct page *src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + if (dst_page == src_page) { + memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); + } else { + char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *p = dst_kaddr + dst_off + len; + char *s = src_kaddr + src_off + len; + + while (len--) + *--p = *--s; + + kunmap_atomic(src_kaddr, KM_USER1); + } + kunmap_atomic(dst_kaddr, KM_USER0); +} + +static void copy_pages(struct page *dst_page, struct page 
*src_page, + unsigned long dst_off, unsigned long src_off, + unsigned long len) +{ + char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *src_kaddr; + + if (dst_page != src_page) + src_kaddr = kmap_atomic(src_page, KM_USER1); + else + src_kaddr = dst_kaddr; + + memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); + kunmap_atomic(dst_kaddr, KM_USER0); + if (dst_page != src_page) + kunmap_atomic(src_kaddr, KM_USER1); +} + +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + + while(len > 0) { + dst_off_in_page = (start_offset + dst_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = (start_offset + src_offset) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; + + cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - + src_off_in_page)); + cur = min_t(unsigned long, cur, + (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); + + copy_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), + dst_off_in_page, src_off_in_page, cur); + + src_offset += cur; + dst_offset += cur; + len -= cur; + } +} +EXPORT_SYMBOL(memcpy_extent_buffer); + +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len) +{ + size_t cur; + size_t dst_off_in_page; + size_t src_off_in_page; + unsigned long dst_end = dst_offset + len - 1; + 
unsigned long src_end = src_offset + len - 1; + size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long dst_i; + unsigned long src_i; + + if (src_offset + len > dst->len) { + printk("memmove bogus src_offset %lu move len %lu len %lu\n", + src_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset + len > dst->len) { + printk("memmove bogus dst_offset %lu move len %lu len %lu\n", + dst_offset, len, dst->len); + BUG_ON(1); + } + if (dst_offset < src_offset) { + memcpy_extent_buffer(dst, dst_offset, src_offset, len); + return; + } + while(len > 0) { + dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; + src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; + + dst_off_in_page = (start_offset + dst_end) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + src_off_in_page = (start_offset + src_end) & + ((unsigned long)PAGE_CACHE_SIZE - 1); + + cur = min_t(unsigned long, len, src_off_in_page + 1); + cur = min(cur, dst_off_in_page + 1); + move_pages(extent_buffer_page(dst, dst_i), + extent_buffer_page(dst, src_i), + dst_off_in_page - cur + 1, + src_off_in_page - cur + 1, cur); + + dst_end -= cur; + src_end -= cur; + len -= cur; + } +} +EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h new file mode 100644 index 000000000000..06be1fe84b29 --- /dev/null +++ b/fs/btrfs/extent_io.h @@ -0,0 +1,193 @@ +#ifndef __EXTENTIO__ +#define __EXTENTIO__ + +#include + +/* bits for the extent state */ +#define EXTENT_DIRTY 1 +#define EXTENT_WRITEBACK (1 << 1) +#define EXTENT_UPTODATE (1 << 2) +#define EXTENT_LOCKED (1 << 3) +#define EXTENT_NEW (1 << 4) +#define EXTENT_DELALLOC (1 << 5) +#define EXTENT_DEFRAG (1 << 6) +#define EXTENT_DEFRAG_DONE (1 << 7) +#define EXTENT_BUFFER_FILLED (1 << 8) +#define EXTENT_CSUM (1 << 9) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + +/* + * page->private values. Every page that is controlled by the extent + * map has page->private set to one. 
+ */ +#define EXTENT_PAGE_PRIVATE 1 +#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 + +struct extent_io_ops { + int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); + int (*writepage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_io_hook)(struct page *page, u64 start, u64 end); + int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); + void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); +}; + +struct extent_io_tree { + struct rb_root state; + struct address_space *mapping; + u64 dirty_bytes; + rwlock_t lock; + struct extent_io_ops *ops; + spinlock_t lru_lock; + struct list_head buffer_lru; + int lru_size; +}; + +struct extent_state { + u64 start; + u64 end; /* inclusive */ + int in_tree; + struct rb_node rb_node; + wait_queue_head_t wq; + atomic_t refs; + unsigned long state; + + /* for use by the FS */ + u64 private; + + struct list_head list; +}; + +struct extent_buffer { + u64 start; + unsigned long len; + char *map_token; + char *kaddr; + unsigned long map_start; + unsigned long map_len; + struct page *first_page; + struct list_head lru; + atomic_t refs; + int flags; +}; + +struct extent_map_tree; + +typedef struct extent_map *(get_extent_t)(struct inode *inode, + struct page *page, + size_t page_offset, + u64 start, u64 len, + int create); + +void extent_io_tree_init(struct extent_io_tree *tree, + struct address_space *mapping, gfp_t mask); +void extent_io_tree_empty_lru(struct extent_io_tree *tree); +int try_release_extent_mapping(struct extent_map_tree *map, + struct extent_io_tree *tree, struct page *page); +int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int extent_read_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent); +int __init extent_io_init(void); +void extent_io_exit(void); + +u64 count_range_bits(struct extent_io_tree *tree, + u64 *start, u64 
search_end, + u64 max_bytes, unsigned long bits); + +int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, + int bits, int filled); +int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, + int bits, gfp_t mask); +int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, + gfp_t mask); +int find_first_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, int bits); +int extent_invalidatepage(struct extent_io_tree *tree, + struct page *page, unsigned long offset); +int extent_write_full_page(struct extent_io_tree *tree, struct page *page, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_writepages(struct extent_io_tree *tree, + struct address_space *mapping, + get_extent_t *get_extent, + struct writeback_control *wbc); +int extent_readpages(struct extent_io_tree *tree, + struct address_space *mapping, + struct list_head *pages, unsigned nr_pages, + get_extent_t get_extent); +int extent_prepare_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to, get_extent_t *get_extent); +int extent_commit_write(struct extent_io_tree *tree, + struct inode *inode, struct page *page, + unsigned from, unsigned to); +sector_t extent_bmap(struct address_space *mapping, sector_t iblock, + get_extent_t *get_extent); +int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); +int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); +int get_state_private(struct extent_io_tree 
*tree, u64 start, u64 *private); +void set_page_extent_mapped(struct page *page); + +struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + struct page *page0, + gfp_t mask); +struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree, + u64 start, unsigned long len, + gfp_t mask); +void free_extent_buffer(struct extent_buffer *eb); +int read_extent_buffer_pages(struct extent_io_tree *tree, + struct extent_buffer *eb, u64 start, int wait); + +static inline void extent_buffer_get(struct extent_buffer *eb) +{ + atomic_inc(&eb->refs); +} + +int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, + unsigned long start, + unsigned long len); +void read_extent_buffer(struct extent_buffer *eb, void *dst, + unsigned long start, + unsigned long len); +void write_extent_buffer(struct extent_buffer *eb, const void *src, + unsigned long start, unsigned long len); +void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, + unsigned long dst_offset, unsigned long src_offset, + unsigned long len); +void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, + unsigned long src_offset, unsigned long len); +void memset_extent_buffer(struct extent_buffer *eb, char c, + unsigned long start, unsigned long len); +int wait_on_extent_buffer_writeback(struct extent_io_tree *tree, + struct extent_buffer *eb); +int clear_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_dirty(struct extent_io_tree *tree, + struct extent_buffer *eb); +int set_extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int extent_buffer_uptodate(struct extent_io_tree *tree, + struct extent_buffer *eb); +int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, 
char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, + unsigned long min_len, char **token, char **map, + unsigned long *map_start, + unsigned long *map_len, int km); +void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); +#endif diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 010a287fbd71..268ad8facf6e 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1,17 +1,10 @@ -#include -#include -#include -#include +#include #include -#include -#include +#include #include #include -#include -#include #include -#include -#include +#include #include "extent_map.h" /* temporary define until extent_map moves out of btrfs */ @@ -21,27 +14,6 @@ struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long)); static struct kmem_cache *extent_map_cache; -static struct kmem_cache *extent_state_cache; -static struct kmem_cache *extent_buffer_cache; - -static LIST_HEAD(buffers); -static LIST_HEAD(states); - -static spinlock_t state_lock = SPIN_LOCK_UNLOCKED; -#define BUFFER_LRU_MAX 64 - -struct tree_entry { - u64 start; - u64 end; - int in_tree; - struct rb_node rb_node; -}; - -struct extent_page_data { - struct bio *bio; - struct extent_map_tree *tree; - get_extent_t *get_extent; -}; int __init extent_map_init(void) { @@ -50,72 +22,23 @@ int __init extent_map_init(void) NULL); if (!extent_map_cache) return -ENOMEM; - extent_state_cache = btrfs_cache_create("extent_state", - sizeof(struct extent_state), 0, - NULL); - if (!extent_state_cache) - goto free_map_cache; - extent_buffer_cache = btrfs_cache_create("extent_buffers", - sizeof(struct extent_buffer), 0, - NULL); - if (!extent_buffer_cache) - goto free_state_cache; return 0; - -free_state_cache: - kmem_cache_destroy(extent_state_cache); -free_map_cache: - kmem_cache_destroy(extent_map_cache); - return -ENOMEM; } void extent_map_exit(void) { - 
struct extent_state *state; - - while (!list_empty(&states)) { - state = list_entry(states.next, struct extent_state, list); - printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); - list_del(&state->list); - kmem_cache_free(extent_state_cache, state); - - } - if (extent_map_cache) kmem_cache_destroy(extent_map_cache); - if (extent_state_cache) - kmem_cache_destroy(extent_state_cache); - if (extent_buffer_cache) - kmem_cache_destroy(extent_buffer_cache); } -void extent_map_tree_init(struct extent_map_tree *tree, - struct address_space *mapping, gfp_t mask) +void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) { tree->map.rb_node = NULL; - tree->state.rb_node = NULL; - tree->ops = NULL; - tree->dirty_bytes = 0; - rwlock_init(&tree->lock); - spin_lock_init(&tree->lru_lock); - tree->mapping = mapping; - INIT_LIST_HEAD(&tree->buffer_lru); - tree->lru_size = 0; + tree->last = NULL; + spin_lock_init(&tree->lock); } EXPORT_SYMBOL(extent_map_tree_init); -void extent_map_tree_empty_lru(struct extent_map_tree *tree) -{ - struct extent_buffer *eb; - while(!list_empty(&tree->buffer_lru)) { - eb = list_entry(tree->buffer_lru.next, struct extent_buffer, - lru); - list_del_init(&eb->lru); - free_extent_buffer(eb); - } -} -EXPORT_SYMBOL(extent_map_tree_empty_lru); - struct extent_map *alloc_extent_map(gfp_t mask) { struct extent_map *em; @@ -123,6 +46,7 @@ struct extent_map *alloc_extent_map(gfp_t mask) if (!em || IS_ERR(em)) return em; em->in_tree = 0; + em->flags = 0; atomic_set(&em->refs, 1); return em; } @@ -132,6 +56,7 @@ void free_extent_map(struct extent_map *em) { if (!em) return; + WARN_ON(atomic_read(&em->refs) == 0); if (atomic_dec_and_test(&em->refs)) { WARN_ON(em->in_tree); kmem_cache_free(extent_map_cache, em); @@ -139,64 +64,28 @@ void free_extent_map(struct extent_map *em) } EXPORT_SYMBOL(free_extent_map); - -struct extent_state 
*alloc_extent_state(gfp_t mask) -{ - struct extent_state *state; - unsigned long flags; - - state = kmem_cache_alloc(extent_state_cache, mask); - if (!state || IS_ERR(state)) - return state; - state->state = 0; - state->in_tree = 0; - state->private = 0; - - spin_lock_irqsave(&state_lock, flags); - list_add(&state->list, &states); - spin_unlock_irqrestore(&state_lock, flags); - - atomic_set(&state->refs, 1); - init_waitqueue_head(&state->wq); - return state; -} -EXPORT_SYMBOL(alloc_extent_state); - -void free_extent_state(struct extent_state *state) -{ - unsigned long flags; - if (!state) - return; - if (atomic_dec_and_test(&state->refs)) { - WARN_ON(state->in_tree); - spin_lock_irqsave(&state_lock, flags); - list_del(&state->list); - spin_unlock_irqrestore(&state_lock, flags); - kmem_cache_free(extent_state_cache, state); - } -} -EXPORT_SYMBOL(free_extent_state); - static struct rb_node *tree_insert(struct rb_root *root, u64 offset, struct rb_node *node) { struct rb_node ** p = &root->rb_node; struct rb_node * parent = NULL; - struct tree_entry *entry; + struct extent_map *entry; while(*p) { parent = *p; - entry = rb_entry(parent, struct tree_entry, rb_node); + entry = rb_entry(parent, struct extent_map, rb_node); + + WARN_ON(!entry->in_tree); if (offset < entry->start) p = &(*p)->rb_left; - else if (offset > entry->end) + else if (offset >= extent_map_end(entry)) p = &(*p)->rb_right; else return parent; } - entry = rb_entry(node, struct tree_entry, rb_node); + entry = rb_entry(node, struct extent_map, rb_node); entry->in_tree = 1; rb_link_node(node, parent, p); rb_insert_color(node, root); @@ -210,17 +99,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, struct rb_node * n = root->rb_node; struct rb_node *prev = NULL; struct rb_node *orig_prev = NULL; - struct tree_entry *entry; - struct tree_entry *prev_entry = NULL; + struct extent_map *entry; + struct extent_map *prev_entry = NULL; while(n) { - entry = rb_entry(n, struct tree_entry, 
rb_node); + entry = rb_entry(n, struct extent_map, rb_node); prev = n; prev_entry = entry; + WARN_ON(!entry->in_tree); + if (offset < entry->start) n = n->rb_left; - else if (offset > entry->end) + else if (offset >= extent_map_end(entry)) n = n->rb_right; else return n; @@ -228,19 +119,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, if (prev_ret) { orig_prev = prev; - while(prev && offset > prev_entry->end) { + while(prev && offset >= extent_map_end(prev_entry)) { prev = rb_next(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); } *prev_ret = prev; prev = orig_prev; } if (next_ret) { - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); while(prev && offset < prev_entry->start) { prev = rb_prev(prev); - prev_entry = rb_entry(prev, struct tree_entry, rb_node); + prev_entry = rb_entry(prev, struct extent_map, rb_node); } *next_ret = prev; } @@ -257,22 +148,26 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) return ret; } -static int tree_delete(struct rb_root *root, u64 offset) +static int mergable_maps(struct extent_map *prev, struct extent_map *next) { - struct rb_node *node; - struct tree_entry *entry; - - node = __tree_search(root, offset, NULL, NULL); - if (!node) - return -ENOENT; - entry = rb_entry(node, struct tree_entry, rb_node); - entry->in_tree = 0; - rb_erase(node, root); + if (extent_map_end(prev) == next->start && + prev->flags == next->flags && + prev->bdev == next->bdev && + ((next->block_start == EXTENT_MAP_HOLE && + prev->block_start == EXTENT_MAP_HOLE) || + (next->block_start == EXTENT_MAP_INLINE && + prev->block_start == EXTENT_MAP_INLINE) || + (next->block_start == EXTENT_MAP_DELALLOC && + prev->block_start == EXTENT_MAP_DELALLOC) || + (next->block_start < EXTENT_MAP_LAST_BYTE - 1 && + next->block_start == extent_map_block_end(prev)))) { + return 1; + 
} return 0; } /* - * add_extent_mapping tries a simple backward merge with existing + * add_extent_mapping tries a simple forward/backward merge with existing * mappings. The extent_map struct passed in will be inserted into * the tree directly (no copies made, just a reference taken). */ @@ -280,13 +175,12 @@ int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { int ret = 0; - struct extent_map *prev = NULL; + struct extent_map *merge = NULL; struct rb_node *rb; - write_lock_irq(&tree->lock); - rb = tree_insert(&tree->map, em->end, &em->rb_node); + rb = tree_insert(&tree->map, em->start, &em->rb_node); if (rb) { - prev = rb_entry(rb, struct extent_map, rb_node); + merge = rb_entry(rb, struct extent_map, rb_node); ret = -EEXIST; goto out; } @@ -294,53 +188,60 @@ int add_extent_mapping(struct extent_map_tree *tree, if (em->start != 0) { rb = rb_prev(&em->rb_node); if (rb) - prev = rb_entry(rb, struct extent_map, rb_node); - if (prev && prev->end + 1 == em->start && - ((em->block_start == EXTENT_MAP_HOLE && - prev->block_start == EXTENT_MAP_HOLE) || - (em->block_start == EXTENT_MAP_INLINE && - prev->block_start == EXTENT_MAP_INLINE) || - (em->block_start == EXTENT_MAP_DELALLOC && - prev->block_start == EXTENT_MAP_DELALLOC) || - (em->block_start < EXTENT_MAP_DELALLOC - 1 && - em->block_start == prev->block_end + 1))) { - em->start = prev->start; - em->block_start = prev->block_start; - rb_erase(&prev->rb_node, &tree->map); - prev->in_tree = 0; - free_extent_map(prev); + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(merge, em)) { + em->start = merge->start; + em->len += merge->len; + em->block_start = merge->block_start; + merge->in_tree = 0; + rb_erase(&merge->rb_node, &tree->map); + free_extent_map(merge); } } + rb = rb_next(&em->rb_node); + if (rb) + merge = rb_entry(rb, struct extent_map, rb_node); + if (rb && mergable_maps(em, merge)) { + em->len += merge->len; + rb_erase(&merge->rb_node, &tree->map); + 
merge->in_tree = 0; + free_extent_map(merge); + } + tree->last = em; out: - write_unlock_irq(&tree->lock); return ret; } EXPORT_SYMBOL(add_extent_mapping); +static u64 range_end(u64 start, u64 len) +{ + if (start + len < start) + return (u64)-1; + return start + len; +} + /* * lookup_extent_mapping returns the first extent_map struct in the - * tree that intersects the [start, end] (inclusive) range. There may + * tree that intersects the [start, len] range. There may * be additional objects in the tree that intersect, so check the object * returned carefully to make sure you don't need additional lookups. */ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 end) + u64 start, u64 len) { struct extent_map *em; struct rb_node *rb_node; - struct rb_node *prev = NULL; - struct rb_node *next = NULL; + struct rb_node *prev = NULL; struct rb_node *next = NULL; u64 end = range_end(start, len); em = tree->last; if (em && end > em->start && start < extent_map_end(em)) goto found; - read_lock_irq(&tree->lock); rb_node = __tree_search(&tree->map, start, &prev, &next); if (!rb_node && prev) { em = rb_entry(prev, struct extent_map, rb_node); - if (em->start <= end && em->end >= start) + if (end > em->start && start < extent_map_end(em)) goto found; } if (!rb_node && next) { em = rb_entry(next, struct extent_map, rb_node); - if (em->start <= end && em->end >= start) + if (end > em->start && start < extent_map_end(em)) goto found; } if (!rb_node) { @@ -352,14 +253,16 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, goto out; } em = rb_entry(rb_node, struct extent_map, rb_node); - if (em->end < start || em->start > end) { - em = NULL; - goto out; - } + if (end > em->start && start < extent_map_end(em)) + goto found; + + em = NULL; + goto out; + found: atomic_inc(&em->refs); + tree->last = em; out: - read_unlock_irq(&tree->lock); return em; } EXPORT_SYMBOL(lookup_extent_mapping); @@ -370,2866 +273,12 @@ 
EXPORT_SYMBOL(lookup_extent_mapping); */ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) { - int ret; + int ret = 0; - write_lock_irq(&tree->lock); - ret = tree_delete(&tree->map, em->end); - write_unlock_irq(&tree->lock); + rb_erase(&em->rb_node, &tree->map); + em->in_tree = 0; + if (tree->last == em) + tree->last = NULL; return ret; } EXPORT_SYMBOL(remove_extent_mapping); - -/* - * utility function to look for merge candidates inside a given range. - * Any extents with matching state are merged together into a single - * extent in the tree. Extents with EXTENT_IO in their state field - * are not merged because the end_io handlers need to be able to do - * operations on them without sleeping (or doing allocations/splits). - * - * This should be called with the tree lock held. - */ -static int merge_state(struct extent_map_tree *tree, - struct extent_state *state) -{ - struct extent_state *other; - struct rb_node *other_node; - - if (state->state & EXTENT_IOBITS) - return 0; - - other_node = rb_prev(&state->rb_node); - if (other_node) { - other = rb_entry(other_node, struct extent_state, rb_node); - if (other->end == state->start - 1 && - other->state == state->state) { - state->start = other->start; - other->in_tree = 0; - rb_erase(&other->rb_node, &tree->state); - free_extent_state(other); - } - } - other_node = rb_next(&state->rb_node); - if (other_node) { - other = rb_entry(other_node, struct extent_state, rb_node); - if (other->start == state->end + 1 && - other->state == state->state) { - other->start = state->start; - state->in_tree = 0; - rb_erase(&state->rb_node, &tree->state); - free_extent_state(state); - } - } - return 0; -} - -/* - * insert an extent_state struct into the tree. 'bits' are set on the - * struct before it is inserted. - * - * This may return -EEXIST if the extent is already there, in which case the - * state struct is freed. - * - * The tree lock is not taken internally. 
This is a utility function and - * probably isn't what you want to call (see set/clear_extent_bit). - */ -static int insert_state(struct extent_map_tree *tree, - struct extent_state *state, u64 start, u64 end, - int bits) -{ - struct rb_node *node; - - if (end < start) { - printk("end < start %Lu %Lu\n", end, start); - WARN_ON(1); - } - if (bits & EXTENT_DIRTY) - tree->dirty_bytes += end - start + 1; - state->state |= bits; - state->start = start; - state->end = end; - node = tree_insert(&tree->state, end, &state->rb_node); - if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end); - free_extent_state(state); - return -EEXIST; - } - merge_state(tree, state); - return 0; -} - -/* - * split a given extent state struct in two, inserting the preallocated - * struct 'prealloc' as the newly created second half. 'split' indicates an - * offset inside 'orig' where it should be split. - * - * Before calling, - * the tree has 'orig' at [orig->start, orig->end]. After calling, there - * are two extent state structs in the tree: - * prealloc: [orig->start, split - 1] - * orig: [ split, orig->end ] - * - * The tree locks are not taken by this function. They need to be held - * by the caller. 
- */ -static int split_state(struct extent_map_tree *tree, struct extent_state *orig, - struct extent_state *prealloc, u64 split) -{ - struct rb_node *node; - prealloc->start = orig->start; - prealloc->end = split - 1; - prealloc->state = orig->state; - orig->start = split; - - node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node); - if (node) { - struct extent_state *found; - found = rb_entry(node, struct extent_state, rb_node); - printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end); - free_extent_state(prealloc); - return -EEXIST; - } - return 0; -} - -/* - * utility function to clear some bits in an extent state struct. - * it will optionally wake up any one waiting on this state (wake == 1), or - * forcibly remove the state from the tree (delete == 1). - * - * If no bits are set on the state struct after clearing things, the - * struct is freed and removed from the tree - */ -static int clear_state_bit(struct extent_map_tree *tree, - struct extent_state *state, int bits, int wake, - int delete) -{ - int ret = state->state & bits; - - if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { - u64 range = state->end - state->start + 1; - WARN_ON(range > tree->dirty_bytes); - tree->dirty_bytes -= range; - } - state->state &= ~bits; - if (wake) - wake_up(&state->wq); - if (delete || state->state == 0) { - if (state->in_tree) { - rb_erase(&state->rb_node, &tree->state); - state->in_tree = 0; - free_extent_state(state); - } else { - WARN_ON(1); - } - } else { - merge_state(tree, state); - } - return ret; -} - -/* - * clear some bits on a range in the tree. This may require splitting - * or inserting elements in the tree, so the gfp mask is used to - * indicate which allocations or sleeping are allowed. - * - * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove - * the given range from the tree regardless of state (ie for truncate). - * - * the range [start, end] is inclusive. 
- * - * This takes the tree lock, and returns < 0 on error, > 0 if any of the - * bits were already set, or zero if none of the bits were already set. - */ -int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int wake, int delete, gfp_t mask) -{ - struct extent_state *state; - struct extent_state *prealloc = NULL; - struct rb_node *node; - unsigned long flags; - int err; - int set = 0; - -again: - if (!prealloc && (mask & __GFP_WAIT)) { - prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; - } - - write_lock_irqsave(&tree->lock, flags); - /* - * this search will find the extents that end after - * our range starts - */ - node = tree_search(&tree->state, start); - if (!node) - goto out; - state = rb_entry(node, struct extent_state, rb_node); - if (state->start > end) - goto out; - WARN_ON(state->end < start); - - /* - * | ---- desired range ---- | - * | state | or - * | ------------- state -------------- | - * - * We need to split the extent we found, and may flip - * bits on second half. - * - * If the extent we found extends past our range, we - * just split and search again. It'll get split again - * the next time though. - * - * If the extent we found is inside our range, we clear - * the desired bit on it. 
- */ - - if (state->start < start) { - err = split_state(tree, state, prealloc, start); - BUG_ON(err == -EEXIST); - prealloc = NULL; - if (err) - goto out; - if (state->end <= end) { - start = state->end + 1; - set |= clear_state_bit(tree, state, bits, - wake, delete); - } else { - start = state->start; - } - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and clear the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - if (wake) - wake_up(&state->wq); - set |= clear_state_bit(tree, prealloc, bits, - wake, delete); - prealloc = NULL; - goto out; - } - - start = state->end + 1; - set |= clear_state_bit(tree, state, bits, wake, delete); - goto search_again; - -out: - write_unlock_irqrestore(&tree->lock, flags); - if (prealloc) - free_extent_state(prealloc); - - return set; - -search_again: - if (start > end) - goto out; - write_unlock_irqrestore(&tree->lock, flags); - if (mask & __GFP_WAIT) - cond_resched(); - goto again; -} -EXPORT_SYMBOL(clear_extent_bit); - -static int wait_on_state(struct extent_map_tree *tree, - struct extent_state *state) -{ - DEFINE_WAIT(wait); - prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); - read_unlock_irq(&tree->lock); - schedule(); - read_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - return 0; -} - -/* - * waits for one or more bits to clear on a range in the state tree. - * The range [start, end] is inclusive. 
- * The tree lock is taken by this function - */ -int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits) -{ - struct extent_state *state; - struct rb_node *node; - - read_lock_irq(&tree->lock); -again: - while (1) { - /* - * this search will find all the extents that end after - * our range starts - */ - node = tree_search(&tree->state, start); - if (!node) - break; - - state = rb_entry(node, struct extent_state, rb_node); - - if (state->start > end) - goto out; - - if (state->state & bits) { - start = state->start; - atomic_inc(&state->refs); - wait_on_state(tree, state); - free_extent_state(state); - goto again; - } - start = state->end + 1; - - if (start > end) - break; - - if (need_resched()) { - read_unlock_irq(&tree->lock); - cond_resched(); - read_lock_irq(&tree->lock); - } - } -out: - read_unlock_irq(&tree->lock); - return 0; -} -EXPORT_SYMBOL(wait_extent_bit); - -static void set_state_bits(struct extent_map_tree *tree, - struct extent_state *state, - int bits) -{ - if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { - u64 range = state->end - state->start + 1; - tree->dirty_bytes += range; - } - state->state |= bits; -} - -/* - * set some bits on a range in the tree. This may require allocations - * or sleeping, so the gfp mask is used to indicate what is allowed. - * - * If 'exclusive' == 1, this will fail with -EEXIST if some part of the - * range already has the desired bits set. The start of the existing - * range is returned in failed_start in this case. - * - * [start, end] is inclusive - * This takes the tree lock. 
- */ -int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits, - int exclusive, u64 *failed_start, gfp_t mask) -{ - struct extent_state *state; - struct extent_state *prealloc = NULL; - struct rb_node *node; - unsigned long flags; - int err = 0; - int set; - u64 last_start; - u64 last_end; -again: - if (!prealloc && (mask & __GFP_WAIT)) { - prealloc = alloc_extent_state(mask); - if (!prealloc) - return -ENOMEM; - } - - write_lock_irqsave(&tree->lock, flags); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node) { - err = insert_state(tree, prealloc, start, end, bits); - prealloc = NULL; - BUG_ON(err == -EEXIST); - goto out; - } - - state = rb_entry(node, struct extent_state, rb_node); - last_start = state->start; - last_end = state->end; - - /* - * | ---- desired range ---- | - * | state | - * - * Just lock what we found and keep going - */ - if (state->start == start && state->end <= end) { - set = state->state & bits; - if (set && exclusive) { - *failed_start = state->start; - err = -EEXIST; - goto out; - } - set_state_bits(tree, state, bits); - start = state->end + 1; - merge_state(tree, state); - goto search_again; - } - - /* - * | ---- desired range ---- | - * | state | - * or - * | ------------- state -------------- | - * - * We need to split the extent we found, and may flip bits on - * second half. - * - * If the extent we found extends past our - * range, we just split and search again. It'll get split - * again the next time though. - * - * If the extent we found is inside our range, we set the - * desired bit on it. 
- */ - if (state->start < start) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, start); - BUG_ON(err == -EEXIST); - prealloc = NULL; - if (err) - goto out; - if (state->end <= end) { - set_state_bits(tree, state, bits); - start = state->end + 1; - merge_state(tree, state); - } else { - start = state->start; - } - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | or | state | - * - * There's a hole, we need to insert something in it and - * ignore the extent we found. - */ - if (state->start > start) { - u64 this_end; - if (end < last_start) - this_end = end; - else - this_end = last_start -1; - err = insert_state(tree, prealloc, start, this_end, - bits); - prealloc = NULL; - BUG_ON(err == -EEXIST); - if (err) - goto out; - start = this_end + 1; - goto search_again; - } - /* - * | ---- desired range ---- | - * | state | - * We need to split the extent, and set the bit - * on the first half - */ - if (state->start <= end && state->end > end) { - set = state->state & bits; - if (exclusive && set) { - *failed_start = start; - err = -EEXIST; - goto out; - } - err = split_state(tree, state, prealloc, end + 1); - BUG_ON(err == -EEXIST); - - set_state_bits(tree, prealloc, bits); - merge_state(tree, prealloc); - prealloc = NULL; - goto out; - } - - goto search_again; - -out: - write_unlock_irqrestore(&tree->lock, flags); - if (prealloc) - free_extent_state(prealloc); - - return err; - -search_again: - if (start > end) - goto out; - write_unlock_irqrestore(&tree->lock, flags); - if (mask & __GFP_WAIT) - cond_resched(); - goto again; -} -EXPORT_SYMBOL(set_extent_bit); - -/* wrappers around set/clear extent bit */ -int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_dirty); - -int set_extent_bits(struct 
extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask) -{ - return set_extent_bit(tree, start, end, bits, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_bits); - -int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask) -{ - return clear_extent_bit(tree, start, end, bits, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_bits); - -int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_delalloc); - -int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_dirty); - -int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_new); - -int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_new); - -int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL, - mask); -} -EXPORT_SYMBOL(set_extent_uptodate); - -int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask); -} -EXPORT_SYMBOL(clear_extent_uptodate); - -int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_WRITEBACK, - 0, NULL, mask); -} -EXPORT_SYMBOL(set_extent_writeback); - -int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, 
EXTENT_WRITEBACK, 1, 0, mask); -} -EXPORT_SYMBOL(clear_extent_writeback); - -int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end) -{ - return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK); -} -EXPORT_SYMBOL(wait_on_extent_writeback); - -/* - * locks a range in ascending order, waiting for any locked regions - * it hits on the way. [start,end] are inclusive, and this will sleep. - */ -int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask) -{ - int err; - u64 failed_start; - while (1) { - err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1, - &failed_start, mask); - if (err == -EEXIST && (mask & __GFP_WAIT)) { - wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED); - start = failed_start; - } else { - break; - } - WARN_ON(start > end); - } - return err; -} -EXPORT_SYMBOL(lock_extent); - -int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask); -} -EXPORT_SYMBOL(unlock_extent); - -/* - * helper function to set pages and extents in the tree dirty - */ -int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - __set_page_dirty_nobuffers(page); - page_cache_release(page); - index++; - } - set_extent_dirty(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(set_range_dirty); - -/* - * helper function to set both pages and extents in the tree writeback - */ -int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - BUG_ON(!page); - 
set_page_writeback(page); - page_cache_release(page); - index++; - } - set_extent_writeback(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(set_range_writeback); - -int find_first_extent_bit(struct extent_map_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 1; - - read_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->end >= start && (state->state & bits)) { - *start_ret = state->start; - *end_ret = state->end; - ret = 0; - break; - } - node = rb_next(node); - if (!node) - break; - } -out: - read_unlock_irq(&tree->lock); - return ret; -} -EXPORT_SYMBOL(find_first_extent_bit); - -u64 find_lock_delalloc_range(struct extent_map_tree *tree, - u64 *start, u64 *end, u64 max_bytes) -{ - struct rb_node *node; - struct extent_state *state; - u64 cur_start = *start; - u64 found = 0; - u64 total_bytes = 0; - - write_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. 
- */ -search_again: - node = tree_search(&tree->state, cur_start); - if (!node || IS_ERR(node)) { - *end = (u64)-1; - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (found && state->start != cur_start) { - goto out; - } - if (!(state->state & EXTENT_DELALLOC)) { - if (!found) - *end = state->end; - goto out; - } - if (!found) { - struct extent_state *prev_state; - struct rb_node *prev_node = node; - while(1) { - prev_node = rb_prev(prev_node); - if (!prev_node) - break; - prev_state = rb_entry(prev_node, - struct extent_state, - rb_node); - if (!(prev_state->state & EXTENT_DELALLOC)) - break; - state = prev_state; - node = prev_node; - } - } - if (state->state & EXTENT_LOCKED) { - DEFINE_WAIT(wait); - atomic_inc(&state->refs); - prepare_to_wait(&state->wq, &wait, - TASK_UNINTERRUPTIBLE); - write_unlock_irq(&tree->lock); - schedule(); - write_lock_irq(&tree->lock); - finish_wait(&state->wq, &wait); - free_extent_state(state); - goto search_again; - } - state->state |= EXTENT_LOCKED; - if (!found) - *start = state->start; - found++; - *end = state->end; - cur_start = state->end + 1; - node = rb_next(node); - if (!node) - break; - total_bytes += state->end - state->start + 1; - if (total_bytes >= max_bytes) - break; - } -out: - write_unlock_irq(&tree->lock); - return found; -} - -u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 search_end, u64 max_bytes, - unsigned long bits) -{ - struct rb_node *node; - struct extent_state *state; - u64 cur_start = *start; - u64 total_bytes = 0; - int found = 0; - - if (search_end <= cur_start) { - printk("search_end %Lu start %Lu\n", search_end, cur_start); - WARN_ON(1); - return 0; - } - - write_lock_irq(&tree->lock); - if (cur_start == 0 && bits == EXTENT_DIRTY) { - total_bytes = tree->dirty_bytes; - goto out; - } - /* - * this search will find all the extents that end after - * our range starts. 
- */ - node = tree_search(&tree->state, cur_start); - if (!node || IS_ERR(node)) { - goto out; - } - - while(1) { - state = rb_entry(node, struct extent_state, rb_node); - if (state->start > search_end) - break; - if (state->end >= cur_start && (state->state & bits)) { - total_bytes += min(search_end, state->end) + 1 - - max(cur_start, state->start); - if (total_bytes >= max_bytes) - break; - if (!found) { - *start = state->start; - found = 1; - } - } - node = rb_next(node); - if (!node) - break; - } -out: - write_unlock_irq(&tree->lock); - return total_bytes; -} -/* - * helper function to lock both pages and extents in the tree. - * pages must be locked first. - */ -int lock_range(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - int err; - - while (index <= end_index) { - page = grab_cache_page(tree->mapping, index); - if (!page) { - err = -ENOMEM; - goto failed; - } - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto failed; - } - index++; - } - lock_extent(tree, start, end, GFP_NOFS); - return 0; - -failed: - /* - * we failed above in getting the page at 'index', so we undo here - * up to but not including the page at 'index' - */ - end_index = index; - index = start >> PAGE_CACHE_SHIFT; - while (index < end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - return err; -} -EXPORT_SYMBOL(lock_range); - -/* - * helper function to unlock both pages and extents in the tree. 
- */ -int unlock_range(struct extent_map_tree *tree, u64 start, u64 end) -{ - unsigned long index = start >> PAGE_CACHE_SHIFT; - unsigned long end_index = end >> PAGE_CACHE_SHIFT; - struct page *page; - - while (index <= end_index) { - page = find_get_page(tree->mapping, index); - unlock_page(page); - page_cache_release(page); - index++; - } - unlock_extent(tree, start, end, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(unlock_range); - -int set_state_private(struct extent_map_tree *tree, u64 start, u64 private) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 0; - - write_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - state->private = private; -out: - write_unlock_irq(&tree->lock); - return ret; -} - -int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private) -{ - struct rb_node *node; - struct extent_state *state; - int ret = 0; - - read_lock_irq(&tree->lock); - /* - * this search will find all the extents that end after - * our range starts. - */ - node = tree_search(&tree->state, start); - if (!node || IS_ERR(node)) { - ret = -ENOENT; - goto out; - } - state = rb_entry(node, struct extent_state, rb_node); - if (state->start != start) { - ret = -ENOENT; - goto out; - } - *private = state->private; -out: - read_unlock_irq(&tree->lock); - return ret; -} - -/* - * searches a range in the state tree for a given mask. - * If 'filled' == 1, this returns 1 only if ever extent in the tree - * has the bits set. Otherwise, 1 is returned if any bit in the - * range is found set. 
- */ -int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled) -{ - struct extent_state *state = NULL; - struct rb_node *node; - int bitset = 0; - - read_lock_irq(&tree->lock); - node = tree_search(&tree->state, start); - while (node && start <= end) { - state = rb_entry(node, struct extent_state, rb_node); - - if (filled && state->start > start) { - bitset = 0; - break; - } - - if (state->start > end) - break; - - if (state->state & bits) { - bitset = 1; - if (!filled) - break; - } else if (filled) { - bitset = 0; - break; - } - start = state->end + 1; - if (start > end) - break; - node = rb_next(node); - if (!node) { - if (filled) - bitset = 0; - break; - } - } - read_unlock_irq(&tree->lock); - return bitset; -} -EXPORT_SYMBOL(test_range_bit); - -/* - * helper function to set a given page up to date if all the - * extents in the tree for that page are up to date - */ -static int check_page_uptodate(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1)) - SetPageUptodate(page); - return 0; -} - -/* - * helper function to unlock a page if all the extents in the tree - * for that page are unlocked - */ -static int check_page_locked(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0)) - unlock_page(page); - return 0; -} - -/* - * helper function to end page writeback if all the extents - * in the tree for that page are done with writeback - */ -static int check_page_writeback(struct extent_map_tree *tree, - struct page *page) -{ - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0)) - end_page_writeback(page); - return 0; -} - -/* 
lots and lots of room for performance fixes in the end_bio funcs */ - -/* - * after a writepage IO is done, we need to: - * clear the uptodate bits on error - * clear the writeback bits in the extent tree for this IO - * end_page_writeback if the page has no more pending IO - * - * Scheduling is not allowed, so the extent state tree is expected - * to have one and only one object corresponding to this IO. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_writepage(struct bio *bio, int err) -#else -static int end_bio_extent_writepage(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - int whole_page; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (!uptodate) { - clear_extent_uptodate(tree, start, end, GFP_ATOMIC); - ClearPageUptodate(page); - SetPageError(page); - } - clear_extent_writeback(tree, start, end, GFP_ATOMIC); - - if (whole_page) - end_page_writeback(page); - else - check_page_writeback(tree, page); - if (tree->ops && tree->ops->writepage_end_io_hook) - tree->ops->writepage_end_io_hook(page, start, end); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -/* - * after a readpage IO is done, we need to: - * clear the uptodate bits on error - * set the uptodate bits if things worked - * set the page up to date if all extents in the tree are uptodate - * clear the lock 
bit in the extent tree - * unlock the page if there are no other extents locked for it - * - * Scheduling is not allowed, so the extent state tree is expected - * to have one and only one object corresponding to this IO. - */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_readpage(struct bio *bio, int err) -#else -static int end_bio_extent_readpage(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - int whole_page; - int ret; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) - whole_page = 1; - else - whole_page = 0; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { - ret = tree->ops->readpage_end_io_hook(page, start, end); - if (ret) - uptodate = 0; - } - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - if (whole_page) - SetPageUptodate(page); - else - check_page_uptodate(tree, page); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent(tree, start, end, GFP_ATOMIC); - - if (whole_page) - unlock_page(page); - else - check_page_locked(tree, page); - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -/* - * IO done from prepare_write is pretty simple, we just unlock - * the structs in the extent tree when done, and set the uptodate bits - * as appropriate. 
- */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void end_bio_extent_preparewrite(struct bio *bio, int err) -#else -static int end_bio_extent_preparewrite(struct bio *bio, - unsigned int bytes_done, int err) -#endif -{ - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - struct extent_map_tree *tree = bio->bi_private; - u64 start; - u64 end; - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - - do { - struct page *page = bvec->bv_page; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; - end = start + bvec->bv_len - 1; - - if (--bvec >= bio->bi_io_vec) - prefetchw(&bvec->bv_page->flags); - - if (uptodate) { - set_extent_uptodate(tree, start, end, GFP_ATOMIC); - } else { - ClearPageUptodate(page); - SetPageError(page); - } - - unlock_extent(tree, start, end, GFP_ATOMIC); - - } while (bvec >= bio->bi_io_vec); - - bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif -} - -static struct bio * -extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, - gfp_t gfp_flags) -{ - struct bio *bio; - - bio = bio_alloc(gfp_flags, nr_vecs); - - if (bio == NULL && (current->flags & PF_MEMALLOC)) { - while (!bio && (nr_vecs /= 2)) - bio = bio_alloc(gfp_flags, nr_vecs); - } - - if (bio) { - bio->bi_bdev = bdev; - bio->bi_sector = first_sector; - } - return bio; -} - -static int submit_one_bio(int rw, struct bio *bio) -{ - u64 maxsector; - int ret = 0; - - bio_get(bio); - - maxsector = bio->bi_bdev->bd_inode->i_size >> 9; - if (maxsector < bio->bi_sector) { - printk("sector too large max %Lu got %llu\n", maxsector, - (unsigned long long)bio->bi_sector); - WARN_ON(1); - } - - submit_bio(rw, bio); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - bio_put(bio); - return ret; -} - -static int submit_extent_page(int rw, struct extent_map_tree *tree, - struct page *page, sector_t 
sector, - size_t size, unsigned long offset, - struct block_device *bdev, - struct bio **bio_ret, - unsigned long max_pages, - bio_end_io_t end_io_func) -{ - int ret = 0; - struct bio *bio; - int nr; - - if (bio_ret && *bio_ret) { - bio = *bio_ret; - if (bio->bi_sector + (bio->bi_size >> 9) != sector || - bio_add_page(bio, page, size, offset) < size) { - ret = submit_one_bio(rw, bio); - bio = NULL; - } else { - return 0; - } - } - nr = min_t(int, max_pages, bio_get_nr_vecs(bdev)); - bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); - if (!bio) { - printk("failed to allocate bio nr %d\n", nr); - } - bio_add_page(bio, page, size, offset); - bio->bi_end_io = end_io_func; - bio->bi_private = tree; - if (bio_ret) { - *bio_ret = bio; - } else { - ret = submit_one_bio(rw, bio); - } - - return ret; -} - -void set_page_extent_mapped(struct page *page) -{ - if (!PagePrivate(page)) { - SetPagePrivate(page); - WARN_ON(!page->mapping->a_ops->invalidatepage); - set_page_private(page, EXTENT_PAGE_PRIVATE); - page_cache_get(page); - } -} - -void set_page_extent_head(struct page *page, unsigned long len) -{ - set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2); -} - -/* - * basic readpage implementation. 
Locked extent state structs are inserted - * into the tree that are removed when the IO is done (by the end_io - * handlers) - */ -static int __extent_read_full_page(struct extent_map_tree *tree, - struct page *page, - get_extent_t *get_extent, - struct bio **bio) -{ - struct inode *inode = page->mapping->host; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = start + PAGE_CACHE_SIZE - 1; - u64 end; - u64 cur = start; - u64 extent_offset; - u64 last_byte = i_size_read(inode); - u64 block_start; - u64 cur_end; - sector_t sector; - struct extent_map *em; - struct block_device *bdev; - int ret; - int nr = 0; - size_t page_offset = 0; - size_t iosize; - size_t blocksize = inode->i_sb->s_blocksize; - - set_page_extent_mapped(page); - - end = page_end; - lock_extent(tree, start, end, GFP_NOFS); - - while (cur <= end) { - if (cur >= last_byte) { - char *userpage; - iosize = PAGE_CACHE_SIZE - page_offset; - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - break; - } - em = get_extent(inode, page, page_offset, cur, end, 0); - if (IS_ERR(em) || !em) { - SetPageError(page); - unlock_extent(tree, cur, end, GFP_NOFS); - break; - } - - extent_offset = cur - em->start; - BUG_ON(em->end < cur); - BUG_ON(end < cur); - - iosize = min(em->end - cur, end - cur) + 1; - cur_end = min(em->end, end); - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; - bdev = em->bdev; - block_start = em->block_start; - free_extent_map(em); - em = NULL; - - /* we've found a hole, just zero and go on */ - if (block_start == EXTENT_MAP_HOLE) { - char *userpage; - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + page_offset, 0, iosize); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - 
- set_extent_uptodate(tree, cur, cur + iosize - 1, - GFP_NOFS); - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - /* the get_extent function already copied into the page */ - if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) { - unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - - ret = 0; - if (tree->ops && tree->ops->readpage_io_hook) { - ret = tree->ops->readpage_io_hook(page, cur, - cur + iosize - 1); - } - if (!ret) { - unsigned long nr = (last_byte >> PAGE_CACHE_SHIFT) + 1; - nr -= page->index; - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, - bdev, bio, nr, - end_bio_extent_readpage); - } - if (ret) - SetPageError(page); - cur = cur + iosize; - page_offset += iosize; - nr++; - } - if (!nr) { - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); - } - return 0; -} - -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent) -{ - struct bio *bio = NULL; - int ret; - - ret = __extent_read_full_page(tree, page, get_extent, &bio); - if (bio) - submit_one_bio(READ, bio); - return ret; -} -EXPORT_SYMBOL(extent_read_full_page); - -/* - * the writepage semantics are similar to regular writepage. extent - * records are inserted to lock ranges in the tree, and as dirty areas - * are found, they are marked writeback. 
Then the lock bits are removed - * and the end_io handler clears the writeback ranges - */ -static int __extent_writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct inode *inode = page->mapping->host; - struct extent_page_data *epd = data; - struct extent_map_tree *tree = epd->tree; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 delalloc_start; - u64 page_end = start + PAGE_CACHE_SIZE - 1; - u64 end; - u64 cur = start; - u64 extent_offset; - u64 last_byte = i_size_read(inode); - u64 block_start; - u64 iosize; - sector_t sector; - struct extent_map *em; - struct block_device *bdev; - int ret; - int nr = 0; - size_t page_offset = 0; - size_t blocksize; - loff_t i_size = i_size_read(inode); - unsigned long end_index = i_size >> PAGE_CACHE_SHIFT; - u64 nr_delalloc; - u64 delalloc_end; - - WARN_ON(!PageLocked(page)); - if (page->index > end_index) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - unlock_page(page); - return 0; - } - - if (page->index == end_index) { - char *userpage; - - size_t offset = i_size & (PAGE_CACHE_SIZE - 1); - - userpage = kmap_atomic(page, KM_USER0); - memset(userpage + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(userpage, KM_USER0); - } - - set_page_extent_mapped(page); - - delalloc_start = start; - delalloc_end = 0; - while(delalloc_end < page_end) { - nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start, - &delalloc_end, - 128 * 1024 * 1024); - if (nr_delalloc == 0) { - delalloc_start = delalloc_end + 1; - continue; - } - tree->ops->fill_delalloc(inode, delalloc_start, - delalloc_end); - clear_extent_bit(tree, delalloc_start, - delalloc_end, - EXTENT_LOCKED | EXTENT_DELALLOC, - 1, 0, GFP_NOFS); - delalloc_start = delalloc_end + 1; - } - lock_extent(tree, start, page_end, GFP_NOFS); - - end = page_end; - if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) { - printk("found delalloc bits after lock_extent\n"); - } - - if (last_byte <= 
start) { - clear_extent_dirty(tree, start, page_end, GFP_NOFS); - goto done; - } - - set_extent_uptodate(tree, start, page_end, GFP_NOFS); - blocksize = inode->i_sb->s_blocksize; - - while (cur <= end) { - if (cur >= last_byte) { - clear_extent_dirty(tree, cur, page_end, GFP_NOFS); - break; - } - em = epd->get_extent(inode, page, page_offset, cur, end, 1); - if (IS_ERR(em) || !em) { - SetPageError(page); - break; - } - - extent_offset = cur - em->start; - BUG_ON(em->end < cur); - BUG_ON(end < cur); - iosize = min(em->end - cur, end - cur) + 1; - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); - sector = (em->block_start + extent_offset) >> 9; - bdev = em->bdev; - block_start = em->block_start; - free_extent_map(em); - em = NULL; - - if (block_start == EXTENT_MAP_HOLE || - block_start == EXTENT_MAP_INLINE) { - clear_extent_dirty(tree, cur, - cur + iosize - 1, GFP_NOFS); - cur = cur + iosize; - page_offset += iosize; - continue; - } - - /* leave this out until we have a page_mkwrite call */ - if (0 && !test_range_bit(tree, cur, cur + iosize - 1, - EXTENT_DIRTY, 0)) { - cur = cur + iosize; - page_offset += iosize; - continue; - } - clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS); - if (tree->ops && tree->ops->writepage_io_hook) { - ret = tree->ops->writepage_io_hook(page, cur, - cur + iosize - 1); - } else { - ret = 0; - } - if (ret) - SetPageError(page); - else { - unsigned long max_nr = end_index + 1; - set_range_writeback(tree, cur, cur + iosize - 1); - if (!PageWriteback(page)) { - printk("warning page %lu not writeback, " - "cur %llu end %llu\n", page->index, - (unsigned long long)cur, - (unsigned long long)end); - } - - ret = submit_extent_page(WRITE, tree, page, sector, - iosize, page_offset, bdev, - &epd->bio, max_nr, - end_bio_extent_writepage); - if (ret) - SetPageError(page); - } - cur = cur + iosize; - page_offset += iosize; - nr++; - } -done: - if (nr == 0) { - /* make sure the mapping tag for page dirty gets cleared */ - 
set_page_writeback(page); - end_page_writeback(page); - } - unlock_extent(tree, start, page_end, GFP_NOFS); - unlock_page(page); - return 0; -} - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - -/* Taken directly from 2.6.23 for 2.6.18 back port */ -typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, - void *data); - -/** - * write_cache_pages - walk the list of dirty pages of the given address space - * and write all of them. - * @mapping: address space structure to write - * @wbc: subtract the number of written pages from *@wbc->nr_to_write - * @writepage: function called for each page - * @data: data passed to writepage function - * - * If a page is already under I/O, write_cache_pages() skips it, even - * if it's dirty. This is desirable behaviour for memory-cleaning writeback, - * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() - * and msync() need to guarantee that all the data which was dirty at the time - * the call was made get new I/O started against them. If wbc->sync_mode is - * WB_SYNC_ALL then we were called for data integrity and we must wait for - * existing IO to complete. 
- */ -static int write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) -{ - struct backing_dev_info *bdi = mapping->backing_dev_info; - int ret = 0; - int done = 0; - struct pagevec pvec; - int nr_pages; - pgoff_t index; - pgoff_t end; /* Inclusive */ - int scanned = 0; - int range_whole = 0; - - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - - pagevec_init(&pvec, 0); - if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ - end = -1; - } else { - index = wbc->range_start >> PAGE_CACHE_SHIFT; - end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) - range_whole = 1; - scanned = 1; - } -retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - unsigned i; - - scanned = 1; - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - /* - * At this point we hold neither mapping->tree_lock nor - * lock on the page itself: the page may be truncated or - * invalidated (changing page->mapping to NULL), or even - * swizzled back from swapper_space to tmpfs file - * mapping - */ - lock_page(page); - - if (unlikely(page->mapping != mapping)) { - unlock_page(page); - continue; - } - - if (!wbc->range_cyclic && page->index > end) { - done = 1; - unlock_page(page); - continue; - } - - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; - } - - ret = (*writepage)(page, wbc, data); - - if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) { - unlock_page(page); - ret = 0; - } - if (ret || (--(wbc->nr_to_write) <= 0)) - done = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - done = 1; - } - } - 
pagevec_release(&pvec); - cond_resched(); - } - if (!scanned && !done) { - /* - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - scanned = 1; - index = 0; - goto retry; - } - if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; - return ret; -} -#endif - -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc) -{ - int ret; - struct address_space *mapping = page->mapping; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .get_extent = get_extent, - }; - struct writeback_control wbc_writepages = { - .bdi = wbc->bdi, - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .nr_to_write = 64, - .range_start = page_offset(page) + PAGE_CACHE_SIZE, - .range_end = (loff_t)-1, - }; - - - ret = __extent_writepage(page, wbc, &epd); - - write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd); - if (epd.bio) { - submit_one_bio(WRITE, epd.bio); - } - return ret; -} -EXPORT_SYMBOL(extent_write_full_page); - - -int extent_writepages(struct extent_map_tree *tree, - struct address_space *mapping, - get_extent_t *get_extent, - struct writeback_control *wbc) -{ - int ret = 0; - struct extent_page_data epd = { - .bio = NULL, - .tree = tree, - .get_extent = get_extent, - }; - - ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd); - if (epd.bio) { - submit_one_bio(WRITE, epd.bio); - } - return ret; -} -EXPORT_SYMBOL(extent_writepages); - -int extent_readpages(struct extent_map_tree *tree, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent) -{ - struct bio *bio = NULL; - unsigned page_idx; - struct pagevec pvec; - - pagevec_init(&pvec, 0); - for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = list_entry(pages->prev, struct page, lru); - - prefetchw(&page->flags); - list_del(&page->lru); - 
/* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (!add_to_page_cache(page, mapping, - page->index, GFP_KERNEL)) { - - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add(&pvec); - __extent_read_full_page(tree, page, get_extent, &bio); - } - page_cache_release(page); - } - if (pagevec_count(&pvec)) - __pagevec_lru_add(&pvec); - BUG_ON(!list_empty(pages)); - if (bio) - submit_one_bio(READ, bio); - return 0; -} -EXPORT_SYMBOL(extent_readpages); - -/* - * basic invalidatepage code, this waits on any locked or writeback - * ranges corresponding to the page, and then deletes any extent state - * records from the tree - */ -int extent_invalidatepage(struct extent_map_tree *tree, - struct page *page, unsigned long offset) -{ - u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); - u64 end = start + PAGE_CACHE_SIZE - 1; - size_t blocksize = page->mapping->host->i_sb->s_blocksize; - - start += (offset + blocksize -1) & ~(blocksize - 1); - if (start > end) - return 0; - - lock_extent(tree, start, end, GFP_NOFS); - wait_on_extent_writeback(tree, start, end); - clear_extent_bit(tree, start, end, - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, - 1, 1, GFP_NOFS); - return 0; -} -EXPORT_SYMBOL(extent_invalidatepage); - -/* - * simple commit_write call, set_range_dirty is used to mark both - * the pages and the extent records as dirty - */ -int extent_commit_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - - set_page_extent_mapped(page); - set_page_dirty(page); - - if (pos > inode->i_size) { - i_size_write(inode, pos); - mark_inode_dirty(inode); - } - return 0; -} -EXPORT_SYMBOL(extent_commit_write); - -int extent_prepare_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - 
unsigned from, unsigned to, get_extent_t *get_extent) -{ - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 page_end = page_start + PAGE_CACHE_SIZE - 1; - u64 block_start; - u64 orig_block_start; - u64 block_end; - u64 cur_end; - struct extent_map *em; - unsigned blocksize = 1 << inode->i_blkbits; - size_t page_offset = 0; - size_t block_off_start; - size_t block_off_end; - int err = 0; - int iocount = 0; - int ret = 0; - int isnew; - - set_page_extent_mapped(page); - - block_start = (page_start + from) & ~((u64)blocksize - 1); - block_end = (page_start + to - 1) | (blocksize - 1); - orig_block_start = block_start; - - lock_extent(tree, page_start, page_end, GFP_NOFS); - while(block_start <= block_end) { - em = get_extent(inode, page, page_offset, block_start, - block_end, 1); - if (IS_ERR(em) || !em) { - goto err; - } - cur_end = min(block_end, em->end); - block_off_start = block_start & (PAGE_CACHE_SIZE - 1); - block_off_end = block_off_start + blocksize; - isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS); - - if (!PageUptodate(page) && isnew && - (block_off_end > to || block_off_start < from)) { - void *kaddr; - - kaddr = kmap_atomic(page, KM_USER0); - if (block_off_end > to) - memset(kaddr + to, 0, block_off_end - to); - if (block_off_start < from) - memset(kaddr + block_off_start, 0, - from - block_off_start); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - } - if ((em->block_start != EXTENT_MAP_HOLE && - em->block_start != EXTENT_MAP_INLINE) && - !isnew && !PageUptodate(page) && - (block_off_end > to || block_off_start < from) && - !test_range_bit(tree, block_start, cur_end, - EXTENT_UPTODATE, 1)) { - u64 sector; - u64 extent_offset = block_start - em->start; - size_t iosize; - sector = (em->block_start + extent_offset) >> 9; - iosize = (cur_end - block_start + blocksize) & - ~((u64)blocksize - 1); - /* - * we've already got the extent locked, but we - * need to split the state such that our end_bio - * handler can 
clear the lock. - */ - set_extent_bit(tree, block_start, - block_start + iosize - 1, - EXTENT_LOCKED, 0, NULL, GFP_NOFS); - ret = submit_extent_page(READ, tree, page, - sector, iosize, page_offset, em->bdev, - NULL, 1, - end_bio_extent_preparewrite); - iocount++; - block_start = block_start + iosize; - } else { - set_extent_uptodate(tree, block_start, cur_end, - GFP_NOFS); - unlock_extent(tree, block_start, cur_end, GFP_NOFS); - block_start = cur_end + 1; - } - page_offset = block_start & (PAGE_CACHE_SIZE - 1); - free_extent_map(em); - } - if (iocount) { - wait_extent_bit(tree, orig_block_start, - block_end, EXTENT_LOCKED); - } - check_page_uptodate(tree, page); -err: - /* FIXME, zero out newly allocated blocks on error */ - return err; -} -EXPORT_SYMBOL(extent_prepare_write); - -/* - * a helper for releasepage. As long as there are no locked extents - * in the range corresponding to the page, both state records and extent - * map records are removed - */ -int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page) -{ - struct extent_map *em; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; - u64 end = start + PAGE_CACHE_SIZE - 1; - u64 orig_start = start; - int ret = 1; - - while (start <= end) { - em = lookup_extent_mapping(tree, start, end); - if (!em || IS_ERR(em)) - break; - if (!test_range_bit(tree, em->start, em->end, - EXTENT_LOCKED, 0)) { - remove_extent_mapping(tree, em); - /* once for the rb tree */ - free_extent_map(em); - } - start = em->end + 1; - /* once for us */ - free_extent_map(em); - } - if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) - ret = 0; - else - clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, - 1, 1, GFP_NOFS); - return ret; -} -EXPORT_SYMBOL(try_release_extent_mapping); - -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent) -{ - struct inode *inode = mapping->host; - u64 start = iblock << inode->i_blkbits; - u64 end = start + (1 << 
inode->i_blkbits) - 1; - sector_t sector = 0; - struct extent_map *em; - - em = get_extent(inode, NULL, 0, start, end, 0); - if (!em || IS_ERR(em)) - return 0; - - if (em->block_start == EXTENT_MAP_INLINE || - em->block_start == EXTENT_MAP_HOLE) - goto out; - - sector = (em->block_start + start - em->start) >> inode->i_blkbits; -out: - free_extent_map(em); - return sector; -} - -static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb) -{ - if (list_empty(&eb->lru)) { - extent_buffer_get(eb); - list_add(&eb->lru, &tree->buffer_lru); - tree->lru_size++; - if (tree->lru_size >= BUFFER_LRU_MAX) { - struct extent_buffer *rm; - rm = list_entry(tree->buffer_lru.prev, - struct extent_buffer, lru); - tree->lru_size--; - list_del_init(&rm->lru); - free_extent_buffer(rm); - } - } else - list_move(&eb->lru, &tree->buffer_lru); - return 0; -} -static struct extent_buffer *find_lru(struct extent_map_tree *tree, - u64 start, unsigned long len) -{ - struct list_head *lru = &tree->buffer_lru; - struct list_head *cur = lru->next; - struct extent_buffer *eb; - - if (list_empty(lru)) - return NULL; - - do { - eb = list_entry(cur, struct extent_buffer, lru); - if (eb->start == start && eb->len == len) { - extent_buffer_get(eb); - return eb; - } - cur = cur->next; - } while (cur != lru); - return NULL; -} - -static inline unsigned long num_extent_pages(u64 start, u64 len) -{ - return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - - (start >> PAGE_CACHE_SHIFT); -} - -static inline struct page *extent_buffer_page(struct extent_buffer *eb, - unsigned long i) -{ - struct page *p; - struct address_space *mapping; - - if (i == 0) - return eb->first_page; - i += eb->start >> PAGE_CACHE_SHIFT; - mapping = eb->first_page->mapping; - read_lock_irq(&mapping->tree_lock); - p = radix_tree_lookup(&mapping->page_tree, i); - read_unlock_irq(&mapping->tree_lock); - return p; -} - -static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, 
- unsigned long len, - gfp_t mask) -{ - struct extent_buffer *eb = NULL; - - spin_lock(&tree->lru_lock); - eb = find_lru(tree, start, len); - spin_unlock(&tree->lru_lock); - if (eb) { - return eb; - } - - eb = kmem_cache_zalloc(extent_buffer_cache, mask); - INIT_LIST_HEAD(&eb->lru); - eb->start = start; - eb->len = len; - atomic_set(&eb->refs, 1); - - return eb; -} - -static void __free_extent_buffer(struct extent_buffer *eb) -{ - kmem_cache_free(extent_buffer_cache, eb); -} - -struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - struct page *page0, - gfp_t mask) -{ - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - if (page0) { - eb->first_page = page0; - i = 1; - index++; - page_cache_get(page0); - mark_page_accessed(page0); - set_page_extent_mapped(page0); - WARN_ON(!PageUptodate(page0)); - set_page_extent_head(page0, len); - } else { - i = 0; - } - for (; i < num_pages; i++, index++) { - p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM); - if (!p) { - WARN_ON(1); - goto fail; - } - set_page_extent_mapped(p); - mark_page_accessed(p); - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); - return eb; - -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return 
NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; -} -EXPORT_SYMBOL(alloc_extent_buffer); - -struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - gfp_t mask) -{ - unsigned long num_pages = num_extent_pages(start, len); - unsigned long i; - unsigned long index = start >> PAGE_CACHE_SHIFT; - struct extent_buffer *eb; - struct page *p; - struct address_space *mapping = tree->mapping; - int uptodate = 1; - - eb = __alloc_extent_buffer(tree, start, len, mask); - if (!eb || IS_ERR(eb)) - return NULL; - - if (eb->flags & EXTENT_BUFFER_FILLED) - goto lru_add; - - for (i = 0; i < num_pages; i++, index++) { - p = find_lock_page(mapping, index); - if (!p) { - goto fail; - } - set_page_extent_mapped(p); - mark_page_accessed(p); - - if (i == 0) { - eb->first_page = p; - set_page_extent_head(p, len); - } else { - set_page_private(p, EXTENT_PAGE_PRIVATE); - } - - if (!PageUptodate(p)) - uptodate = 0; - unlock_page(p); - } - if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; - -lru_add: - spin_lock(&tree->lru_lock); - add_lru(tree, eb); - spin_unlock(&tree->lru_lock); - return eb; -fail: - spin_lock(&tree->lru_lock); - list_del_init(&eb->lru); - spin_unlock(&tree->lru_lock); - if (!atomic_dec_and_test(&eb->refs)) - return NULL; - for (index = 1; index < i; index++) { - page_cache_release(extent_buffer_page(eb, index)); - } - if (i > 0) - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); - return NULL; -} -EXPORT_SYMBOL(find_extent_buffer); - -void free_extent_buffer(struct extent_buffer *eb) -{ - unsigned long i; - unsigned long num_pages; - - if (!eb) - return; - - if (!atomic_dec_and_test(&eb->refs)) - return; - - WARN_ON(!list_empty(&eb->lru)); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 1; i < 
num_pages; i++) { - page_cache_release(extent_buffer_page(eb, i)); - } - page_cache_release(extent_buffer_page(eb, 0)); - __free_extent_buffer(eb); -} -EXPORT_SYMBOL(free_extent_buffer); - -int clear_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - int set; - unsigned long i; - unsigned long num_pages; - struct page *page; - - u64 start = eb->start; - u64 end = start + eb->len - 1; - - set = clear_extent_dirty(tree, start, end, GFP_NOFS); - num_pages = num_extent_pages(eb->start, eb->len); - - for (i = 0; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - lock_page(page); - if (i == 0) - set_page_extent_head(page, eb->len); - else - set_page_private(page, EXTENT_PAGE_PRIVATE); - - /* - * if we're on the last page or the first page and the - * block isn't aligned on a page boundary, do extra checks - * to make sure we don't clean page that is partially dirty - */ - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - start = (u64)page->index << PAGE_CACHE_SHIFT; - end = start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(tree, start, end, - EXTENT_DIRTY, 0)) { - unlock_page(page); - continue; - } - } - clear_page_dirty_for_io(page); - write_lock_irq(&page->mapping->tree_lock); - if (!PageDirty(page)) { - radix_tree_tag_clear(&page->mapping->page_tree, - page_index(page), - PAGECACHE_TAG_DIRTY); - } - write_unlock_irq(&page->mapping->tree_lock); - unlock_page(page); - } - return 0; -} -EXPORT_SYMBOL(clear_extent_buffer_dirty); - -int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - return wait_on_extent_writeback(tree, eb->start, - eb->start + eb->len - 1); -} -EXPORT_SYMBOL(wait_on_extent_buffer_writeback); - -int set_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - unsigned long i; - unsigned long num_pages; - - num_pages = num_extent_pages(eb->start, eb->len); - 
for (i = 0; i < num_pages; i++) { - struct page *page = extent_buffer_page(eb, i); - /* writepage may need to do something special for the - * first page, we have to make sure page->private is - * properly set. releasepage may drop page->private - * on us if the page isn't already dirty. - */ - if (i == 0) { - lock_page(page); - set_page_extent_head(page, eb->len); - } else if (PagePrivate(page) && - page->private != EXTENT_PAGE_PRIVATE) { - lock_page(page); - set_page_extent_mapped(page); - unlock_page(page); - } - __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); - if (i == 0) - unlock_page(page); - } - return set_extent_dirty(tree, eb->start, - eb->start + eb->len - 1, GFP_NOFS); -} -EXPORT_SYMBOL(set_extent_buffer_dirty); - -int set_extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - unsigned long i; - struct page *page; - unsigned long num_pages; - - num_pages = num_extent_pages(eb->start, eb->len); - - set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - GFP_NOFS); - for (i = 0; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || - ((i == num_pages - 1) && - ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { - check_page_uptodate(tree, page); - continue; - } - SetPageUptodate(page); - } - return 0; -} -EXPORT_SYMBOL(set_extent_buffer_uptodate); - -int extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb) -{ - if (eb->flags & EXTENT_UPTODATE) - return 1; - return test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1); -} -EXPORT_SYMBOL(extent_buffer_uptodate); - -int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, - u64 start, - int wait) -{ - unsigned long i; - unsigned long start_i; - struct page *page; - int err; - int ret = 0; - unsigned long num_pages; - - if (eb->flags & EXTENT_UPTODATE) - return 0; - - if (0 && test_range_bit(tree, eb->start, eb->start + 
eb->len - 1, - EXTENT_UPTODATE, 1)) { - return 0; - } - - if (start) { - WARN_ON(start < eb->start); - start_i = (start >> PAGE_CACHE_SHIFT) - - (eb->start >> PAGE_CACHE_SHIFT); - } else { - start_i = 0; - } - - num_pages = num_extent_pages(eb->start, eb->len); - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - if (PageUptodate(page)) { - continue; - } - if (!wait) { - if (TestSetPageLocked(page)) { - continue; - } - } else { - lock_page(page); - } - if (!PageUptodate(page)) { - err = page->mapping->a_ops->readpage(NULL, page); - if (err) { - ret = err; - } - } else { - unlock_page(page); - } - } - - if (ret || !wait) { - return ret; - } - - for (i = start_i; i < num_pages; i++) { - page = extent_buffer_page(eb, i); - wait_on_page_locked(page); - if (!PageUptodate(page)) { - ret = -EIO; - } - } - if (!ret) - eb->flags |= EXTENT_UPTODATE; - return ret; -} -EXPORT_SYMBOL(read_extent_buffer_pages); - -void read_extent_buffer(struct extent_buffer *eb, void *dstv, - unsigned long start, - unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *dst = (char *)dstv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long num_pages = num_extent_pages(eb->start, eb->len); - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - if (!PageUptodate(page)) { - printk("page %lu not up to date i %lu, total %lu, len %lu\n", page->index, i, num_pages, eb->len); - WARN_ON(1); - } - WARN_ON(!PageUptodate(page)); - - cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER1); - memcpy(dst, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER1); - - dst += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(read_extent_buffer); - -int 
map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - size_t offset = start & (PAGE_CACHE_SIZE - 1); - char *kaddr; - struct page *p; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - unsigned long end_i = (start_offset + start + min_len - 1) >> - PAGE_CACHE_SHIFT; - - if (i != end_i) - return -EINVAL; - - if (i == 0) { - offset = start_offset; - *map_start = 0; - } else { - offset = 0; - *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset; - } - if (start + min_len > eb->len) { -printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len); - WARN_ON(1); - } - - p = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(p)); - kaddr = kmap_atomic(p, km); - *token = kaddr; - *map = kaddr + offset; - *map_len = PAGE_CACHE_SIZE - offset; - return 0; -} -EXPORT_SYMBOL(map_private_extent_buffer); - -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - int err; - int save = 0; - if (eb->map_token) { - unmap_extent_buffer(eb, eb->map_token, km); - eb->map_token = NULL; - save = 1; - } - err = map_private_extent_buffer(eb, start, min_len, token, map, - map_start, map_len, km); - if (!err && save) { - eb->map_token = *token; - eb->kaddr = *map; - eb->map_start = *map_start; - eb->map_len = *map_len; - } - return err; -} -EXPORT_SYMBOL(map_extent_buffer); - -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) -{ - kunmap_atomic(token, km); -} -EXPORT_SYMBOL(unmap_extent_buffer); - -int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, - unsigned long start, - unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *ptr 
= (char *)ptrv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - int ret = 0; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, (PAGE_CACHE_SIZE - offset)); - - kaddr = kmap_atomic(page, KM_USER0); - ret = memcmp(ptr, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER0); - if (ret) - break; - - ptr += cur; - len -= cur; - offset = 0; - i++; - } - return ret; -} -EXPORT_SYMBOL(memcmp_extent_buffer); - -void write_extent_buffer(struct extent_buffer *eb, const void *srcv, - unsigned long start, unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - char *src = (char *)srcv; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER1); - memcpy(kaddr + offset, src, cur); - kunmap_atomic(kaddr, KM_USER1); - - src += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(write_extent_buffer); - -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len) -{ - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; - - WARN_ON(start > eb->len); - WARN_ON(start + len > eb->start + eb->len); - - offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) 
{ - page = extent_buffer_page(eb, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, c, cur); - kunmap_atomic(kaddr, KM_USER0); - - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(memset_extent_buffer); - -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, - unsigned long dst_offset, unsigned long src_offset, - unsigned long len) -{ - u64 dst_len = dst->len; - size_t cur; - size_t offset; - struct page *page; - char *kaddr; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - - WARN_ON(src->len != dst_len); - - offset = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - while(len > 0) { - page = extent_buffer_page(dst, i); - WARN_ON(!PageUptodate(page)); - - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - - kaddr = kmap_atomic(page, KM_USER0); - read_extent_buffer(src, kaddr + offset, src_offset, cur); - kunmap_atomic(kaddr, KM_USER0); - - src_offset += cur; - len -= cur; - offset = 0; - i++; - } -} -EXPORT_SYMBOL(copy_extent_buffer); - -static void move_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - if (dst_page == src_page) { - memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); - } else { - char *src_kaddr = kmap_atomic(src_page, KM_USER1); - char *p = dst_kaddr + dst_off + len; - char *s = src_kaddr + src_off + len; - - while (len--) - *--p = *--s; - - kunmap_atomic(src_kaddr, KM_USER1); - } - kunmap_atomic(dst_kaddr, KM_USER0); -} - -static void copy_pages(struct page *dst_page, struct page *src_page, - unsigned long dst_off, unsigned long src_off, - unsigned long len) -{ - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); - char *src_kaddr; - - if (dst_page != src_page) - src_kaddr 
= kmap_atomic(src_page, KM_USER1); - else - src_kaddr = dst_kaddr; - - memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); - kunmap_atomic(dst_kaddr, KM_USER0); - if (dst_page != src_page) - kunmap_atomic(src_kaddr, KM_USER1); -} - -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len) -{ - size_t cur; - size_t dst_off_in_page; - size_t src_off_in_page; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long dst_i; - unsigned long src_i; - - if (src_offset + len > dst->len) { - printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); - BUG_ON(1); - } - - while(len > 0) { - dst_off_in_page = (start_offset + dst_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = (start_offset + src_offset) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT; - - cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - - src_off_in_page)); - cur = min_t(unsigned long, cur, - (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page)); - - copy_pages(extent_buffer_page(dst, dst_i), - extent_buffer_page(dst, src_i), - dst_off_in_page, src_off_in_page, cur); - - src_offset += cur; - dst_offset += cur; - len -= cur; - } -} -EXPORT_SYMBOL(memcpy_extent_buffer); - -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len) -{ - size_t cur; - size_t dst_off_in_page; - size_t src_off_in_page; - unsigned long dst_end = dst_offset + len - 1; - unsigned long src_end = src_offset + len - 1; - size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1); - unsigned long dst_i; - unsigned long src_i; - - if (src_offset + len > dst->len) { - 
printk("memmove bogus src_offset %lu move len %lu len %lu\n", - src_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset + len > dst->len) { - printk("memmove bogus dst_offset %lu move len %lu len %lu\n", - dst_offset, len, dst->len); - BUG_ON(1); - } - if (dst_offset < src_offset) { - memcpy_extent_buffer(dst, dst_offset, src_offset, len); - return; - } - while(len > 0) { - dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT; - src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT; - - dst_off_in_page = (start_offset + dst_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - src_off_in_page = (start_offset + src_end) & - ((unsigned long)PAGE_CACHE_SIZE - 1); - - cur = min_t(unsigned long, len, src_off_in_page + 1); - cur = min(cur, dst_off_in_page + 1); - move_pages(extent_buffer_page(dst, dst_i), - extent_buffer_page(dst, src_i), - dst_off_in_page - cur + 1, - src_off_in_page - cur + 1, cur); - - dst_end -= cur; - src_end -= cur; - len -= cur; - } -} -EXPORT_SYMBOL(memmove_extent_buffer); diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index ea60f5447b5b..56314217cfc0 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -3,215 +3,53 @@ #include +#define EXTENT_MAP_LAST_BYTE (u64)-4 #define EXTENT_MAP_HOLE (u64)-3 #define EXTENT_MAP_INLINE (u64)-2 #define EXTENT_MAP_DELALLOC (u64)-1 -/* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) -#define EXTENT_DEFRAG (1 << 6) -#define EXTENT_DEFRAG_DONE (1 << 7) -#define EXTENT_BUFFER_FILLED (1 << 8) -#define EXTENT_CSUM (1 << 9) -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) - -/* - * page->private values. Every page that is controlled by the extent - * map has page->private set to one. 
- */ -#define EXTENT_PAGE_PRIVATE 1 -#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 - - -struct extent_map_ops { - int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); - int (*writepage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_io_hook)(struct page *page, u64 start, u64 end); - int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); - void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); -}; - -struct extent_map_tree { - struct rb_root map; - struct rb_root state; - struct address_space *mapping; - u64 dirty_bytes; - rwlock_t lock; - struct extent_map_ops *ops; - spinlock_t lru_lock; - struct list_head buffer_lru; - int lru_size; -}; - -/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ struct extent_map { - u64 start; - u64 end; /* inclusive */ - int in_tree; struct rb_node rb_node; - /* block_start and block_end are in bytes */ + + /* all of these are in bytes */ + u64 start; + u64 len; u64 block_start; - u64 block_end; /* inclusive */ + unsigned long flags; struct block_device *bdev; atomic_t refs; -}; - -/* note, this must start with the same fields as fs/extent_map.c:tree_entry */ -struct extent_state { - u64 start; - u64 end; /* inclusive */ int in_tree; - struct rb_node rb_node; - wait_queue_head_t wq; - atomic_t refs; - unsigned long state; - - /* for use by the FS */ - u64 private; - - struct list_head list; }; -struct extent_buffer { - u64 start; - unsigned long len; - char *map_token; - char *kaddr; - unsigned long map_start; - unsigned long map_len; - struct page *first_page; - struct list_head lru; - atomic_t refs; - int flags; +struct extent_map_tree { + struct rb_root map; + struct extent_map *last; + spinlock_t lock; }; -typedef struct extent_map *(get_extent_t)(struct inode *inode, - struct page *page, - size_t page_offset, - u64 start, u64 end, - int create); +static inline u64 extent_map_end(struct extent_map *em) +{ + if (em->start + em->len < em->start) + return 
(u64)-1; + return em->start + em->len; +} + +static inline u64 extent_map_block_end(struct extent_map *em) +{ + if (em->block_start + em->len < em->block_start) + return (u64)-1; + return em->block_start + em->len; +} -void extent_map_tree_init(struct extent_map_tree *tree, - struct address_space *mapping, gfp_t mask); -void extent_map_tree_empty_lru(struct extent_map_tree *tree); +void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask); struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, - u64 start, u64 end); + u64 start, u64 len); int add_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); -int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page); -int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); -int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask); + struct extent_map *alloc_extent_map(gfp_t mask); void free_extent_map(struct extent_map *em); -int extent_read_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent); int __init extent_map_init(void); void extent_map_exit(void); - -u64 count_range_bits(struct extent_map_tree *tree, - u64 *start, u64 search_end, - u64 max_bytes, unsigned long bits); - -int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end, - int bits, int filled); -int clear_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask); -int set_extent_bits(struct extent_map_tree *tree, u64 start, u64 end, - int bits, gfp_t mask); -int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); 
-int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end, - gfp_t mask); -int find_first_extent_bit(struct extent_map_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, int bits); -int extent_invalidatepage(struct extent_map_tree *tree, - struct page *page, unsigned long offset); -int extent_write_full_page(struct extent_map_tree *tree, struct page *page, - get_extent_t *get_extent, - struct writeback_control *wbc); -int extent_writepages(struct extent_map_tree *tree, - struct address_space *mapping, - get_extent_t *get_extent, - struct writeback_control *wbc); -int extent_readpages(struct extent_map_tree *tree, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages, - get_extent_t get_extent); -int extent_prepare_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to, get_extent_t *get_extent); -int extent_commit_write(struct extent_map_tree *tree, - struct inode *inode, struct page *page, - unsigned from, unsigned to); -sector_t extent_bmap(struct address_space *mapping, sector_t iblock, - get_extent_t *get_extent); -int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end); -int set_state_private(struct extent_map_tree *tree, u64 start, u64 private); -int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private); -void set_page_extent_mapped(struct page *page); - -struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - struct page *page0, - gfp_t mask); -struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree, - u64 start, unsigned long len, - gfp_t mask); -void free_extent_buffer(struct extent_buffer *eb); -int read_extent_buffer_pages(struct extent_map_tree *tree, - struct extent_buffer *eb, u64 start, int wait); - -static inline void extent_buffer_get(struct extent_buffer *eb) -{ - atomic_inc(&eb->refs); -} - -int memcmp_extent_buffer(struct extent_buffer *eb, const void 
*ptrv, - unsigned long start, - unsigned long len); -void read_extent_buffer(struct extent_buffer *eb, void *dst, - unsigned long start, - unsigned long len); -void write_extent_buffer(struct extent_buffer *eb, const void *src, - unsigned long start, unsigned long len); -void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, - unsigned long dst_offset, unsigned long src_offset, - unsigned long len); -void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len); -void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, - unsigned long src_offset, unsigned long len); -void memset_extent_buffer(struct extent_buffer *eb, char c, - unsigned long start, unsigned long len); -int wait_on_extent_buffer_writeback(struct extent_map_tree *tree, - struct extent_buffer *eb); -int clear_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb); -int set_extent_buffer_dirty(struct extent_map_tree *tree, - struct extent_buffer *eb); -int set_extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb); -int extent_buffer_uptodate(struct extent_map_tree *tree, - struct extent_buffer *eb); -int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); -int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1cd8c908811e..c5bb00f92396 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -233,8 +233,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, int err = 0; int i; struct inode *inode = fdentry(file)->d_inode; - 
struct extent_map *em; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 hint_byte; u64 num_bytes; u64 start_pos; @@ -242,11 +241,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, u64 end_pos = pos + write_bytes; u64 inline_size; loff_t isize = i_size_read(inode); - em = alloc_extent_map(GFP_NOFS); - if (!em) - return -ENOMEM; - - em->bdev = inode->i_sb->s_bdev; start_pos = pos & ~((u64)root->sectorsize - 1); num_bytes = (write_bytes + pos - start_pos + @@ -254,7 +248,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; - lock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); + lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { @@ -268,7 +262,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, if ((end_of_last_block & 4095) == 0) { printk("strange end of last %Lu %zu %Lu\n", start_pos, write_bytes, end_of_last_block); } - set_extent_uptodate(em_tree, start_pos, end_of_last_block, GFP_NOFS); + set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS); /* FIXME...EIEIO, ENOSPC and more */ @@ -293,6 +287,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inode->i_ino, last_pos_in_file, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, last_pos_in_file, + last_pos_in_file + hole_size -1); btrfs_check_file(root, inode); } if (err) @@ -320,12 +316,12 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, last_end += PAGE_CACHE_SIZE - 1; if (start_pos < isize) { u64 delalloc_start = start_pos; - existing_delalloc = count_range_bits(em_tree, + existing_delalloc = count_range_bits(io_tree, &delalloc_start, end_of_last_block, (u64)-1, EXTENT_DELALLOC); } - 
set_extent_delalloc(em_tree, start_pos, end_of_last_block, + set_extent_delalloc(io_tree, start_pos, end_of_last_block, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); root->fs_info->delalloc_bytes += (end_of_last_block + 1 - @@ -346,6 +342,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, inline_size -= start_pos; err = insert_inline_extent(trans, root, inode, start_pos, inline_size, pages, 0, num_pages); + btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); } if (end_pos > isize) { @@ -356,8 +353,7 @@ failed: err = btrfs_end_transaction(trans, root); out_unlock: mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, start_pos, end_of_last_block, GFP_NOFS); - free_extent_map(em); + unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -367,10 +363,15 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end) struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; while(1) { + spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, end); - if (!em) + if (!em) { + spin_unlock(&em_tree->lock); break; + } remove_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + /* once for us */ free_extent_map(em); /* once for the tree*/ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 67005480e139..16d3aef45d18 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -53,7 +53,7 @@ static struct inode_operations btrfs_file_inode_operations; static struct address_space_operations btrfs_aops; static struct address_space_operations btrfs_symlink_aops; static struct file_operations btrfs_dir_file_operations; -static struct extent_map_ops btrfs_extent_map_ops; +static struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; @@ -104,6 +104,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) u64 num_bytes; u64 cur_alloc_size; u64 blocksize = 
root->sectorsize; + u64 orig_start = start; + u64 orig_num_bytes; struct btrfs_key ins; int ret; @@ -115,6 +117,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) num_bytes = max(blocksize, num_bytes); ret = btrfs_drop_extents(trans, root, inode, start, start + num_bytes, start, &alloc_hint); + orig_num_bytes = num_bytes; if (alloc_hint == EXTENT_MAP_INLINE) goto out; @@ -138,6 +141,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; } + btrfs_drop_extent_cache(inode, orig_start, + orig_start + orig_num_bytes - 1); btrfs_add_ordered_inode(inode); out: btrfs_end_transaction(trans, root); @@ -297,7 +302,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) int ret = 0; struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_csum_item *item; struct btrfs_path *path = NULL; u32 csum; @@ -317,7 +322,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) } read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, BTRFS_CRC32_SIZE); - set_state_private(em_tree, start, csum); + set_state_private(io_tree, start, csum); out: if (path) btrfs_free_path(path); @@ -329,17 +334,19 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) { size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; char *kaddr; u64 private; int ret; struct btrfs_root *root = BTRFS_I(inode)->root; u32 csum = ~(u32)0; unsigned long flags; + if (btrfs_test_opt(root, NODATASUM) || btrfs_test_flag(inode, NODATASUM)) return 0; - ret = get_state_private(em_tree, start, &private); + + ret = 
get_state_private(io_tree, start, &private); local_irq_save(flags); kaddr = kmap_atomic(page, KM_IRQ0); if (ret) { @@ -428,7 +435,7 @@ void btrfs_read_locked_inode(struct inode *inode) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; break; @@ -873,7 +880,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, size_t zero_start) { char *kaddr; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_root *root = BTRFS_I(inode)->root; u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; u64 page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -884,12 +891,12 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, WARN_ON(!PageLocked(page)); set_page_extent_mapped(page); - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; - existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree, + existing_delalloc = count_range_bits(&BTRFS_I(inode)->io_tree, &delalloc_start, page_end, PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start, + set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -903,7 +910,7 @@ static int btrfs_cow_one_page(struct inode *inode, struct page *page, kunmap(page); } set_page_dirty(page); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); return ret; } @@ -961,7 +968,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { struct btrfs_trans_handle *trans; 
struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; u64 mask = root->sectorsize - 1; u64 pos = (inode->i_size + mask) & ~mask; @@ -986,7 +993,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_truncate_page(inode->i_mapping, inode->i_size); - lock_extent(em_tree, pos, block_end, GFP_NOFS); + lock_extent(io_tree, pos, block_end, GFP_NOFS); hole_size = block_end - hole_start; mutex_lock(&root->fs_info->fs_mutex); @@ -1001,11 +1008,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) inode->i_ino, hole_start, 0, 0, hole_size); + btrfs_drop_extent_cache(inode, hole_start, + hole_size - 1); btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); mutex_unlock(&root->fs_info->fs_mutex); - unlock_extent(em_tree, pos, block_end, GFP_NOFS); + unlock_extent(io_tree, pos, block_end, GFP_NOFS); if (err) return err; } @@ -1189,7 +1198,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) struct btrfs_iget_args *args = p; inode->i_ino = args->ino; BTRFS_I(inode)->root = args->root; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); return 0; } @@ -1485,7 +1495,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (!inode) return ERR_PTR(-ENOMEM); - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); BTRFS_I(inode)->root = root; @@ -1672,9 +1683,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - 
extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; btrfs_update_inode_block_group(trans, inode); @@ -1816,7 +1828,7 @@ out_unlock: } struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { int ret; @@ -1826,7 +1838,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, u64 extent_end = 0; u64 objectid = inode->i_ino; u32 found_type; - int failed_insert = 0; struct btrfs_path *path; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_file_extent_item *item; @@ -1834,6 +1845,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct btrfs_key found_key; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_trans_handle *trans = NULL; path = btrfs_alloc_path(); @@ -1841,24 +1853,26 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, mutex_lock(&root->fs_info->fs_mutex); again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); + if (em) { if (em->start > start) { - printk("get_extent start %Lu em start %Lu\n", - start, em->start); + printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n", + start, len, em->start, em->len); WARN_ON(1); } goto out; } + em = alloc_extent_map(GFP_NOFS); if (!em) { - em = alloc_extent_map(GFP_NOFS); - if (!em) { - err = -ENOMEM; - goto out; - } - em->start = EXTENT_MAP_HOLE; - em->end = EXTENT_MAP_HOLE; + err = -ENOMEM; + goto out; } + + 
em->start = EXTENT_MAP_HOLE; + em->len = (u64)-1; em->bdev = inode->i_sb->s_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); @@ -1893,28 +1907,25 @@ again: if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { - if (end < extent_start) + if (start + len <= extent_start) goto not_found; - em->end = extent_end - 1; + em->len = extent_end - extent_start; } else { - em->end = end; + em->len = len; } goto not_found_em; } bytenr = btrfs_file_extent_disk_bytenr(leaf, item); if (bytenr == 0) { em->start = extent_start; - em->end = extent_end - 1; + em->len = extent_end - extent_start; em->block_start = EXTENT_MAP_HOLE; - em->block_end = EXTENT_MAP_HOLE; goto insert; } bytenr += btrfs_file_extent_offset(leaf, item); em->block_start = bytenr; - em->block_end = em->block_start + - btrfs_file_extent_num_bytes(leaf, item) - 1; em->start = extent_start; - em->end = extent_end - 1; + em->len = extent_end - extent_start; goto insert; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { unsigned long ptr; @@ -1925,25 +1936,24 @@ again: size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf, path->slots[0])); - extent_end = (extent_start + size - 1) | - ((u64)root->sectorsize - 1); + extent_end = (extent_start + size + root->sectorsize - 1) & + ~((u64)root->sectorsize - 1); if (start < extent_start || start >= extent_end) { em->start = start; if (start < extent_start) { - if (end < extent_start) + if (start + len <= extent_start) goto not_found; - em->end = extent_end; + em->len = extent_end - extent_start; } else { - em->end = end; + em->len = len; } goto not_found_em; } em->block_start = EXTENT_MAP_INLINE; - em->block_end = EXTENT_MAP_INLINE; if (!page) { em->start = extent_start; - em->end = extent_start + size - 1; + em->len = size; goto out; } @@ -1952,8 +1962,7 @@ again: copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, size - extent_offset); em->start = extent_start + extent_offset; 
- em->end = (em->start + copy_size -1) | - ((u64)root->sectorsize -1); + em->len = copy_size; map = kmap(page); ptr = btrfs_file_extent_inline_start(item) + extent_offset; if (create == 0 && !PageUptodate(page)) { @@ -1974,7 +1983,8 @@ again: btrfs_mark_buffer_dirty(leaf); } kunmap(page); - set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS); + set_extent_uptodate(io_tree, em->start, + extent_map_end(em) - 1, GFP_NOFS); goto insert; } else { printk("unkknown found_type %d\n", found_type); @@ -1982,33 +1992,29 @@ again: } not_found: em->start = start; - em->end = end; + em->len = len; not_found_em: em->block_start = EXTENT_MAP_HOLE; - em->block_end = EXTENT_MAP_HOLE; insert: btrfs_release_path(root, path); - if (em->start > start || em->end < start) { - printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end); + if (em->start > start || extent_map_end(em) <= start) { + printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len); err = -EIO; goto out; } + + err = 0; + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { free_extent_map(em); - em = NULL; - if (0 && failed_insert == 1) { - btrfs_drop_extent_cache(inode, start, end); - } - failed_insert++; - if (failed_insert > 5) { - printk("failing to insert %Lu %Lu\n", start, end); + em = lookup_extent_mapping(em_tree, start, len); + if (!em) { err = -EIO; - goto out; + printk("failing to insert %Lu %Lu\n", start, len); } - goto again; } - err = 0; + spin_unlock(&em_tree->lock); out: btrfs_free_path(path); if (trans) { @@ -2032,14 +2038,14 @@ static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) int btrfs_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btrfs_get_extent); } static int 
btrfs_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; if (current->flags & PF_MEMALLOC) { @@ -2047,15 +2053,15 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); return 0; } - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btrfs_get_extent, wbc); } static int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_writepages(tree, mapping, btrfs_get_extent, wbc); } @@ -2063,19 +2069,21 @@ static int btrfs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; return extent_readpages(tree, mapping, pages, nr_pages, btrfs_get_extent); } static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -2086,9 +2094,9 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btrfs_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; 
extent_invalidatepage(tree, page, offset); btrfs_releasepage(page, GFP_NOFS); } @@ -2374,7 +2382,7 @@ unsigned long btrfs_force_ra(struct address_space *mapping, int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct page *page; unsigned long last_index; unsigned long ra_index = 0; @@ -2414,13 +2422,13 @@ int btrfs_defrag_file(struct file *file) { page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(em_tree, page_start, page_end, GFP_NOFS); + lock_extent(io_tree, page_start, page_end, GFP_NOFS); delalloc_start = page_start; existing_delalloc = - count_range_bits(&BTRFS_I(inode)->extent_tree, + count_range_bits(&BTRFS_I(inode)->io_tree, &delalloc_start, page_end, PAGE_CACHE_SIZE, EXTENT_DELALLOC); - set_extent_delalloc(em_tree, page_start, + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); spin_lock(&root->fs_info->delalloc_lock); @@ -2428,7 +2436,7 @@ int btrfs_defrag_file(struct file *file) { existing_delalloc; spin_unlock(&root->fs_info->delalloc_lock); - unlock_extent(em_tree, page_start, page_end, GFP_NOFS); + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); set_page_dirty(page); unlock_page(page); page_cache_release(page); @@ -2842,9 +2850,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &btrfs_aops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; - extent_map_tree_init(&BTRFS_I(inode)->extent_tree, + extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); + extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); - BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } dir->i_sb->s_dirt = 1; 
btrfs_update_inode_block_group(trans, inode); @@ -2934,7 +2943,7 @@ static struct file_operations btrfs_dir_file_operations = { #endif }; -static struct extent_map_ops btrfs_extent_map_ops = { +static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .writepage_io_hook = btrfs_writepage_io_hook, .readpage_io_hook = btrfs_readpage_io_hook, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8b52c69fda2e..f8a1016600b1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -468,10 +468,15 @@ static int __init init_btrfs_fs(void) err = btrfs_init_cachep(); if (err) goto free_transaction_sys; - err = extent_map_init(); + + err = extent_io_init(); if (err) goto free_cachep; + err = extent_map_init(); + if (err) + goto free_extent_io; + err = register_filesystem(&btrfs_fs_type); if (err) goto free_extent_map; @@ -479,6 +484,8 @@ static int __init init_btrfs_fs(void) free_extent_map: extent_map_exit(); +free_extent_io: + extent_io_exit(); free_cachep: btrfs_destroy_cachep(); free_transaction_sys: @@ -492,6 +499,7 @@ static void __exit exit_btrfs_fs(void) btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); extent_map_exit(); + extent_io_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 163c01a24498..b4a1bc62a784 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -70,7 +70,7 @@ static int join_transaction(struct btrfs_root *root) INIT_LIST_HEAD(&cur_trans->pending_snapshots); list_add_tail(&cur_trans->list, &root->fs_info->trans_list); btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree); - extent_map_tree_init(&cur_trans->dirty_pages, + extent_io_tree_init(&cur_trans->dirty_pages, root->fs_info->btree_inode->i_mapping, GFP_NOFS); } else { @@ -153,7 +153,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int ret; int err; int werr = 0; - struct extent_map_tree *dirty_pages; + struct extent_io_tree 
*dirty_pages; struct page *page; struct inode *btree_inode = root->fs_info->btree_inode; u64 start; @@ -610,7 +610,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans; struct btrfs_transaction *prev_trans = NULL; struct list_head dirty_fs_roots; - struct extent_map_tree *pinned_copy; + struct extent_io_tree *pinned_copy; DEFINE_WAIT(wait); int ret; @@ -639,7 +639,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (!pinned_copy) return -ENOMEM; - extent_map_tree_init(pinned_copy, + extent_io_tree_init(pinned_copy, root->fs_info->btree_inode->i_mapping, GFP_NOFS); trans->transaction->in_commit = 1; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fd52e9b23922..c3172ddb3321 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -29,7 +29,7 @@ struct btrfs_transaction { int use_count; int commit_done; struct list_head list; - struct extent_map_tree dirty_pages; + struct extent_io_tree dirty_pages; unsigned long start_time; struct btrfs_ordered_inode_tree ordered_inode_tree; wait_queue_head_t writer_wait; -- cgit v1.2.3 From 6f568d35a045dbb8a13fe71bfc32e85e39a986cb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 16:03:38 -0500 Subject: Btrfs: mount -o max_inline=size to control the maximum inline extent size Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent_io.c | 1 - fs/btrfs/file.c | 3 ++- fs/btrfs/super.c | 19 ++++++++++++++++++- 5 files changed, 22 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 127c86f795d0..8f93e066bf8b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -325,6 +325,7 @@ struct btrfs_fs_info { u64 last_trans_committed; unsigned long mount_opt; u64 max_extent; + u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; diff --git a/fs/btrfs/disk-io.c 
b/fs/btrfs/disk-io.c index e29a0716f6a9..6647b90ef745 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -659,6 +659,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->throttles = 0; fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; + fs_info->max_inline = 8192 * 1024; fs_info->delalloc_bytes = 0; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e81b35b06187..6999b819db62 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2451,7 +2451,6 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, goto out; sector = (em->block_start + start - em->start) >> inode->i_blkbits; -printk("bmap finds %Lu %Lu block %Lu\n", em->start, em->len, em->block_start); out: free_extent_map(em); return sector; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8e210616d702..7c10a90362ff 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -301,7 +301,8 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, */ inline_size = end_pos; if (isize >= BTRFS_MAX_INLINE_DATA_SIZE(root) || - inline_size > 8192 || + inline_size > root->fs_info->max_inline || + (inline_size & (root->sectorsize -1)) == 0 || inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { u64 last_end; u64 existing_delalloc = 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f8a1016600b1..a46300c4753a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -64,7 +64,7 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, - Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, + Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, }; static match_table_t tokens = { @@ -73,6 +73,7 @@ static match_table_t tokens = { {Opt_nodatacow, "nodatacow"}, {Opt_nobarrier, "nobarrier"}, {Opt_max_extent, "max_extent=%s"}, + {Opt_max_inline, "max_inline=%s"}, {Opt_alloc_start, 
"alloc_start=%s"}, {Opt_ssd, "ssd"}, {Opt_err, NULL} @@ -178,6 +179,22 @@ static int parse_options (char * options, } } break; + case Opt_max_inline: + if (info) { + char *num = match_strdup(&args[0]); + if (num) { + info->max_inline = + btrfs_parse_size(num); + kfree(num); + + info->max_inline = max_t(u64, + info->max_inline, + root->sectorsize); + printk("btrfs: max_inline at %Lu\n", + info->max_inline); + } + } + break; case Opt_alloc_start: if (info) { char *num = match_strdup(&args[0]); -- cgit v1.2.3 From 6885f308b557020efce6ef9c1fcc96adaabecbb6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Feb 2008 16:11:05 -0500 Subject: Btrfs: Misc 2.6.25 updates Remove the btrfs read_inode method, and use save_mount_options Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 29 +---------------------------- fs/btrfs/super.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 29 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bbb71fad861a..315dd550d560 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -297,7 +297,6 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, int btrfs_submit_bio_hook(int rw, struct bio *bio) { - // struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec; struct inode *inode = bvec->bv_page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -320,32 +319,7 @@ int btrfs_submit_bio_hook(int rw, struct bio *bio) mutex_unlock(&root->fs_info->fs_mutex); return ret; } -#if 0 -int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end) -{ - struct inode *inode = page->mapping->host; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; - char *kaddr; - int ret = 0; - u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT; - size_t offset = start - page_start; - if (btrfs_test_opt(root, NODATASUM) || - btrfs_test_flag(inode, NODATASUM)) - return 0; - 
mutex_lock(&root->fs_info->fs_mutex); - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - kaddr = kmap(page); - btrfs_csum_file_block(trans, root, inode, inode->i_ino, - start, kaddr + offset, end - start + 1); - kunmap(page); - ret = btrfs_end_transaction(trans, root); - BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); - return ret; -} -#endif + int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) { int ret = 0; @@ -3049,7 +3023,6 @@ static struct file_operations btrfs_dir_file_operations = { static struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, - // .writepage_io_hook = btrfs_writepage_io_hook, .submit_bio_hook = btrfs_submit_bio_hook, .readpage_io_hook = btrfs_readpage_io_hook, .readpage_end_io_hook = btrfs_readpage_end_io_hook, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a46300c4753a..4423a91206a1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -272,6 +272,11 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_root = root_dentry; btrfs_transaction_queue_work(tree_root, HZ * 30); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) + save_mount_options(sb, data); +#endif + return 0; fail_close: @@ -462,9 +467,13 @@ static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, .put_inode = btrfs_put_inode, .put_super = btrfs_put_super, - .read_inode = btrfs_read_locked_inode, .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + .read_inode = btrfs_read_locked_inode, +#else + .show_options = generic_show_options, +#endif .write_inode = btrfs_write_inode, .dirty_inode = btrfs_dirty_inode, .alloc_inode = btrfs_alloc_inode, -- cgit v1.2.3 From a9218f6b00ada101c9772ee9c44af04fa5125d2a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:04 -0400 Subject: Add /dev/btrfs-control for device scanning ioctls Signed-off-by: Chris 
Mason --- fs/btrfs/extent-tree.c | 4 ---- fs/btrfs/super.c | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5e392804ab15..14eb8fc87015 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -374,10 +374,6 @@ again: found_group = cache; goto found; } - if (full_search) { - printk("failed on cache %Lu used %Lu total %Lu\n", - cache->key.objectid, used, cache->key.offset); - } cond_resched(); } if (!full_search) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4423a91206a1..67ed216df475 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -444,6 +445,13 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static long btrfs_control_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + printk("btrfs control ioctl %d\n", cmd); + return 0; +} + static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -451,6 +459,7 @@ static struct file_system_type btrfs_fs_type = { .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; + static void btrfs_write_super_lockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); @@ -482,6 +491,30 @@ static struct super_operations btrfs_super_ops = { .write_super_lockfs = btrfs_write_super_lockfs, .unlockfs = btrfs_unlockfs, }; + +static const struct file_operations btrfs_ctl_fops = { + .unlocked_ioctl = btrfs_control_ioctl, + .compat_ioctl = btrfs_control_ioctl, + .owner = THIS_MODULE, +}; + +static struct miscdevice btrfs_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "btrfs-control", + .fops = &btrfs_ctl_fops +}; + +static int btrfs_interface_init(void) +{ + return misc_register(&btrfs_misc); +} + +void btrfs_interface_exit(void) +{ + if 
(misc_deregister(&btrfs_misc) < 0) + printk("misc_deregister failed for control device"); +} + static int __init init_btrfs_fs(void) { int err; @@ -503,11 +536,16 @@ static int __init init_btrfs_fs(void) if (err) goto free_extent_io; - err = register_filesystem(&btrfs_fs_type); + err = btrfs_interface_init(); if (err) goto free_extent_map; + err = register_filesystem(&btrfs_fs_type); + if (err) + goto unregister_ioctl; return 0; +unregister_ioctl: + btrfs_interface_exit(); free_extent_map: extent_map_exit(); free_extent_io: @@ -526,6 +564,7 @@ static void __exit exit_btrfs_fs(void) btrfs_destroy_cachep(); extent_map_exit(); extent_io_exit(); + btrfs_interface_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); } -- cgit v1.2.3 From 8a4b83cc8bd75fca29ac68615896d9e92820e7c2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:07 -0400 Subject: Btrfs: Add support for device scanning and detection ioctls Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 21 ++++- fs/btrfs/disk-io.c | 24 ++++-- fs/btrfs/disk-io.h | 4 +- fs/btrfs/ioctl.h | 6 +- fs/btrfs/super.c | 61 ++++++++++---- fs/btrfs/volumes.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/volumes.h | 25 ++++++ 7 files changed, 333 insertions(+), 44 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index acf22ad6115c..7556f8319c60 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -37,7 +37,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; -#define BTRFS_MAGIC "_B4RfS_M" +#define BTRFS_MAGIC "_B5RfS_M" #define BTRFS_MAX_LEVEL 8 @@ -238,6 +238,7 @@ struct btrfs_super_block { __le64 total_bytes; __le64 bytes_used; __le64 root_dir_objectid; + __le64 num_devices; __le32 sectorsize; __le32 nodesize; __le32 leafsize; @@ -440,6 +441,7 @@ struct btrfs_block_group_cache { }; struct btrfs_device; +struct btrfs_fs_devices; 
struct btrfs_fs_info { u8 fsid[BTRFS_FSID_SIZE]; struct btrfs_root *extent_root; @@ -489,7 +491,7 @@ struct btrfs_fs_info { u64 total_pinned; struct list_head dirty_cowonly_roots; - struct list_head devices; + struct btrfs_fs_devices *fs_devices; struct list_head space_info; spinlock_t delalloc_lock; spinlock_t new_trans_lock; @@ -677,6 +679,19 @@ BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64); + static inline char *btrfs_device_uuid(struct btrfs_dev_item *d) { return (char *)d + offsetof(struct btrfs_dev_item, uuid); @@ -1106,6 +1121,8 @@ BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, stripesize, 32); BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, root_dir_objectid, 64); +BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, + num_devices, 64); static inline unsigned long btrfs_leaf_data(struct extent_buffer *l) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4890151cd68d..f971a29e4f20 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -365,12 +365,12 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) struct list_head *next; struct btrfs_device *device; - list = &fs_info->devices; - 
while(!list_empty(list)) { - next = list->next; - list_del(next); + list = &fs_info->fs_devices->devices; + list_for_each(next, list) { device = list_entry(next, struct btrfs_device, dev_list); - kfree(device); + if (device->bdev && device->bdev != fs_info->sb->s_bdev) + close_bdev_excl(device->bdev); + device->bdev = NULL; } return 0; } @@ -655,7 +655,8 @@ static int add_hasher(struct btrfs_fs_info *info, char *type) { return 0; } #endif -struct btrfs_root *open_ctree(struct super_block *sb) +struct btrfs_root *open_ctree(struct super_block *sb, + struct btrfs_fs_devices *fs_devices) { u32 sectorsize; u32 nodesize; @@ -697,8 +698,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; fs_info->dev_root = dev_root; + fs_info->fs_devices = fs_devices; INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); - INIT_LIST_HEAD(&fs_info->devices); INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); fs_info->sb = sb; @@ -779,6 +780,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; + if (btrfs_super_num_devices(disk_super) != fs_devices->num_devices) { + printk("Btrfs: wanted %llu devices, but found %llu\n", + (unsigned long long)btrfs_super_num_devices(disk_super), + (unsigned long long)fs_devices->num_devices); + goto fail_sb_buffer; + } nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); @@ -799,8 +806,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_lock(&fs_info->fs_mutex); - ret = btrfs_read_super_device(tree_root, fs_info->sb_buffer); - BUG_ON(ret); ret = btrfs_read_sys_array(tree_root); BUG_ON(ret); @@ -859,6 +864,7 @@ fail_sb_buffer: fail_iput: iput(fs_info->btree_inode); fail: + close_all_devices(fs_info); kfree(extent_root); kfree(tree_root); kfree(fs_info); diff --git a/fs/btrfs/disk-io.h 
b/fs/btrfs/disk-io.h index 206cb48638f7..b7cbc58a5553 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -21,6 +21,7 @@ #define BTRFS_SUPER_INFO_OFFSET (16 * 1024) struct btrfs_device; +struct btrfs_fs_devices; struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); @@ -29,7 +30,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize); int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); -struct btrfs_root *open_ctree(struct super_block *sb); +struct btrfs_root *open_ctree(struct super_block *sb, + struct btrfs_fs_devices *fs_devices); int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 8c6290665d49..4551e82013c8 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -22,8 +22,10 @@ #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_VOL_NAME_MAX 255 +#define BTRFS_PATH_NAME_MAX 4095 + struct btrfs_ioctl_vol_args { - char name[BTRFS_VOL_NAME_MAX + 1]; + char name[BTRFS_PATH_NAME_MAX + 1]; }; #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ @@ -32,4 +34,6 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_RESIZE _IOW(BTRFS_IOCTL_MAGIC, 3, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ + struct btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 67ed216df475..9624923a33dc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -44,6 +44,7 @@ #include "ioctl.h" #include "print-tree.h" #include "xattr.h" +#include "volumes.h" #define BTRFS_SUPER_MAGIC 0x9123683E @@ -216,7 +217,9 @@ static int parse_options (char * options, return 1; } -static int btrfs_fill_super(struct super_block * sb, void * data, int silent) +static int btrfs_fill_super(struct super_block * sb, + struct btrfs_fs_devices 
*fs_devices, + void * data, int silent) { struct inode * inode; struct dentry * root_dentry; @@ -231,7 +234,7 @@ static int btrfs_fill_super(struct super_block * sb, void * data, int silent) sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; - tree_root = open_ctree(sb); + tree_root = open_ctree(sb, fs_devices); if (!tree_root || IS_ERR(tree_root)) { printk("btrfs: open_ctree failed\n"); @@ -334,18 +337,23 @@ static int test_bdev_super(struct super_block *s, void *data) int btrfs_get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), struct vfsmount *mnt, const char *subvol) { struct block_device *bdev = NULL; struct super_block *s; struct dentry *root; + struct btrfs_fs_devices *fs_devices = NULL; int error = 0; - bdev = open_bdev_excl(dev_name, flags, fs_type); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); + error = btrfs_scan_one_device(dev_name, flags, fs_type, &fs_devices); + if (error) + return error; + error = btrfs_open_devices(fs_devices, flags, fs_type); + if (error) + return error; + + bdev = fs_devices->lowest_bdev; /* * once the super is inserted into the list by sget, s_umount * will protect the lockfs code from trying to start a snapshot @@ -372,7 +380,8 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); - error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); + error = btrfs_fill_super(s, fs_devices, data, + flags & MS_SILENT ? 
1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); @@ -408,7 +417,7 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - close_bdev_excl(bdev); + btrfs_close_devices(fs_devices); error: return error; } @@ -421,8 +430,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, char *subvol_name = NULL; parse_options((char *)data, NULL, &subvol_name); - ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, - btrfs_fill_super, mnt, + ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, mnt, subvol_name ? subvol_name : "default"); if (subvol_name) kfree(subvol_name); @@ -445,13 +453,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static long btrfs_control_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - printk("btrfs control ioctl %d\n", cmd); - return 0; -} - static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", @@ -460,6 +461,31 @@ static struct file_system_type btrfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +static long btrfs_control_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct btrfs_ioctl_vol_args *vol; + struct btrfs_fs_devices *fs_devices; + int ret; + int len; + + vol = kmalloc(sizeof(*vol), GFP_KERNEL); + if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { + ret = -EFAULT; + goto out; + } + len = strnlen(vol->name, BTRFS_PATH_NAME_MAX); + switch (cmd) { + case BTRFS_IOC_SCAN_DEV: + ret = btrfs_scan_one_device(vol->name, MS_RDONLY, + &btrfs_fs_type, &fs_devices); + break; + } +out: + kfree(vol); + return 0; +} + static void btrfs_write_super_lockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); @@ -567,6 +593,7 @@ static void __exit exit_btrfs_fs(void) btrfs_interface_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); + btrfs_cleanup_fs_uuids(); } module_init(init_btrfs_fs) diff --git a/fs/btrfs/volumes.c 
b/fs/btrfs/volumes.c index 16fb6bbe6e28..263f01cc3db4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -17,6 +17,7 @@ */ #include #include +#include #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -28,6 +29,215 @@ struct map_lookup { struct btrfs_device *dev; u64 physical; }; +static DEFINE_MUTEX(uuid_mutex); +static LIST_HEAD(fs_uuids); + +int btrfs_cleanup_fs_uuids(void) +{ + struct btrfs_fs_devices *fs_devices; + struct list_head *uuid_cur; + struct list_head *devices_cur; + struct btrfs_device *dev; + + list_for_each(uuid_cur, &fs_uuids) { + fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices, + list); + while(!list_empty(&fs_devices->devices)) { + devices_cur = fs_devices->devices.next; + dev = list_entry(devices_cur, struct btrfs_device, + dev_list); + printk("uuid cleanup finds %s\n", dev->name); + if (dev->bdev) { + printk("closing\n"); + close_bdev_excl(dev->bdev); + } + list_del(&dev->dev_list); + kfree(dev); + } + } + return 0; +} + +static struct btrfs_device *__find_device(struct list_head *head, u64 devid) +{ + struct btrfs_device *dev; + struct list_head *cur; + + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + if (dev->devid == devid) + return dev; + } + return NULL; +} + +static struct btrfs_fs_devices *find_fsid(u8 *fsid) +{ + struct list_head *cur; + struct btrfs_fs_devices *fs_devices; + + list_for_each(cur, &fs_uuids) { + fs_devices = list_entry(cur, struct btrfs_fs_devices, list); + if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) + return fs_devices; + } + return NULL; +} + +static int device_list_add(const char *path, + struct btrfs_super_block *disk_super, + u64 devid, struct btrfs_fs_devices **fs_devices_ret) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *fs_devices; + u64 found_transid = btrfs_super_generation(disk_super); + + fs_devices = find_fsid(disk_super->fsid); + if (!fs_devices) { + fs_devices = kmalloc(sizeof(*fs_devices), GFP_NOFS); + 
if (!fs_devices) + return -ENOMEM; + INIT_LIST_HEAD(&fs_devices->devices); + list_add(&fs_devices->list, &fs_uuids); + memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); + fs_devices->latest_devid = devid; + fs_devices->latest_trans = found_transid; + fs_devices->lowest_devid = (u64)-1; + fs_devices->num_devices = 0; + device = NULL; + } else { + device = __find_device(&fs_devices->devices, devid); + } + if (!device) { + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) { + /* we can safely leave the fs_devices entry around */ + return -ENOMEM; + } + device->devid = devid; + device->name = kstrdup(path, GFP_NOFS); + if (!device->name) { + kfree(device); + return -ENOMEM; + } + list_add(&device->dev_list, &fs_devices->devices); + fs_devices->num_devices++; + } + + if (found_transid > fs_devices->latest_trans) { + fs_devices->latest_devid = devid; + fs_devices->latest_trans = found_transid; + } + if (fs_devices->lowest_devid > devid) { + fs_devices->lowest_devid = devid; + printk("lowest devid now %Lu\n", devid); + } + *fs_devices_ret = fs_devices; + return 0; +} + +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) +{ + struct list_head *head = &fs_devices->devices; + struct list_head *cur; + struct btrfs_device *device; + + mutex_lock(&uuid_mutex); + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); + if (device->bdev) { + close_bdev_excl(device->bdev); + printk("close devices closes %s\n", device->name); + } + device->bdev = NULL; + } + mutex_unlock(&uuid_mutex); + return 0; +} + +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + int flags, void *holder) +{ + struct block_device *bdev; + struct list_head *head = &fs_devices->devices; + struct list_head *cur; + struct btrfs_device *device; + int ret; + + mutex_lock(&uuid_mutex); + list_for_each(cur, head) { + device = list_entry(cur, struct btrfs_device, dev_list); + bdev = open_bdev_excl(device->name, flags, holder); +printk("opening %s 
devid %Lu\n", device->name, device->devid); + if (IS_ERR(bdev)) { + printk("open %s failed\n", device->name); + ret = PTR_ERR(bdev); + goto fail; + } + if (device->devid == fs_devices->latest_devid) + fs_devices->latest_bdev = bdev; + if (device->devid == fs_devices->lowest_devid) { + fs_devices->lowest_bdev = bdev; +printk("lowest bdev %s\n", device->name); + } + device->bdev = bdev; + } + mutex_unlock(&uuid_mutex); + return 0; +fail: + mutex_unlock(&uuid_mutex); + btrfs_close_devices(fs_devices); + return ret; +} + +int btrfs_scan_one_device(const char *path, int flags, void *holder, + struct btrfs_fs_devices **fs_devices_ret) +{ + struct btrfs_super_block *disk_super; + struct block_device *bdev; + struct buffer_head *bh; + int ret; + u64 devid; + + mutex_lock(&uuid_mutex); + + printk("scan one opens %s\n", path); + bdev = open_bdev_excl(path, flags, holder); + + if (IS_ERR(bdev)) { + printk("open failed\n"); + ret = PTR_ERR(bdev); + goto error; + } + + ret = set_blocksize(bdev, 4096); + if (ret) + goto error_close; + bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) { + ret = -EIO; + goto error_close; + } + disk_super = (struct btrfs_super_block *)bh->b_data; + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + printk("no btrfs found on %s\n", path); + ret = -ENOENT; + goto error_brelse; + } + devid = le64_to_cpu(disk_super->dev_item.devid); + printk("found device %Lu on %s\n", devid, path); + ret = device_list_add(path, disk_super, devid, fs_devices_ret); + +error_brelse: + brelse(bh); +error_close: + close_bdev_excl(bdev); + printk("scan one closes bdev %s\n", path); +error: + mutex_unlock(&uuid_mutex); + return ret; +} /* * this uses a pretty simple search, the expectation is that it is @@ -56,6 +266,10 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans, /* FIXME use last free of some kind */ + /* we don't want to overwrite the superblock on the drive, + * so we make sure to start at 
an offset of at least 1MB + */ + search_start = max((u64)1024 * 1024, search_start); key.objectid = device->devid; key.offset = search_start; key.type = BTRFS_DEV_EXTENT_KEY; @@ -285,6 +499,7 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); + device->devid = free_devid; btrfs_set_device_id(leaf, dev_item, device->devid); btrfs_set_device_type(leaf, dev_item, device->type); btrfs_set_device_io_align(leaf, dev_item, device->io_align); @@ -382,7 +597,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_device *device = NULL; struct btrfs_chunk *chunk; struct list_head private_devs; - struct list_head *dev_list = &extent_root->fs_info->devices; + struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices; struct list_head *cur; struct extent_map_tree *em_tree; struct map_lookup *map; @@ -449,7 +664,7 @@ again: key.objectid, calc_size, &dev_offset); BUG_ON(ret); - +printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); device->bytes_used += calc_size; ret = btrfs_update_device(trans, device); BUG_ON(ret); @@ -592,17 +807,9 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid) { - struct btrfs_device *dev; - struct list_head *cur = root->fs_info->devices.next; - struct list_head *head = &root->fs_info->devices; + struct list_head *head = &root->fs_info->fs_devices->devices; - while(cur != head) { - dev = list_entry(cur, struct btrfs_device, dev_list); - if (dev->devid == devid) - return dev; - cur = cur->next; - } - return NULL; + return __find_device(head, devid); } static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, @@ -699,15 +906,16 @@ static int read_one_dev(struct btrfs_root *root, devid = btrfs_device_id(leaf, dev_item); device = btrfs_find_device(root, devid); if (!device) { + printk("warning devid 
%Lu not found already\n", devid); device = kmalloc(sizeof(*device), GFP_NOFS); if (!device) return -ENOMEM; - list_add(&device->dev_list, &root->fs_info->devices); + list_add(&device->dev_list, + &root->fs_info->fs_devices->devices); } fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; - device->bdev = root->fs_info->sb->s_bdev; ret = 0; #if 0 ret = btrfs_open_device(device); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 20259128152e..12f297eb0559 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -24,6 +24,8 @@ struct btrfs_device { struct block_device *bdev; + char *name; + /* the internal btrfs device id */ u64 devid; @@ -49,6 +51,20 @@ struct btrfs_device { u8 uuid[BTRFS_DEV_UUID_SIZE]; }; +struct btrfs_fs_devices { + u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */ + + /* the device with this id has the most recent coyp of the super */ + u64 latest_devid; + u64 latest_trans; + u64 lowest_devid; + u64 num_devices; + struct block_device *latest_bdev; + struct block_device *lowest_bdev; + struct list_head devices; + struct list_head list; +}; + int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 owner, u64 num_bytes, u64 *start); @@ -67,4 +83,13 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf); int btrfs_map_block(struct btrfs_mapping_tree *map_tree, u64 logical, u64 *phys, u64 *length, struct btrfs_device **dev); +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + int flags, void *holder); +int btrfs_scan_one_device(const char *path, int flags, void *holder, + struct btrfs_fs_devices **fs_devices_ret); +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); +int btrfs_add_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device); +int btrfs_cleanup_fs_uuids(void); #endif -- cgit v1.2.3 From e58ca0203d32869a01540a293df40ddc480dc378 Mon Sep 17 00:00:00 2001 From: Yan 
Date: Tue, 1 Apr 2008 11:21:34 -0400 Subject: Fix btrfs_fill_super to return -EINVAL when no FS found Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +-- fs/btrfs/super.c | 4 ++-- fs/btrfs/volumes.c | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 709e33d08bb5..5547607681f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -728,9 +728,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); int ret; - int err = -EIO; + int err = -EINVAL; struct btrfs_super_block *disk_super; - if (!extent_root || !tree_root || !fs_info) { err = -ENOMEM; goto fail; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9624923a33dc..892864906880 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -236,9 +236,9 @@ static int btrfs_fill_super(struct super_block * sb, tree_root = open_ctree(sb, fs_devices); - if (!tree_root || IS_ERR(tree_root)) { + if (IS_ERR(tree_root)) { printk("btrfs: open_ctree failed\n"); - return -EIO; + return PTR_ERR(tree_root); } sb->s_fs_info = tree_root; disk_super = &tree_root->fs_info->super_copy; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d8fce32a3bbc..169be0f7285b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -237,7 +237,7 @@ int btrfs_scan_one_device(const char *path, int flags, void *holder, if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { printk("no btrfs found on %s\n", path); - ret = -ENOENT; + ret = -EINVAL; goto error_brelse; } devid = le64_to_cpu(disk_super->dev_item.devid); -- cgit v1.2.3 From 788f20eb5affef584e75ea84bb80a4c3352a2c0e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Apr 2008 15:29:42 -0400 Subject: Btrfs: Add new ioctl to add devices Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/inode.c | 23 +++++++++++++++++ fs/btrfs/ioctl.h | 6 +++++ 
fs/btrfs/super.c | 1 + fs/btrfs/volumes.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 1 + 6 files changed, 108 insertions(+) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ac7106ec5357..b9257b37bb96 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -551,6 +551,8 @@ struct btrfs_fs_info { u64 data_alloc_profile; u64 metadata_alloc_profile; u64 system_alloc_profile; + + void *bdev_holder; }; /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ab707c0930df..1a74b5018699 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3070,6 +3070,27 @@ static int btrfs_ioctl_defrag(struct file *file) return 0; } +long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_vol_args *vol_args; + int ret; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + ret = btrfs_init_new_device(root, vol_args->name); + +out: + kfree(vol_args); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3082,6 +3103,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_defrag(file); case BTRFS_IOC_RESIZE: return btrfs_ioctl_resize(root, (void __user *)arg); + case BTRFS_IOC_ADD_DEV: + return btrfs_ioctl_add_dev(root, (void __user *)arg); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 4551e82013c8..8ad35fc4ba56 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -36,4 +36,10 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_RM_DEV _IOW(BTRFS_IOCTL_MAGIC, 11, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ + struct 
btrfs_ioctl_vol_args) #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 892864906880..7153dfaa3404 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -388,6 +388,7 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, goto error; } + btrfs_sb(s)->fs_info->bdev_holder = fs_type; s->s_flags |= MS_ACTIVE; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a2c56de1548a..b93c15aa17db 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "ctree.h" #include "extent_map.h" @@ -592,6 +593,80 @@ out: return ret; } +int btrfs_init_new_device(struct btrfs_root *root, char *device_path) +{ + struct btrfs_trans_handle *trans; + struct btrfs_device *device; + struct block_device *bdev; + struct list_head *cur; + struct list_head *devices; + u64 total_bytes; + int ret = 0; + + + bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); + if (!bdev) { + return -EIO; + } + mutex_lock(&root->fs_info->fs_mutex); + trans = btrfs_start_transaction(root, 1); + devices = &root->fs_info->fs_devices->devices; + list_for_each(cur, devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + if (device->bdev == bdev) { + ret = -EEXIST; + goto out; + } + } + + device = kzalloc(sizeof(*device), GFP_NOFS); + if (!device) { + /* we can safely leave the fs_devices entry around */ + ret = -ENOMEM; + goto out_close_bdev; + } + + device->barriers = 1; + generate_random_uuid(device->uuid); + spin_lock_init(&device->io_lock); + device->name = kstrdup(device_path, GFP_NOFS); + if (!device->name) { + kfree(device); + goto out_close_bdev; + } + device->io_width = root->sectorsize; + device->io_align = root->sectorsize; + device->sector_size = root->sectorsize; + device->total_bytes = i_size_read(bdev->bd_inode); + device->dev_root = root->fs_info->dev_root; + device->bdev = bdev; + + ret = btrfs_add_device(trans, root, device); + if (ret) + goto out_close_bdev; + + total_bytes = 
btrfs_super_total_bytes(&root->fs_info->super_copy); + btrfs_set_super_total_bytes(&root->fs_info->super_copy, + total_bytes + device->total_bytes); + + total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); + btrfs_set_super_num_devices(&root->fs_info->super_copy, + total_bytes + 1); + + list_add(&device->dev_list, &root->fs_info->fs_devices->devices); + list_add(&device->dev_alloc_list, + &root->fs_info->fs_devices->alloc_list); + root->fs_info->fs_devices->num_devices++; +out: + btrfs_end_transaction(trans, root); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; + +out_close_bdev: + close_bdev_excl(bdev); + goto out; +} + int btrfs_update_device(struct btrfs_trans_handle *trans, struct btrfs_device *device) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6fe8440b37a7..6f173450378b 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -133,4 +133,5 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, u8 *uuid); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); +int btrfs_init_new_device(struct btrfs_root *root, char *path); #endif -- cgit v1.2.3 From a061fc8da7b990faa41ca503e66faef3ecdeead0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 May 2008 11:43:44 -0400 Subject: Btrfs: Add support for online device removal This required a few structural changes to the code that manages bdev pointers: The VFS super block now gets an anon-bdev instead of a pointer to the lowest bdev. This allows us to avoid swapping the super block bdev pointer around at run time. The code to read in the super block no longer goes through the extent buffer interface. Things got ugly keeping the mapping constant. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +- fs/btrfs/disk-io.c | 101 ++++++++++------------ fs/btrfs/extent-tree.c | 139 +++++++++++++++++++++++++------ fs/btrfs/inode.c | 27 +++++- fs/btrfs/super.c | 35 ++------ fs/btrfs/transaction.c | 5 +- fs/btrfs/volumes.c | 221 +++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/volumes.h | 3 + 8 files changed, 412 insertions(+), 122 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3b6f8524a4ad..33ab165591c5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -505,7 +505,7 @@ struct btrfs_fs_info { u64 alloc_start; struct btrfs_transaction *running_transaction; struct btrfs_super_block super_copy; - struct extent_buffer *sb_buffer; + struct btrfs_super_block super_for_commit; struct block_device *__bdev; struct super_block *sb; struct inode *btree_inode; @@ -1208,6 +1208,7 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); /* struct btrfs_super_block */ BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, generation, 64); BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fabc31b334b6..9d5424ad01a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -78,9 +78,13 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); - spin_unlock(&em_tree->lock); - if (em) + if (em) { + em->bdev = + BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; + spin_unlock(&em_tree->lock); goto out; + } + spin_unlock(&em_tree->lock); em = alloc_extent_map(GFP_NOFS); if (!em) { @@ -90,7 +94,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, em->start = 0; em->len 
= (u64)-1; em->block_start = 0; - em->bdev = inode->i_sb->s_bdev; + em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); @@ -435,11 +439,6 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); BUG_ON(ret); - if (offset == BTRFS_SUPER_INFO_OFFSET) { - bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; - submit_bio(rw, bio); - return 0; - } return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); } @@ -587,8 +586,7 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) list = &fs_info->fs_devices->devices; list_for_each(next, list) { device = list_entry(next, struct btrfs_device, dev_list); - if (device->bdev && device->bdev != fs_info->sb->s_bdev) - close_bdev_excl(device->bdev); + close_bdev_excl(device->bdev); device->bdev = NULL; } return 0; @@ -1118,6 +1116,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 leafsize; u32 blocksize; u32 stripesize; + struct buffer_head *bh; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -1153,7 +1152,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->new_trans_lock); init_completion(&fs_info->kobj_unregister); - sb_set_blocksize(sb, BTRFS_SUPER_INFO_SIZE); fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -1170,6 +1168,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; + sb->s_blocksize = 4096; + sb->s_blocksize_bits = blksize_bits(4096); + /* * we set the i_size on the btree inode to the max possible int. 
* the real end of the address space is determined by all of @@ -1229,19 +1230,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - fs_info->sb_buffer = read_tree_block(tree_root, - BTRFS_SUPER_INFO_OFFSET, - 4096); - if (!fs_info->sb_buffer) + bh = __bread(fs_devices->latest_bdev, + BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) goto fail_iput; - read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, - sizeof(fs_info->super_copy)); + memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); + brelse(bh); - read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, - (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), - BTRFS_FSID_SIZE); + memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) @@ -1263,7 +1261,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; tree_root->stripesize = stripesize; - sb_set_blocksize(sb, sectorsize); + + sb->s_blocksize = sectorsize; + sb->s_blocksize_bits = blksize_bits(sectorsize); if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { @@ -1339,7 +1339,6 @@ fail_tree_root: fail_sys_array: mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: - free_extent_buffer(fs_info->sb_buffer); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); fail_iput: iput(fs_info->btree_inode); @@ -1380,41 +1379,44 @@ int write_all_supers(struct btrfs_root *root) struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; - struct extent_buffer *sb; + struct btrfs_super_block *sb; struct btrfs_dev_item *dev_item; struct buffer_head *bh; int ret; int do_barriers; int max_errors; int total_errors = 0; + u32 crc; + u64 flags; max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; 
do_barriers = !btrfs_test_opt(root, NOBARRIER); - sb = root->fs_info->sb_buffer; - dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, - dev_item); + sb = &root->fs_info->super_for_commit; + dev_item = &sb->dev_item; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); - btrfs_set_device_type(sb, dev_item, dev->type); - btrfs_set_device_id(sb, dev_item, dev->devid); - btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes); - btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used); - btrfs_set_device_io_align(sb, dev_item, dev->io_align); - btrfs_set_device_io_width(sb, dev_item, dev->io_width); - btrfs_set_device_sector_size(sb, dev_item, dev->sector_size); - write_extent_buffer(sb, dev->uuid, - (unsigned long)btrfs_device_uuid(dev_item), - BTRFS_UUID_SIZE); - - btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN); - csum_tree_block(root, sb, 0); - - bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / - root->fs_info->sb->s_blocksize, + btrfs_set_stack_device_type(dev_item, dev->type); + btrfs_set_stack_device_id(dev_item, dev->devid); + btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); + btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); + btrfs_set_stack_device_io_align(dev_item, dev->io_align); + btrfs_set_stack_device_io_width(dev_item, dev->io_width); + btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); + memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); + flags = btrfs_super_flags(sb); + btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); + + + crc = ~(u32)0; + crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, sb->csum); + + bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096, BTRFS_SUPER_INFO_SIZE); - read_extent_buffer(sb, bh->b_data, 0, BTRFS_SUPER_INFO_SIZE); + memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); dev->pending_io = bh; get_bh(bh); @@ -1483,15 +1485,6 @@ int 
write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; ret = write_all_supers(root); -#if 0 - if (!btrfs_test_opt(root, NOBARRIER)) - blkdev_issue_flush(sb->s_bdev, NULL); - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); - ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, - super->start, super->len); - if (!btrfs_test_opt(root, NOBARRIER)) - blkdev_issue_flush(sb->s_bdev, NULL); -#endif return ret; } @@ -1570,8 +1563,6 @@ int close_ctree(struct btrfs_root *root) if (root->fs_info->dev_root->node); free_extent_buffer(root->fs_info->dev_root->node); - free_extent_buffer(fs_info->sb_buffer); - btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); @@ -1652,7 +1643,7 @@ void btrfs_throttle(struct btrfs_root *root) { struct backing_dev_info *bdi; - bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + bdi = &root->fs_info->bdi; if (root->fs_info->throttles && bdi_write_congested(bdi)) { #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) congestion_wait(WRITE, HZ/20); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f94794a99329..c0e67bde8428 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -147,6 +147,8 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct u64 end; int ret; + bytenr = max_t(u64, bytenr, + BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE); block_group_cache = &info->block_group_cache; ret = find_first_extent_bit(block_group_cache, bytenr, &start, &end, @@ -1059,16 +1061,25 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) } } -static u64 reduce_alloc_profile(u64 flags) +static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags) { + u64 num_devices = root->fs_info->fs_devices->num_devices; + + if (num_devices == 1) + flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); + if (num_devices < 4) + flags &= ~BTRFS_BLOCK_GROUP_RAID10; + if ((flags & BTRFS_BLOCK_GROUP_DUP) && (flags & 
(BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_RAID10))) + BTRFS_BLOCK_GROUP_RAID10))) { flags &= ~BTRFS_BLOCK_GROUP_DUP; + } if ((flags & BTRFS_BLOCK_GROUP_RAID1) && - (flags & BTRFS_BLOCK_GROUP_RAID10)) + (flags & BTRFS_BLOCK_GROUP_RAID10)) { flags &= ~BTRFS_BLOCK_GROUP_RAID1; + } if ((flags & BTRFS_BLOCK_GROUP_RAID0) && ((flags & BTRFS_BLOCK_GROUP_RAID1) | @@ -1078,7 +1089,6 @@ static u64 reduce_alloc_profile(u64 flags) return flags; } - static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 alloc_bytes, u64 flags) @@ -1089,7 +1099,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 num_bytes; int ret; - flags = reduce_alloc_profile(flags); + flags = reduce_alloc_profile(extent_root, flags); space_info = __find_space_info(extent_root->fs_info, flags); if (!space_info) { @@ -1169,6 +1179,21 @@ static int update_block_group(struct btrfs_trans_handle *trans, return 0; } +static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) +{ + u64 start; + u64 end; + int ret; + ret = find_first_extent_bit(&root->fs_info->block_group_cache, + search_start, &start, &end, + BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA | + BLOCK_GROUP_SYSTEM); + if (ret) + return 0; + return start; +} + + static int update_pinned_extents(struct btrfs_root *root, u64 bytenr, u64 num, int pin) { @@ -1185,16 +1210,25 @@ static int update_pinned_extents(struct btrfs_root *root, } while (num > 0) { cache = btrfs_lookup_block_group(fs_info, bytenr); - WARN_ON(!cache); - len = min(num, cache->key.offset - - (bytenr - cache->key.objectid)); + if (!cache) { + u64 first = first_logical_byte(root, bytenr); + WARN_ON(first < bytenr); + len = min(first - bytenr, num); + } else { + len = min(num, cache->key.offset - + (bytenr - cache->key.objectid)); + } if (pin) { - cache->pinned += len; - cache->space_info->bytes_pinned += len; + if (cache) { + cache->pinned += len; + cache->space_info->bytes_pinned += len; + } fs_info->total_pinned += len; 
} else { - cache->pinned -= len; - cache->space_info->bytes_pinned -= len; + if (cache) { + cache->pinned -= len; + cache->space_info->bytes_pinned -= len; + } fs_info->total_pinned -= len; } bytenr += len; @@ -1547,7 +1581,7 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, int data) { int ret; - u64 orig_search_start = search_start; + u64 orig_search_start; struct btrfs_root * root = orig_root->fs_info->extent_root; struct btrfs_fs_info *info = root->fs_info; u64 total_needed = num_bytes; @@ -1577,6 +1611,9 @@ static int noinline find_free_extent(struct btrfs_trans_handle *trans, } } + search_start = max(search_start, first_logical_byte(root, 0)); + orig_search_start = search_start; + if (search_end == (u64)-1) search_end = btrfs_super_total_bytes(&info->super_copy); @@ -1751,7 +1788,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; } again: - data = reduce_alloc_profile(data); + data = reduce_alloc_profile(root, data); if (root->ref_cows) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { ret = do_chunk_alloc(trans, root->fs_info->extent_root, @@ -2309,6 +2346,7 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, struct file_ra_state *ra; unsigned long total_read = 0; unsigned long ra_pages; + struct btrfs_trans_handle *trans; ra = kzalloc(sizeof(*ra), GFP_NOFS); @@ -2326,9 +2364,13 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, calc_ra(i, last_index, ra_pages)); } total_read++; + if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size) + goto truncate_racing; + page = grab_cache_page(inode->i_mapping, i); - if (!page) + if (!page) { goto out_unlock; + } if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); @@ -2350,20 +2392,33 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, lock_extent(io_tree, page_start, page_end, GFP_NOFS); - set_page_dirty(page); set_extent_delalloc(io_tree, page_start, 
page_end, GFP_NOFS); + set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); page_cache_release(page); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); } + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + total_read); out_unlock: kfree(ra); + trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); + if (trans) { + btrfs_add_ordered_inode(inode); + btrfs_end_transaction(trans, BTRFS_I(inode)->root); + mark_inode_dirty(inode); + } mutex_unlock(&inode->i_mutex); return 0; + +truncate_racing: + vmtruncate(inode, inode->i_size); + balance_dirty_pages_ratelimited_nr(inode->i_mapping, + total_read); + goto out_unlock; } /* @@ -2466,6 +2521,27 @@ out: return 0; } +static int noinline del_extent_zero(struct btrfs_root *extent_root, + struct btrfs_path *path, + struct btrfs_key *extent_key) +{ + int ret; + struct btrfs_trans_handle *trans; + + trans = btrfs_start_transaction(extent_root, 1); + ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); + if (ret > 0) { + ret = -EIO; + goto out; + } + if (ret < 0) + goto out; + ret = btrfs_del_item(trans, extent_root, path); +out: + btrfs_end_transaction(trans, extent_root); + return ret; +} + static int noinline relocate_one_extent(struct btrfs_root *extent_root, struct btrfs_path *path, struct btrfs_key *extent_key) @@ -2477,6 +2553,10 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, u32 item_size; int ret = 0; + if (extent_key->objectid == 0) { + ret = del_extent_zero(extent_root, path, extent_key); + goto out; + } key.objectid = extent_key->objectid; key.type = BTRFS_EXTENT_REF_KEY; key.offset = 0; @@ -2490,15 +2570,24 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root, ret = 0; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); - if (path->slots[0] == nritems) - goto out; + if (path->slots[0] == nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret > 0) { + ret = 0; + goto out; + } 
+ if (ret < 0) + goto out; + } btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid != extent_key->objectid) + if (found_key.objectid != extent_key->objectid) { break; + } - if (found_key.type != BTRFS_EXTENT_REF_KEY) + if (found_key.type != BTRFS_EXTENT_REF_KEY) { break; + } key.offset = found_key.offset + 1; item_size = btrfs_item_size_nr(leaf, path->slots[0]); @@ -2519,7 +2608,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; - num_devices = btrfs_super_num_devices(&root->fs_info->super_copy); + num_devices = root->fs_info->fs_devices->num_devices; if (num_devices == 1) { stripped |= BTRFS_BLOCK_GROUP_DUP; stripped = flags & ~stripped; @@ -2535,9 +2624,6 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) return flags; } else { /* they already had raid on here, just return */ - if ((flags & BTRFS_BLOCK_GROUP_DUP) && - (flags & BTRFS_BLOCK_GROUP_RAID1)) { - } if (flags & stripped) return flags; @@ -2570,7 +2656,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start) struct extent_buffer *leaf; u32 nritems; int ret; - int progress = 0; + int progress; shrink_block_group = btrfs_lookup_block_group(root->fs_info, shrink_start); @@ -2597,6 +2683,7 @@ again: shrink_block_group->ro = 1; total_found = 0; + progress = 0; key.objectid = shrink_start; key.offset = 0; key.type = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f7beb9b0d37a..b437d3bdf95e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2194,6 +2194,8 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); + if (em) + em->bdev = root->fs_info->fs_devices->latest_bdev; spin_unlock(&em_tree->lock); if (em) { @@ -2212,7 +2214,7 @@ again: em->start = EXTENT_MAP_HOLE; em->len = (u64)-1; - em->bdev = 
inode->i_sb->s_bdev; + em->bdev = root->fs_info->fs_devices->latest_bdev; ret = btrfs_lookup_file_extent(trans, root, path, objectid, start, trans != NULL); if (ret < 0) { @@ -3101,6 +3103,27 @@ out: return ret; } +long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_vol_args *vol_args; + int ret; + + vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); + + if (!vol_args) + return -ENOMEM; + + if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { + ret = -EFAULT; + goto out; + } + ret = btrfs_rm_device(root, vol_args->name); + +out: + kfree(vol_args); + return ret; +} + int dup_item_to_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -3294,6 +3317,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_resize(root, (void __user *)arg); case BTRFS_IOC_ADD_DEV: return btrfs_ioctl_add_dev(root, (void __user *)arg); + case BTRFS_IOC_RM_DEV: + return btrfs_ioctl_rm_dev(root, (void __user *)arg); case BTRFS_IOC_BALANCE: return btrfs_balance(root->fs_info->dev_root); case BTRFS_IOC_CLONE: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7153dfaa3404..020e5a83e31f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -315,24 +315,12 @@ static void btrfs_write_super(struct super_block *sb) sb->s_dirt = 0; } -/* - * This is almost a copy of get_sb_bdev in fs/super.c. - * We need the local copy to allow direct mounting of - * subvolumes, but this could be easily integrated back - * into the generic version. 
--hch - */ - -/* start copy & paste */ -static int set_bdev_super(struct super_block *s, void *data) +static int btrfs_test_super(struct super_block *s, void *data) { - s->s_bdev = data; - s->s_dev = s->s_bdev->bd_dev; - return 0; -} + struct btrfs_fs_devices *test_fs_devices = data; + struct btrfs_root *root = btrfs_sb(s); -static int test_bdev_super(struct super_block *s, void *data) -{ - return (void *)s->s_bdev == data; + return root->fs_info->fs_devices == test_fs_devices; } int btrfs_get_sb_bdev(struct file_system_type *fs_type, @@ -354,14 +342,9 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, return error; bdev = fs_devices->lowest_bdev; - /* - * once the super is inserted into the list by sget, s_umount - * will protect the lockfs code from trying to start a snapshot - * while we are mounting - */ - down(&bdev->bd_mount_sem); - s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); - up(&bdev->bd_mount_sem); + btrfs_lock_volumes(); + s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); + btrfs_unlock_volumes(); if (IS_ERR(s)) goto error_s; @@ -373,13 +356,11 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, goto error_bdev; } - close_bdev_excl(bdev); } else { char b[BDEVNAME_SIZE]; s->s_flags = flags; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); - sb_set_blocksize(s, block_size(bdev)); error = btrfs_fill_super(s, fs_devices, data, flags & MS_SILENT ? 
1 : 0); if (error) { @@ -458,7 +439,7 @@ static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", .get_sb = btrfs_get_sb, - .kill_sb = kill_block_super, + .kill_sb = kill_anon_super, .fs_flags = FS_REQUIRES_DEV, }; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9826942fa18a..57746c11eae3 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -738,9 +738,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, chunk_root->node->start); btrfs_set_super_chunk_root_level(&root->fs_info->super_copy, btrfs_header_level(chunk_root->node)); - write_extent_buffer(root->fs_info->sb_buffer, - &root->fs_info->super_copy, 0, - sizeof(root->fs_info->super_copy)); + memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy, + sizeof(root->fs_info->super_copy)); btrfs_copy_pinned(root, pinned_copy); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b38187573108..55da5f0c56e3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -45,6 +45,16 @@ struct map_lookup { static DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); +void btrfs_lock_volumes(void) +{ + mutex_lock(&uuid_mutex); +} + +void btrfs_unlock_volumes(void) +{ + mutex_unlock(&uuid_mutex); +} + int btrfs_cleanup_fs_uuids(void) { struct btrfs_fs_devices *fs_devices; @@ -193,12 +203,14 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, ret = PTR_ERR(bdev); goto fail; } + set_blocksize(bdev, 4096); if (device->devid == fs_devices->latest_devid) fs_devices->latest_bdev = bdev; if (device->devid == fs_devices->lowest_devid) { fs_devices->lowest_bdev = bdev; } device->bdev = bdev; + } mutex_unlock(&uuid_mutex); return 0; @@ -393,6 +405,9 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_root *root = device->dev_root; struct btrfs_key key; + struct btrfs_key found_key; + struct extent_buffer *leaf = NULL; + struct btrfs_dev_extent *extent = NULL; path = 
btrfs_alloc_path(); if (!path) @@ -403,8 +418,25 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, key.type = BTRFS_DEV_EXTENT_KEY; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret > 0) { + ret = btrfs_previous_item(root, path, key.objectid, + BTRFS_DEV_EXTENT_KEY); + BUG_ON(ret); + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_extent); + BUG_ON(found_key.offset > start || found_key.offset + + btrfs_dev_extent_length(leaf, extent) < start); + ret = 0; + } else if (ret == 0) { + leaf = path->nodes[0]; + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_extent); + } BUG_ON(ret); + device->bytes_used -= btrfs_dev_extent_length(leaf, extent); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -593,6 +625,170 @@ out: return ret; } +static int btrfs_rm_dev_item(struct btrfs_root *root, + struct btrfs_device *device) +{ + int ret; + struct btrfs_path *path; + struct block_device *bdev = device->bdev; + struct btrfs_device *next_dev; + struct btrfs_key key; + u64 total_bytes; + struct btrfs_fs_devices *fs_devices; + struct btrfs_trans_handle *trans; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + trans = btrfs_start_transaction(root, 1); + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.type = BTRFS_DEV_ITEM_KEY; + key.offset = device->devid; + + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + + if (ret > 0) { + ret = -ENOENT; + goto out; + } + + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + + /* + * at this point, the device is zero sized. 
We want to + * remove it from the devices list and zero out the old super + */ + list_del_init(&device->dev_list); + list_del_init(&device->dev_alloc_list); + fs_devices = root->fs_info->fs_devices; + + next_dev = list_entry(fs_devices->devices.next, struct btrfs_device, + dev_list); + if (bdev == fs_devices->lowest_bdev) + fs_devices->lowest_bdev = next_dev->bdev; + if (bdev == root->fs_info->sb->s_bdev) + root->fs_info->sb->s_bdev = next_dev->bdev; + if (bdev == fs_devices->latest_bdev) + fs_devices->latest_bdev = next_dev->bdev; + + total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); + btrfs_set_super_total_bytes(&root->fs_info->super_copy, + total_bytes - device->total_bytes); + + total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); + btrfs_set_super_num_devices(&root->fs_info->super_copy, + total_bytes - 1); +out: + btrfs_free_path(path); + btrfs_commit_transaction(trans, root); + return ret; +} + +int btrfs_rm_device(struct btrfs_root *root, char *device_path) +{ + struct btrfs_device *device; + struct block_device *bdev; + struct buffer_head *bh; + struct btrfs_super_block *disk_super; + u64 all_avail; + u64 devid; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&uuid_mutex); + + all_avail = root->fs_info->avail_data_alloc_bits | + root->fs_info->avail_system_alloc_bits | + root->fs_info->avail_metadata_alloc_bits; + + if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && + root->fs_info->fs_devices->num_devices <= 4) { + printk("btrfs: unable to go below four devices on raid10\n"); + ret = -EINVAL; + goto out; + } + + if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && + root->fs_info->fs_devices->num_devices <= 2) { + printk("btrfs: unable to go below two devices on raid1\n"); + ret = -EINVAL; + goto out; + } + + bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + + bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) { + ret = 
-EIO; + goto error_close; + } + disk_super = (struct btrfs_super_block *)bh->b_data; + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + ret = -ENOENT; + goto error_brelse; + } + if (memcmp(disk_super->fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { + ret = -ENOENT; + goto error_brelse; + } + devid = le64_to_cpu(disk_super->dev_item.devid); + device = btrfs_find_device(root, devid, NULL); + if (!device) { + ret = -ENOENT; + goto error_brelse; + } + + root->fs_info->fs_devices->num_devices--; + + ret = btrfs_shrink_device(device, 0); + if (ret) + goto error_brelse; + + + ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); + if (ret) + goto error_brelse; + + /* make sure this device isn't detected as part of the FS anymore */ + memset(&disk_super->magic, 0, sizeof(disk_super->magic)); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); + + brelse(bh); + + /* one close for the device struct or super_block */ + close_bdev_excl(device->bdev); + + /* one close for us */ + close_bdev_excl(device->bdev); + + kfree(device->name); + kfree(device); + ret = 0; + goto out; + +error_brelse: + brelse(bh); +error_close: + close_bdev_excl(bdev); +out: + mutex_unlock(&uuid_mutex); + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + int btrfs_init_new_device(struct btrfs_root *root, char *device_path) { struct btrfs_trans_handle *trans; @@ -831,13 +1027,17 @@ int btrfs_relocate_chunk(struct btrfs_root *root, em = lookup_extent_mapping(em_tree, chunk_offset, 1); spin_unlock(&em_tree->lock); - BUG_ON(em->start > chunk_offset || em->start + em->len < chunk_offset); + BUG_ON(em->start > chunk_offset || + em->start + em->len < chunk_offset); map = (struct map_lookup *)em->bdev; for (i = 0; i < map->num_stripes; i++) { ret = btrfs_free_dev_extent(trans, map->stripes[i].dev, map->stripes[i].physical); BUG_ON(ret); + + ret = btrfs_update_device(trans, map->stripes[i].dev); + BUG_ON(ret); } ret = btrfs_free_chunk(trans, root, chunk_tree, 
chunk_objectid, chunk_offset); @@ -847,11 +1047,8 @@ int btrfs_relocate_chunk(struct btrfs_root *root, if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); BUG_ON(ret); - goto out; } - - spin_lock(&em_tree->lock); remove_extent_mapping(em_tree, em); kfree(map); @@ -861,7 +1058,6 @@ int btrfs_relocate_chunk(struct btrfs_root *root, free_extent_map(em); spin_unlock(&em_tree->lock); -out: /* once for us */ free_extent_map(em); @@ -1449,7 +1645,7 @@ again: return 0; if (!em) { - printk("unable to find logical %Lu\n", logical); + printk("unable to find logical %Lu len %Lu\n", logical, *length); BUG(); } @@ -1712,6 +1908,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, logical = key->offset; length = btrfs_chunk_length(leaf, chunk); + spin_lock(&map_tree->map_tree.lock); em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); spin_unlock(&map_tree->map_tree.lock); @@ -1845,7 +2042,7 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf) int btrfs_read_sys_array(struct btrfs_root *root) { struct btrfs_super_block *super_copy = &root->fs_info->super_copy; - struct extent_buffer *sb = root->fs_info->sb_buffer; + struct extent_buffer *sb; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; u8 *ptr; @@ -1857,6 +2054,12 @@ int btrfs_read_sys_array(struct btrfs_root *root) u32 cur; struct btrfs_key key; + sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET, + BTRFS_SUPER_INFO_SIZE); + if (!sb) + return -ENOMEM; + btrfs_set_buffer_uptodate(sb); + write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); array_size = btrfs_super_sys_array_size(super_copy); ptr = super_copy->sys_chunk_array; @@ -1867,8 +2070,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) disk_key = (struct btrfs_disk_key *)ptr; btrfs_disk_key_to_cpu(&key, disk_key); - len = sizeof(*disk_key); - ptr += len; + len = sizeof(*disk_key); ptr += len; sb_ptr += len; cur += 
len; @@ -1887,6 +2089,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) sb_ptr += len; cur += len; } + free_extent_buffer(sb); return ret; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index a9663e92bb14..0f94a69e6eb6 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -125,6 +125,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); int btrfs_add_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); +int btrfs_rm_device(struct btrfs_root *root, char *device_path); int btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, @@ -136,4 +137,6 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_root *dev_root); +void btrfs_unlock_volumes(void); +void btrfs_lock_volumes(void); #endif -- cgit v1.2.3 From dfe25020689bb2d318782d2c9c7141203583fc70 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 May 2008 13:46:40 -0400 Subject: Btrfs: Add mount -o degraded to allow mounts to continue with missing devices Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 + fs/btrfs/disk-io.c | 49 ++++++---- fs/btrfs/disk-io.h | 3 +- fs/btrfs/super.c | 25 +++-- fs/btrfs/volumes.c | 279 ++++++++++++++++++++++++++++++++++++++--------------- fs/btrfs/volumes.h | 3 + 6 files changed, 253 insertions(+), 109 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6c2c2c4e4d2a..a9377fb16c37 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -657,6 +657,7 @@ struct btrfs_root { #define BTRFS_MOUNT_NODATACOW (1 << 1) #define BTRFS_MOUNT_NOBARRIER (1 << 2) #define BTRFS_MOUNT_SSD (1 << 3) +#define BTRFS_MOUNT_DEGRADED (1 << 4) #define btrfs_clear_opt(o, opt) ((o) &= 
~BTRFS_MOUNT_##opt) #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) @@ -1606,4 +1607,6 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); /* super.c */ u64 btrfs_parse_size(char *str); +int btrfs_parse_options(char *options, struct btrfs_root *root, + char **subvol_name); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 574b1245964e..38b0d9ecda6a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -614,21 +614,6 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, return ret; } -static int close_all_devices(struct btrfs_fs_info *fs_info) -{ - struct list_head *list; - struct list_head *next; - struct btrfs_device *device; - - list = &fs_info->fs_devices->devices; - list_for_each(next, list) { - device = list_entry(next, struct btrfs_device, dev_list); - close_bdev_excl(device->bdev); - device->bdev = NULL; - } - return 0; -} - struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -927,6 +912,8 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) list_for_each(cur, &info->fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); + if (!device->bdev) + continue; bdi = blk_get_backing_dev_info(device->bdev); if (bdi && bdi_congested(bdi, bdi_bits)) { ret = 1; @@ -1140,7 +1127,8 @@ static void btrfs_async_submit_work(struct work_struct *work) } struct btrfs_root *open_ctree(struct super_block *sb, - struct btrfs_fs_devices *fs_devices) + struct btrfs_fs_devices *fs_devices, + char *options) { u32 sectorsize; u32 nodesize; @@ -1276,12 +1264,19 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; - if (btrfs_super_num_devices(disk_super) != fs_devices->num_devices) { + btrfs_parse_options(options, tree_root, NULL); + + if (btrfs_super_num_devices(disk_super) > fs_devices->num_devices) { printk("Btrfs: wanted %llu 
devices, but found %llu\n", (unsigned long long)btrfs_super_num_devices(disk_super), (unsigned long long)fs_devices->num_devices); - goto fail_sb_buffer; + if (btrfs_test_opt(tree_root, DEGRADED)) + printk("continuing in degraded mode\n"); + else { + goto fail_sb_buffer; + } } + fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); nodesize = btrfs_super_nodesize(disk_super); @@ -1329,6 +1324,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_read_chunk_tree(chunk_root); BUG_ON(ret); + btrfs_close_extra_devices(fs_devices); + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); @@ -1374,7 +1371,7 @@ fail_sb_buffer: fail_iput: iput(fs_info->btree_inode); fail: - close_all_devices(fs_info); + btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); kfree(extent_root); @@ -1429,6 +1426,13 @@ int write_all_supers(struct btrfs_root *root) dev_item = &sb->dev_item; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); + if (!dev->bdev) { + total_errors++; + continue; + } + if (!dev->in_fs_metadata) + continue; + btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); @@ -1482,6 +1486,11 @@ int write_all_supers(struct btrfs_root *root) list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); + if (!dev->bdev) + continue; + if (!dev->in_fs_metadata) + continue; + BUG_ON(!dev->pending_io); bh = dev->pending_io; wait_on_buffer(bh); @@ -1631,7 +1640,7 @@ int close_ctree(struct btrfs_root *root) kfree(hasher); } #endif - close_all_devices(fs_info); + btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 30d1ed293c25..2bc64fefe6ea 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -33,7 
+33,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf); struct btrfs_root *open_ctree(struct super_block *sb, - struct btrfs_fs_devices *fs_devices); + struct btrfs_fs_devices *fs_devices, + char *options); int close_ctree(struct btrfs_root *root); int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 020e5a83e31f..273a5b511f50 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -65,11 +65,13 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, - Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, + Opt_degraded, Opt_subvol, Opt_nodatasum, Opt_nodatacow, + Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, + Opt_ssd, Opt_err, }; static match_table_t tokens = { + {Opt_degraded, "degraded"}, {Opt_subvol, "subvol=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, @@ -106,9 +108,8 @@ u64 btrfs_parse_size(char *str) return res; } -static int parse_options (char * options, - struct btrfs_root *root, - char **subvol_name) +int btrfs_parse_options(char *options, struct btrfs_root *root, + char **subvol_name) { char * p; struct btrfs_fs_info *info = NULL; @@ -135,6 +136,12 @@ static int parse_options (char * options, token = match_token(p, tokens, args); switch (token) { + case Opt_degraded: + if (info) { + printk("btrfs: allowing degraded mounts\n"); + btrfs_set_opt(info->mount_opt, DEGRADED); + } + break; case Opt_subvol: if (subvol_name) { *subvol_name = match_strdup(&args[0]); @@ -234,7 +241,7 @@ static int btrfs_fill_super(struct super_block * sb, sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; - tree_root = open_ctree(sb, fs_devices); + tree_root = open_ctree(sb, fs_devices, (char *)data); if (IS_ERR(tree_root)) { 
printk("btrfs: open_ctree failed\n"); @@ -267,8 +274,6 @@ static int btrfs_fill_super(struct super_block * sb, goto fail_close; } - parse_options((char *)data, tree_root, NULL); - /* this does the super kobj at the same time */ err = btrfs_sysfs_add_super(tree_root->fs_info); if (err) @@ -341,7 +346,7 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, if (error) return error; - bdev = fs_devices->lowest_bdev; + bdev = fs_devices->latest_bdev; btrfs_lock_volumes(); s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); btrfs_unlock_volumes(); @@ -411,7 +416,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int ret; char *subvol_name = NULL; - parse_options((char *)data, NULL, &subvol_name); + btrfs_parse_options((char *)data, NULL, &subvol_name); ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, mnt, subvol_name ? subvol_name : "default"); if (subvol_name) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5fc7fb481474..43f74d17bcea 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -73,6 +73,7 @@ int btrfs_cleanup_fs_uuids(void) close_bdev_excl(dev->bdev); } list_del(&dev->dev_list); + kfree(dev->name); kfree(dev); } } @@ -127,7 +128,6 @@ static int device_list_add(const char *path, memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; - fs_devices->lowest_devid = (u64)-1; fs_devices->num_devices = 0; device = NULL; } else { @@ -159,13 +159,35 @@ static int device_list_add(const char *path, fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; } - if (fs_devices->lowest_devid > devid) { - fs_devices->lowest_devid = devid; - } *fs_devices_ret = fs_devices; return 0; } +int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) +{ + struct list_head *head = &fs_devices->devices; + struct list_head *cur; + struct btrfs_device *device; + + mutex_lock(&uuid_mutex); +again: + list_for_each(cur, head) { + 
device = list_entry(cur, struct btrfs_device, dev_list); + if (!device->in_fs_metadata) { +printk("getting rid of extra dev %s\n", device->name); + if (device->bdev) + close_bdev_excl(device->bdev); + list_del(&device->dev_list); + list_del(&device->dev_alloc_list); + fs_devices->num_devices--; + kfree(device->name); + kfree(device); + goto again; + } + } + mutex_unlock(&uuid_mutex); + return 0; +} int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { struct list_head *head = &fs_devices->devices; @@ -179,6 +201,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) close_bdev_excl(device->bdev); } device->bdev = NULL; + device->in_fs_metadata = 0; } mutex_unlock(&uuid_mutex); return 0; @@ -199,6 +222,9 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, if (device->bdev) continue; + if (!device->name) + continue; + bdev = open_bdev_excl(device->name, flags, holder); if (IS_ERR(bdev)) { @@ -209,10 +235,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, set_blocksize(bdev, 4096); if (device->devid == fs_devices->latest_devid) fs_devices->latest_bdev = bdev; - if (device->devid == fs_devices->lowest_devid) { - fs_devices->lowest_bdev = bdev; - } device->bdev = bdev; + device->in_fs_metadata = 0; } mutex_unlock(&uuid_mutex); @@ -439,7 +463,8 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, } BUG_ON(ret); - device->bytes_used -= btrfs_dev_extent_length(leaf, extent); + if (device->bytes_used > 0) + device->bytes_used -= btrfs_dev_extent_length(leaf, extent); ret = btrfs_del_item(trans, root, path); BUG_ON(ret); @@ -460,6 +485,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_key key; + WARN_ON(!device->in_fs_metadata); path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -674,8 +700,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, next_dev = list_entry(fs_devices->devices.next, struct btrfs_device, dev_list); - if (bdev == 
fs_devices->lowest_bdev) - fs_devices->lowest_bdev = next_dev->bdev; if (bdev == root->fs_info->sb->s_bdev) root->fs_info->sb->s_bdev = next_dev->bdev; if (bdev == fs_devices->latest_bdev) @@ -698,7 +722,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) { struct btrfs_device *device; struct block_device *bdev; - struct buffer_head *bh; + struct buffer_head *bh = NULL; struct btrfs_super_block *disk_super; u64 all_avail; u64 devid; @@ -712,47 +736,73 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->avail_metadata_alloc_bits; if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && - root->fs_info->fs_devices->num_devices <= 4) { + btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) { printk("btrfs: unable to go below four devices on raid10\n"); ret = -EINVAL; goto out; } if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && - root->fs_info->fs_devices->num_devices <= 2) { + btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) { printk("btrfs: unable to go below two devices on raid1\n"); ret = -EINVAL; goto out; } - bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); - if (IS_ERR(bdev)) { - ret = PTR_ERR(bdev); - goto out; - } + if (strcmp(device_path, "missing") == 0) { + struct list_head *cur; + struct list_head *devices; + struct btrfs_device *tmp; - bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); - if (!bh) { - ret = -EIO; - goto error_close; - } - disk_super = (struct btrfs_super_block *)bh->b_data; - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, - sizeof(disk_super->magic))) { - ret = -ENOENT; - goto error_brelse; - } - if (memcmp(disk_super->fsid, root->fs_info->fsid, BTRFS_FSID_SIZE)) { - ret = -ENOENT; - goto error_brelse; - } - devid = le64_to_cpu(disk_super->dev_item.devid); - device = btrfs_find_device(root, devid, NULL); - if (!device) { - ret = -ENOENT; - goto error_brelse; - } + device = NULL; + devices = &root->fs_info->fs_devices->devices; + list_for_each(cur, devices) { + 
tmp = list_entry(cur, struct btrfs_device, dev_list); + if (tmp->in_fs_metadata && !tmp->bdev) { + device = tmp; + break; + } + } + bdev = NULL; + bh = NULL; + disk_super = NULL; + if (!device) { + printk("btrfs: no missing devices found to remove\n"); + goto out; + } + + } else { + bdev = open_bdev_excl(device_path, 0, + root->fs_info->bdev_holder); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + + bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) { + ret = -EIO; + goto error_close; + } + disk_super = (struct btrfs_super_block *)bh->b_data; + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + ret = -ENOENT; + goto error_brelse; + } + if (memcmp(disk_super->fsid, root->fs_info->fsid, + BTRFS_FSID_SIZE)) { + ret = -ENOENT; + goto error_brelse; + } + devid = le64_to_cpu(disk_super->dev_item.devid); + device = btrfs_find_device(root, devid, NULL); + if (!device) { + ret = -ENOENT; + goto error_brelse; + } + } root->fs_info->fs_devices->num_devices--; ret = btrfs_shrink_device(device, 0); @@ -764,19 +814,25 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (ret) goto error_brelse; - /* make sure this device isn't detected as part of the FS anymore */ - memset(&disk_super->magic, 0, sizeof(disk_super->magic)); - set_buffer_dirty(bh); - sync_dirty_buffer(bh); - - brelse(bh); - - /* one close for the device struct or super_block */ - close_bdev_excl(device->bdev); + if (bh) { + /* make sure this device isn't detected as part of + * the FS anymore + */ + memset(&disk_super->magic, 0, sizeof(disk_super->magic)); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); - /* one close for us */ - close_bdev_excl(device->bdev); + brelse(bh); + } + if (device->bdev) { + /* one close for the device struct or super_block */ + close_bdev_excl(device->bdev); + } + if (bdev) { + /* one close for us */ + close_bdev_excl(bdev); + } kfree(device->name); kfree(device); ret = 0; @@ -785,7 +841,8 @@ int 
btrfs_rm_device(struct btrfs_root *root, char *device_path) error_brelse: brelse(bh); error_close: - close_bdev_excl(bdev); + if (bdev) + close_bdev_excl(bdev); out: mutex_unlock(&uuid_mutex); mutex_unlock(&root->fs_info->fs_mutex); @@ -839,6 +896,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->total_bytes = i_size_read(bdev->bd_inode); device->dev_root = root->fs_info->dev_root; device->bdev = bdev; + device->in_fs_metadata = 1; ret = btrfs_add_device(trans, root, device); if (ret) @@ -1041,8 +1099,10 @@ int btrfs_relocate_chunk(struct btrfs_root *root, map->stripes[i].physical); BUG_ON(ret); - ret = btrfs_update_device(trans, map->stripes[i].dev); - BUG_ON(ret); + if (map->stripes[i].dev) { + ret = btrfs_update_device(trans, map->stripes[i].dev); + BUG_ON(ret); + } } ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid, chunk_offset); @@ -1415,10 +1475,13 @@ again: while(index < num_stripes) { device = list_entry(cur, struct btrfs_device, dev_alloc_list); - avail = device->total_bytes - device->bytes_used; + if (device->total_bytes > device->bytes_used) + avail = device->total_bytes - device->bytes_used; + else + avail = 0; cur = cur->next; - if (avail >= min_free) { + if (device->in_fs_metadata && avail >= min_free) { u64 ignored_start = 0; ret = find_free_dev_extent(trans, device, path, min_free, @@ -1430,7 +1493,7 @@ again: if (type & BTRFS_BLOCK_GROUP_DUP) index++; } - } else if (avail > max_avail) + } else if (device->in_fs_metadata && avail > max_avail) max_avail = avail; if (cur == dev_list) break; @@ -1610,6 +1673,22 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) return ret; } +static int find_live_mirror(struct map_lookup *map, int first, int num, + int optimal) +{ + int i; + if (map->stripes[optimal].dev->bdev) + return optimal; + for (i = first; i < first + num; i++) { + if (map->stripes[i].dev->bdev) + return i; + } + /* we couldn't find one that doesn't fail. 
Just return something + * and the io error handling code will clean up eventually + */ + return optimal; +} + static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, u64 logical, u64 *length, struct btrfs_multi_bio **multi_ret, @@ -1712,8 +1791,11 @@ again: num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; - else - stripe_index = current->pid % map->num_stripes; + else { + stripe_index = find_live_mirror(map, 0, + map->num_stripes, + current->pid % map->num_stripes); + } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { if (rw & (1 << BIO_RW)) @@ -1731,8 +1813,11 @@ again: num_stripes = map->sub_stripes; else if (mirror_num) stripe_index += mirror_num - 1; - else - stripe_index += current->pid % map->sub_stripes; + else { + stripe_index = find_live_mirror(map, stripe_index, + map->sub_stripes, stripe_index + + current->pid % map->sub_stripes); + } } else { /* * after this do_div call, stripe_nr is the number of stripes @@ -1749,9 +1834,11 @@ again: struct backing_dev_info *bdi; device = map->stripes[stripe_index].dev; - bdi = blk_get_backing_dev_info(device->bdev); - if (bdi->unplug_io_fn) { - bdi->unplug_io_fn(bdi, unplug_page); + if (device->bdev) { + bdi = blk_get_backing_dev_info(device->bdev); + if (bdi->unplug_io_fn) { + bdi->unplug_io_fn(bdi, unplug_page); + } } } else { multi->stripes[i].physical = @@ -1880,12 +1967,21 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } bio->bi_sector = multi->stripes[dev_nr].physical >> 9; dev = multi->stripes[dev_nr].dev; - - bio->bi_bdev = dev->bdev; - spin_lock(&dev->io_lock); - dev->total_ios++; - spin_unlock(&dev->io_lock); - submit_bio(rw, bio); + if (dev && dev->bdev) { + bio->bi_bdev = dev->bdev; + spin_lock(&dev->io_lock); + dev->total_ios++; + spin_unlock(&dev->io_lock); + submit_bio(rw, bio); + } else { + bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; + bio->bi_sector = logical >> 9; +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + 
bio_endio(bio, bio->bi_size, -EIO); +#else + bio_endio(bio, -EIO); +#endif + } dev_nr++; } if (total_devs == 1) @@ -1901,6 +1997,27 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, return __find_device(head, devid, uuid); } +static struct btrfs_device *add_missing_dev(struct btrfs_root *root, + u64 devid, u8 *dev_uuid) +{ + struct btrfs_device *device; + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; + + device = kzalloc(sizeof(*device), GFP_NOFS); + list_add(&device->dev_list, + &fs_devices->devices); + list_add(&device->dev_alloc_list, + &fs_devices->alloc_list); + device->barriers = 1; + device->dev_root = root->fs_info->dev_root; + device->devid = devid; + fs_devices->num_devices++; + spin_lock_init(&device->io_lock); + memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); + return device; +} + + static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) @@ -1965,11 +2082,22 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, btrfs_stripe_dev_uuid_nr(chunk, i), BTRFS_UUID_SIZE); map->stripes[i].dev = btrfs_find_device(root, devid, uuid); - if (!map->stripes[i].dev) { + + if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { kfree(map); free_extent_map(em); return -EIO; } + if (!map->stripes[i].dev) { + map->stripes[i].dev = + add_missing_dev(root, devid, uuid); + if (!map->stripes[i].dev) { + kfree(map); + free_extent_map(em); + return -EIO; + } + } + map->stripes[i].dev->in_fs_metadata = 1; } spin_lock(&map_tree->map_tree.lock); @@ -2016,20 +2144,15 @@ static int read_one_dev(struct btrfs_root *root, BTRFS_UUID_SIZE); device = btrfs_find_device(root, devid, dev_uuid); if (!device) { - printk("warning devid %Lu not found already\n", devid); - device = kzalloc(sizeof(*device), GFP_NOFS); + printk("warning devid %Lu missing\n", devid); + device = add_missing_dev(root, devid, dev_uuid); if (!device) return -ENOMEM; - 
list_add(&device->dev_list, - &root->fs_info->fs_devices->devices); - list_add(&device->dev_alloc_list, - &root->fs_info->fs_devices->alloc_list); - device->barriers = 1; - spin_lock_init(&device->io_lock); } fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; + device->in_fs_metadata = 1; ret = 0; #if 0 ret = btrfs_open_device(device); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 0f94a69e6eb6..454fe8103329 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -27,8 +27,10 @@ struct btrfs_device { struct list_head dev_alloc_list; struct btrfs_root *dev_root; struct buffer_head *pending_io; + u64 generation; int barriers; + int in_fs_metadata; spinlock_t io_lock; @@ -122,6 +124,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int btrfs_scan_one_device(const char *path, int flags, void *holder, struct btrfs_fs_devices **fs_devices_ret); int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); +int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices); int btrfs_add_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); -- cgit v1.2.3 From e1b81e6761bd8419146d4bbe0aadd8b2d348a01c Mon Sep 17 00:00:00 2001 From: Mingming Date: Tue, 27 May 2008 10:55:43 -0400 Subject: btrfs delete ordered inode handling fix Use btrfs_release_file instead of a put_inode call Signed-off-by: Chris Mason --- fs/btrfs/file.c | 7 +++++++ fs/btrfs/inode.c | 24 +----------------------- fs/btrfs/ordered-data.c | 21 ++++++++++++++------- fs/btrfs/ordered-data.h | 2 +- fs/btrfs/super.c | 1 - 5 files changed, 23 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d968900c6973..c4fa4664a456 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -978,6 +978,12 @@ out_nolock: return num_written ? 
num_written : err; } +static int btrfs_release_file (struct inode * inode, struct file * filp) +{ + btrfs_del_ordered_inode(inode); + return 0; +} + static int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { @@ -1044,6 +1050,7 @@ struct file_operations btrfs_file_operations = { .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, + .release = btrfs_release_file, .fsync = btrfs_sync_file, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 40b4a8ec17fe..1569fb864515 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -857,15 +857,11 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; if (inode->i_nlink == 0) { - int found; /* if the inode isn't linked anywhere, * we don't need to worry about * data=ordered */ - found = btrfs_del_ordered_inode(inode); - if (found == 1) { - atomic_dec(&inode->i_count); - } + btrfs_del_ordered_inode(inode); } btrfs_end_transaction(trans, root); @@ -1271,24 +1267,6 @@ fail: return err; } -void btrfs_put_inode(struct inode *inode) -{ - int ret; - - if (!BTRFS_I(inode)->ordered_trans) { - return; - } - - if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || - mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)) - return; - - ret = btrfs_del_ordered_inode(inode); - if (ret == 1) { - atomic_dec(&inode->i_count); - } -} - void btrfs_delete_inode(struct inode *inode) { struct btrfs_trans_handle *trans; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 3814cb0246b4..8dd8180183ff 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -231,7 +231,7 @@ int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, return 1; } -static int __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, +static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, struct inode *inode, u64 root_objectid, u64 objectid) { @@ 
-243,31 +243,38 @@ static int __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree, node = __tree_search(&tree->tree, root_objectid, objectid, &prev); if (!node) { write_unlock(&tree->lock); - return 0; + return; } rb_erase(node, &tree->tree); BTRFS_I(inode)->ordered_trans = 0; write_unlock(&tree->lock); + atomic_dec(&inode->i_count); entry = rb_entry(node, struct tree_entry, rb_node); kfree(entry); - return 1; + return; } -int btrfs_del_ordered_inode(struct inode *inode) +void btrfs_del_ordered_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 root_objectid = root->root_key.objectid; - int ret = 0; + + if (!BTRFS_I(inode)->ordered_trans) { + return; + } + + if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) || + mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)) + return; spin_lock(&root->fs_info->new_trans_lock); if (root->fs_info->running_transaction) { struct btrfs_ordered_inode_tree *tree; tree = &root->fs_info->running_transaction->ordered_inode_tree; - ret = __btrfs_del_ordered_inode(tree, inode, root_objectid, + __btrfs_del_ordered_inode(tree, inode, root_objectid, inode->i_ino); } spin_unlock(&root->fs_info->new_trans_lock); - return ret; } int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode) diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 29047e0abaab..c515c4b39996 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -38,6 +38,6 @@ int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree, u64 *root_objectid, u64 *objectid, struct inode **inode); -int btrfs_del_ordered_inode(struct inode *inode); +void btrfs_del_ordered_inode(struct inode *inode); int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 273a5b511f50..05029656e42e 100644 --- a/fs/btrfs/super.c +++ 
b/fs/btrfs/super.c @@ -487,7 +487,6 @@ static void btrfs_unlockfs(struct super_block *sb) static struct super_operations btrfs_super_ops = { .delete_inode = btrfs_delete_inode, - .put_inode = btrfs_put_inode, .put_super = btrfs_put_super, .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, -- cgit v1.2.3 From f819d837eea9829ed6c356fb88cdd0a170cbd947 Mon Sep 17 00:00:00 2001 From: Linda Knippers Date: Mon, 9 Jun 2008 22:17:11 -0400 Subject: btrfsctl -A error code fixup Send the error back to userland if the ioctl fails Signed-off-by: Chris Mason --- fs/btrfs/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 05029656e42e..77f44494e229 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -453,7 +453,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, { struct btrfs_ioctl_vol_args *vol; struct btrfs_fs_devices *fs_devices; - int ret; + int ret = 0; int len; vol = kmalloc(sizeof(*vol), GFP_KERNEL); @@ -470,7 +470,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, } out: kfree(vol); - return 0; + return ret; } static void btrfs_write_super_lockfs(struct super_block *sb) -- cgit v1.2.3 From 6bf13c0cc833bf5ba013d6aa60379484bf48c4e6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jun 2008 10:07:39 -0400 Subject: Btrfs: transaction ioctls These ioctls let a user application hold a transaction open while it performs a series of operations. A final ioctl does a sync on the fs (closing the current transaction). This is the main requirement for Ceph's OSD to be able to keep the data it's storing in a btrfs volume consistent, and AFAICS it works just fine. 
The application would do something like fd = ::open("some/file", O_RDONLY); ::ioctl(fd, BTRFS_IOC_TRANS_START); /* do a bunch of stuff */ ::ioctl(fd, BTRFS_IOC_TRANS_END); or just ::close(fd); And to ensure it commits to disk, ::ioctl(fd, BTRFS_IOC_SYNC); When a transaction is held open, the trans_handle is attached to the struct file (via private_data) so that it will get cleaned up if the process dies unexpectedly. A held transaction is also ended on fsync() to avoid a deadlock. A misbehaving application could also deliberately hold a transaction open, effectively locking up the FS, so it may make sense to restrict something like this to root or something. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++++ fs/btrfs/file.c | 7 ++++++- fs/btrfs/inode.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ioctl.h | 9 ++++++++ fs/btrfs/super.c | 2 +- 5 files changed, 83 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad4eacca7f59..1dcf4fb5b688 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1575,6 +1575,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_init_cachep(void); void btrfs_destroy_cachep(void); +long btrfs_ioctl_trans_end(struct file *file); long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, struct btrfs_root *root); @@ -1595,6 +1596,8 @@ extern struct file_operations btrfs_file_operations; int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 start, u64 end, u64 inline_limit, u64 *hint_block); +int btrfs_release_file(struct inode *inode, struct file *file); + /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only); @@ -1615,4 +1618,5 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle 
*trans, u64 btrfs_parse_size(char *str); int btrfs_parse_options(char *options, struct btrfs_root *root, char **subvol_name); +int btrfs_sync_fs(struct super_block *sb, int wait); #endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c4fa4664a456..73c6d085bd90 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -978,9 +978,11 @@ out_nolock: return num_written ? num_written : err; } -static int btrfs_release_file (struct inode * inode, struct file * filp) +int btrfs_release_file(struct inode * inode, struct file * filp) { btrfs_del_ordered_inode(inode); + if (filp->private_data) + btrfs_ioctl_trans_end(filp); return 0; } @@ -1011,6 +1013,9 @@ static int btrfs_sync_file(struct file *file, /* * ok we haven't committed the transaction yet, lets do a commit */ + if (file->private_data) + btrfs_ioctl_trans_end(file); + trans = btrfs_start_transaction(root, 1); if (!trans) { ret = -ENOMEM; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 31aa4ba06fce..0f14697becef 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3336,6 +3336,61 @@ out_fput: return ret; } +/* + * there are many ways the trans_start and trans_end ioctls can lead + * to deadlocks. They should only be used by applications that + * basically own the machine, and have a very in depth understanding + * of all the possible deadlocks and enospc problems. + */ +long btrfs_ioctl_trans_start(struct file *file) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + if (file->private_data) { + ret = -EINPROGRESS; + goto out; + } + trans = btrfs_start_transaction(root, 0); + if (trans) + file->private_data = trans; + else + ret = -ENOMEM; + /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ +out: + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + +/* + * there are many ways the trans_start and trans_end ioctls can lead + * to deadlocks. 
They should only be used by applications that + * basically own the machine, and have a very in depth understanding + * of all the possible deadlocks and enospc problems. + */ +long btrfs_ioctl_trans_end(struct file *file) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + int ret = 0; + + mutex_lock(&root->fs_info->fs_mutex); + trans = file->private_data; + if (!trans) { + ret = -EINVAL; + goto out; + } + btrfs_end_transaction(trans, root); + file->private_data = 0; +out: + mutex_unlock(&root->fs_info->fs_mutex); + return ret; +} + long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -3356,6 +3411,13 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_balance(root->fs_info->dev_root); case BTRFS_IOC_CLONE: return btrfs_ioctl_clone(file, arg); + case BTRFS_IOC_TRANS_START: + return btrfs_ioctl_trans_start(file); + case BTRFS_IOC_TRANS_END: + return btrfs_ioctl_trans_end(file); + case BTRFS_IOC_SYNC: + btrfs_sync_fs(file->f_dentry->d_sb, 1); + return 0; } return -ENOTTY; @@ -3679,6 +3741,7 @@ static struct file_operations btrfs_dir_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_ioctl, #endif + .release = btrfs_release_file, }; static struct extent_io_ops btrfs_extent_io_ops = { diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b0e73f51d636..85ed35a775b1 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -36,6 +36,14 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ struct btrfs_ioctl_vol_args) +/* trans start and trans end are dangerous, and only for + * use by applications that know how to avoid the + * resulting deadlocks + */ +#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) +#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) +#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) + #define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) 
#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ struct btrfs_ioctl_vol_args) @@ -43,4 +51,5 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ struct btrfs_ioctl_vol_args) + #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 77f44494e229..39bb86945ed0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -293,7 +293,7 @@ fail_close: return err; } -static int btrfs_sync_fs(struct super_block *sb, int wait) +int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; struct btrfs_root *root; -- cgit v1.2.3 From edf24abe51493ccda384644d487fe2f796ac21c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2008 10:40:29 -0400 Subject: btrfs: sanity mount option parsing and early mount code Also adds lots of comments to describe what's going on here. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +- fs/btrfs/disk-io.c | 5 +- fs/btrfs/super.c | 241 ++++++++++++++++++++++++++++++----------------------- 3 files changed, 141 insertions(+), 108 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1dcf4fb5b688..49cbc62b42f9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1616,7 +1616,6 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode); /* super.c */ u64 btrfs_parse_size(char *str); -int btrfs_parse_options(char *options, struct btrfs_root *root, - char **subvol_name); +int btrfs_parse_options(struct btrfs_root *root, char *options); int btrfs_sync_fs(struct super_block *sb, int wait); #endif diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3805e7eab82d..b9a53646ceb2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1266,8 +1266,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; - btrfs_parse_options(options, 
tree_root, NULL); + err = btrfs_parse_options(tree_root, options); + if (err) + goto fail_sb_buffer; + err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", (unsigned long long)btrfs_super_num_devices(disk_super), diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 39bb86945ed0..288300fa5848 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -108,15 +108,18 @@ u64 btrfs_parse_size(char *str) return res; } -int btrfs_parse_options(char *options, struct btrfs_root *root, - char **subvol_name) +/* + * Regular mount options parser. Everything that is needed only when + * reading in a new superblock is parsed here. + */ +int btrfs_parse_options(struct btrfs_root *root, char *options) { - char * p; - struct btrfs_fs_info *info = NULL; + struct btrfs_fs_info *info = root->fs_info; substring_t args[MAX_OPT_ARGS]; + char *p, *num; if (!options) - return 1; + return 0; /* * strsep changes the string, duplicate it because parse_options @@ -126,10 +129,8 @@ int btrfs_parse_options(char *options, struct btrfs_root *root, if (!options) return -ENOMEM; - if (root) - info = root->fs_info; - while ((p = strsep (&options, ",")) != NULL) { + while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) continue; @@ -137,83 +138,64 @@ int btrfs_parse_options(char *options, struct btrfs_root *root, token = match_token(p, tokens, args); switch (token) { case Opt_degraded: - if (info) { - printk("btrfs: allowing degraded mounts\n"); - btrfs_set_opt(info->mount_opt, DEGRADED); - } + printk(KERN_INFO "btrfs: allowing degraded mounts\n"); + btrfs_set_opt(info->mount_opt, DEGRADED); break; case Opt_subvol: - if (subvol_name) { - *subvol_name = match_strdup(&args[0]); - } + /* + * This one is parsed by btrfs_parse_early_options + * and can be happily ignored here. 
+ */ break; case Opt_nodatasum: - if (info) { - printk("btrfs: setting nodatacsum\n"); - btrfs_set_opt(info->mount_opt, NODATASUM); - } + printk(KERN_INFO "btrfs: setting nodatacsum\n"); + btrfs_set_opt(info->mount_opt, NODATASUM); break; case Opt_nodatacow: - if (info) { - printk("btrfs: setting nodatacow\n"); - btrfs_set_opt(info->mount_opt, NODATACOW); - btrfs_set_opt(info->mount_opt, NODATASUM); - } + printk(KERN_INFO "btrfs: setting nodatacow\n"); + btrfs_set_opt(info->mount_opt, NODATACOW); + btrfs_set_opt(info->mount_opt, NODATASUM); break; case Opt_ssd: - if (info) { - printk("btrfs: use ssd allocation scheme\n"); - btrfs_set_opt(info->mount_opt, SSD); - } + printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); + btrfs_set_opt(info->mount_opt, SSD); break; case Opt_nobarrier: - if (info) { - printk("btrfs: turning off barriers\n"); - btrfs_set_opt(info->mount_opt, NOBARRIER); - } + printk(KERN_INFO "btrfs: turning off barriers\n"); + btrfs_set_opt(info->mount_opt, NOBARRIER); break; case Opt_max_extent: - if (info) { - char *num = match_strdup(&args[0]); - if (num) { - info->max_extent = - btrfs_parse_size(num); - kfree(num); - - info->max_extent = max_t(u64, - info->max_extent, - root->sectorsize); - printk("btrfs: max_extent at %Lu\n", - info->max_extent); - } + num = match_strdup(&args[0]); + if (num) { + info->max_extent = btrfs_parse_size(num); + kfree(num); + + info->max_extent = max_t(u64, + info->max_extent, root->sectorsize); + printk(KERN_INFO "btrfs: max_extent at %llu\n", + info->max_extent); } break; case Opt_max_inline: - if (info) { - char *num = match_strdup(&args[0]); - if (num) { - info->max_inline = - btrfs_parse_size(num); - kfree(num); - - info->max_inline = max_t(u64, - info->max_inline, - root->sectorsize); - printk("btrfs: max_inline at %Lu\n", - info->max_inline); - } + num = match_strdup(&args[0]); + if (num) { + info->max_inline = btrfs_parse_size(num); + kfree(num); + + info->max_inline = max_t(u64, + info->max_inline, 
root->sectorsize); + printk(KERN_INFO "btrfs: max_inline at %llu\n", + info->max_inline); } break; case Opt_alloc_start: - if (info) { - char *num = match_strdup(&args[0]); - if (num) { - info->alloc_start = - btrfs_parse_size(num); - kfree(num); - printk("btrfs: allocations start at " - "%Lu\n", info->alloc_start); - } + num = match_strdup(&args[0]); + if (num) { + info->alloc_start = btrfs_parse_size(num); + kfree(num); + printk(KERN_INFO + "btrfs: allocations start at %llu\n", + info->alloc_start); } break; default: @@ -221,7 +203,61 @@ int btrfs_parse_options(char *options, struct btrfs_root *root, } } kfree(options); - return 1; + return 0; +} + +/* + * Parse mount options that are required early in the mount process. + * + * All other options will be parsed on much later in the mount process and + * only when we need to allocate a new super block. + */ +static int btrfs_parse_early_options(const char *options, + char **subvol_name) +{ + substring_t args[MAX_OPT_ARGS]; + char *opts, *p; + int error = 0; + + if (!options) + goto out; + + /* + * strsep changes the string, duplicate it because parse_options + * gets called twice + */ + opts = kstrdup(options, GFP_KERNEL); + if (!opts) + return -ENOMEM; + + while ((p = strsep(&opts, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_subvol: + *subvol_name = match_strdup(&args[0]); + break; + default: + break; + } + } + + kfree(opts); + out: + /* + * If no subvolume name is specified we use the default one. Allocate + * a copy of the string "default" here so that code later in the + * mount path doesn't care if it's the default volume or another one. 
+ */ + if (!*subvol_name) { + *subvol_name = kstrdup("default", GFP_KERNEL); + if (!*subvol_name) + return -ENOMEM; + } + return error; } static int btrfs_fill_super(struct super_block * sb, @@ -328,23 +364,33 @@ static int btrfs_test_super(struct super_block *s, void *data) return root->fs_info->fs_devices == test_fs_devices; } -int btrfs_get_sb_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt, const char *subvol) +/* + * Find a superblock for the given device / mount point. + * + * Note: This is based on get_sb_bdev from fs/super.c with a few additions + * for multiple device setup. Make sure to keep it in sync. + */ +static int btrfs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) { + char *subvol_name = NULL; struct block_device *bdev = NULL; struct super_block *s; struct dentry *root; struct btrfs_fs_devices *fs_devices = NULL; int error = 0; + error = btrfs_parse_early_options(data, &subvol_name); + if (error) + goto error; + error = btrfs_scan_one_device(dev_name, flags, fs_type, &fs_devices); if (error) - return error; + goto error_free_subvol_name; error = btrfs_open_devices(fs_devices, flags, fs_type); if (error) - return error; + goto error_free_subvol_name; bdev = fs_devices->latest_bdev; btrfs_lock_volumes(); @@ -378,51 +424,36 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; } - if (subvol) { - root = lookup_one_len(subvol, s->s_root, strlen(subvol)); - if (IS_ERR(root)) { - up_write(&s->s_umount); - deactivate_super(s); - error = PTR_ERR(root); - goto error; - } - if (!root->d_inode) { - dput(root); - up_write(&s->s_umount); - deactivate_super(s); - error = -ENXIO; - goto error; - } - } else { - root = dget(s->s_root); + root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); + if (IS_ERR(root)) { + up_write(&s->s_umount); + deactivate_super(s); + error = PTR_ERR(root); + goto error; 
+ } + if (!root->d_inode) { + dput(root); + up_write(&s->s_umount); + deactivate_super(s); + error = -ENXIO; + goto error; } mnt->mnt_sb = s; mnt->mnt_root = root; + + kfree(subvol_name); return 0; error_s: error = PTR_ERR(s); error_bdev: btrfs_close_devices(fs_devices); +error_free_subvol_name: + kfree(subvol_name); error: return error; } -/* end copy & paste */ - -static int btrfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) -{ - int ret; - char *subvol_name = NULL; - - btrfs_parse_options((char *)data, NULL, &subvol_name); - ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, mnt, - subvol_name ? subvol_name : "default"); - if (subvol_name) - kfree(subvol_name); - return ret; -} static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { -- cgit v1.2.3 From 43e570b08a6c6b1d75f218566a6240542a386fd9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2008 10:40:46 -0400 Subject: btrfs: allow scanning multiple devices during mount Allows specifying one or more device=/dev/foo options during mount so that ioctls on the control device can be avoided. This is especially useful when trying to mount a multi-device setup as root. 
Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/super.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 288300fa5848..346932e546ba 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -65,7 +65,7 @@ static void btrfs_put_super (struct super_block * sb) } enum { - Opt_degraded, Opt_subvol, Opt_nodatasum, Opt_nodatacow, + Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err, }; @@ -73,6 +73,7 @@ enum { static match_table_t tokens = { {Opt_degraded, "degraded"}, {Opt_subvol, "subvol=%s"}, + {Opt_device, "device=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, {Opt_nobarrier, "nobarrier"}, @@ -142,8 +143,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, DEGRADED); break; case Opt_subvol: + case Opt_device: /* - * This one is parsed by btrfs_parse_early_options + * These are parsed by btrfs_parse_early_options * and can be happily ignored here. */ break; @@ -212,8 +214,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) * All other options will be parsed on much later in the mount process and * only when we need to allocate a new super block. 
*/ -static int btrfs_parse_early_options(const char *options, - char **subvol_name) +static int btrfs_parse_early_options(const char *options, int flags, + void *holder, char **subvol_name, + struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; char *opts, *p; @@ -240,11 +243,18 @@ static int btrfs_parse_early_options(const char *options, case Opt_subvol: *subvol_name = match_strdup(&args[0]); break; + case Opt_device: + error = btrfs_scan_one_device(match_strdup(&args[0]), + flags, holder, fs_devices); + if (error) + goto out_free_opts; + break; default: break; } } + out_free_opts: kfree(opts); out: /* @@ -380,7 +390,8 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, struct btrfs_fs_devices *fs_devices = NULL; int error = 0; - error = btrfs_parse_early_options(data, &subvol_name); + error = btrfs_parse_early_options(data, flags, fs_type, + &subvol_name, &fs_devices); if (error) goto error; -- cgit v1.2.3 From 15ada040d7cd68d7853938a92b116292cc16a2f3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 16:51:38 -0400 Subject: Btrfs: Fix mount -o max_inline=0 max_inline=0 used to force the max_inline size to one sector instead. Now it properly disables inline data items, while still being able to read any that happen to exist on disk. 
Signed-off-by: Chris Mason --- fs/btrfs/super.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 346932e546ba..f3274befd46a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -184,8 +184,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) info->max_inline = btrfs_parse_size(num); kfree(num); - info->max_inline = max_t(u64, - info->max_inline, root->sectorsize); + if (info->max_inline) { + info->max_inline = max_t(u64, + info->max_inline, + root->sectorsize); + } printk(KERN_INFO "btrfs: max_inline at %llu\n", info->max_inline); } -- cgit v1.2.3 From 4543df7ecc8ae4928c1e51d6e7dc188d650abee4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 21:47:56 -0400 Subject: Btrfs: Add a mount option to control worker thread pool size mount -o thread_pool=N overrides the default thread_pool_size, which is min(num_cpus + 2, 8). Larger thread pools would make more sense on very large disk arrays. This mount option controls the max size of each thread pool. There are multiple thread pools, so the total worker count will be larger than the mount option. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 30 +++++++++++++++--------------- fs/btrfs/super.c | 13 ++++++++++++- 3 files changed, 28 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6c91a510c965..7ae4666103c0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -532,6 +532,7 @@ struct btrfs_fs_info { */ struct btrfs_workers workers; struct btrfs_workers endio_workers; + int thread_pool_size; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) struct work_struct trans_work; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 98ff4fbcb386..c6a710a668cb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1117,6 +1117,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); int ret; int err = -EINVAL; + struct btrfs_super_block *disk_super; if (!extent_root || !tree_root || !fs_info) { @@ -1148,6 +1149,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; + fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); @@ -1195,19 +1197,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - /* we need to start all the end_io workers up front because the - * queue work function gets called at interrupt time. The endio - * workers don't normally start IO, so some number of them <= the - * number of cpus is fine. They handle checksumming after a read. - * - * The other worker threads do start IO, so the max is larger than - * the number of CPUs. 
FIXME, tune this for huge machines - */ - btrfs_init_workers(&fs_info->workers, num_online_cpus() * 2); - btrfs_init_workers(&fs_info->endio_workers, num_online_cpus()); - btrfs_start_workers(&fs_info->workers, 1); - btrfs_start_workers(&fs_info->endio_workers, num_online_cpus()); - #if 0 ret = add_hasher(fs_info, "crc32c"); if (ret) { @@ -1238,6 +1227,17 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (err) goto fail_sb_buffer; + /* + * we need to start all the end_io workers up front because the + * queue work function gets called at interrupt time, and so it + * cannot dynamically grow. + */ + btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->workers, 1); + btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + + err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", @@ -1341,10 +1341,10 @@ fail_sys_array: mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); -fail_iput: - iput(fs_info->btree_inode); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); +fail_iput: + iput(fs_info->btree_inode); fail: btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f3274befd46a..196d0e280b19 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -67,7 +67,7 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, - Opt_ssd, Opt_err, + Opt_ssd, Opt_thread_pool, Opt_err, }; static match_table_t tokens = { @@ -80,6 +80,7 @@ static match_table_t tokens = { {Opt_max_extent, "max_extent=%s"}, {Opt_max_inline, "max_inline=%s"}, 
{Opt_alloc_start, "alloc_start=%s"}, + {Opt_thread_pool, "thread_pool=%d"}, {Opt_ssd, "ssd"}, {Opt_err, NULL} }; @@ -118,6 +119,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) struct btrfs_fs_info *info = root->fs_info; substring_t args[MAX_OPT_ARGS]; char *p, *num; + int intarg; if (!options) return 0; @@ -166,6 +168,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) printk(KERN_INFO "btrfs: turning off barriers\n"); btrfs_set_opt(info->mount_opt, NOBARRIER); break; + case Opt_thread_pool: + intarg = 0; + match_int(&args[0], &intarg); + if (intarg) { + info->thread_pool_size = intarg; + printk(KERN_INFO "btrfs: thread pool %d\n", + info->thread_pool_size); + } + break; case Opt_max_extent: num = match_strdup(&args[0]); if (num) { -- cgit v1.2.3 From a213501153fd66e2359e091b1612841305ba6551 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Btrfs: Replace the big fs_mutex with a collection of other locks Extent allocations are still protected by a large alloc_mutex. 
Objectid allocations are covered by an objectid mutex. Other btree operations are protected by a lock on individual btree nodes. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 40 +++++++++++++++++++++----- fs/btrfs/ctree.h | 5 ++-- fs/btrfs/disk-io.c | 15 +++++----- fs/btrfs/extent-tree.c | 18 +++++------- fs/btrfs/file.c | 7 +---- fs/btrfs/inode-map.c | 8 ++++++ fs/btrfs/inode.c | 76 ++++---------------------------------------------- fs/btrfs/ioctl.c | 24 ++++------------ fs/btrfs/super.c | 2 -- fs/btrfs/transaction.c | 42 +++++++++++----------------- fs/btrfs/volumes.c | 19 +++++++++---- fs/btrfs/xattr.c | 10 +------ 12 files changed, 101 insertions(+), 165 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 75625c68fdd8..dff4da082d06 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -63,6 +63,9 @@ void btrfs_free_path(struct btrfs_path *p) void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; + int skip = p->skip_locking; + int keep = p->keep_locks; + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { if (!p->nodes[i]) continue; @@ -73,6 +76,8 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) free_extent_buffer(p->nodes[i]); } memset(p, 0, sizeof(*p)); + p->skip_locking = skip; + p->keep_locks = keep; } struct extent_buffer *btrfs_root_node(struct btrfs_root *root) @@ -1202,13 +1207,19 @@ static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) u32 nritems; t = path->nodes[i]; nritems = btrfs_header_nritems(t); - if (path->slots[i] >= nritems - 1) { + if (nritems < 2 || path->slots[i] >= nritems - 2) { +if (path->keep_locks) { +//printk("path %p skip level now %d\n", path, skip_level); +} skip_level = i + 1; continue; } } t = path->nodes[i]; if (i >= lowest_unlock && i > skip_level && path->locks[i]) { +if (path->keep_locks) { +//printk("path %p unlocking level %d slot %d nritems %d skip_level %d\n", path, i, path->slots[i], btrfs_header_nritems(t), 
skip_level); +} btrfs_tree_unlock(t); path->locks[i] = 0; } @@ -1243,7 +1254,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len); WARN_ON(p->nodes[0] != NULL); - // WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); WARN_ON(root == root->fs_info->extent_root && !mutex_is_locked(&root->fs_info->alloc_mutex)); WARN_ON(root == root->fs_info->chunk_root && @@ -1321,7 +1331,7 @@ again: b = read_node_slot(root, b, slot); if (!p->skip_locking) btrfs_tree_lock(b); - unlock_up(p, level, lowest_unlock); + unlock_up(p, level + 1, lowest_unlock); } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < @@ -1804,6 +1814,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root if (slot >= btrfs_header_nritems(upper) - 1) return 1; + WARN_ON(!btrfs_tree_locked(path->nodes[1])); + right = read_node_slot(root, upper, slot + 1); btrfs_tree_lock(right); free_space = btrfs_leaf_free_space(root, right); @@ -1981,6 +1993,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root return 1; } + WARN_ON(!btrfs_tree_locked(path->nodes[1])); + left = read_node_slot(root, path->nodes[1], slot - 1); btrfs_tree_lock(left); free_space = btrfs_leaf_free_space(root, left); @@ -2957,15 +2971,16 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); - path->keep_locks = 1; btrfs_release_path(root, path); + path->keep_locks = 1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); path->keep_locks = 0; if (ret < 0) return ret; - if (path->slots[0] < nritems - 1) { + nritems = btrfs_header_nritems(path->nodes[0]); + if (nritems > 0 && path->slots[0] < nritems - 1) { goto done; } @@ -2992,8 +3007,17 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, c, slot); - 
if (!path->skip_locking) + if (!path->skip_locking) { + if (!btrfs_tree_locked(c)) { + int i; + WARN_ON(1); +printk("path %p no lock on level %d\n", path, level); +for (i = 0; i < BTRFS_MAX_LEVEL; i++) { +printk("path %p level %d slot %d nritems %d\n", path, i, path->slots[i], btrfs_header_nritems(path->nodes[i])); +} + } btrfs_tree_lock(next); + } break; } path->slots[level] = slot; @@ -3011,8 +3035,10 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); - if (!path->skip_locking) + if (!path->skip_locking) { + WARN_ON(!btrfs_tree_locked(path->nodes[level])); btrfs_tree_lock(next); + } } done: unlock_up(path, 0, 1); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 50891b39f366..692b8ea42de1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -519,9 +519,9 @@ struct btrfs_fs_info { struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; - struct mutex fs_mutex; struct mutex alloc_mutex; struct mutex chunk_mutex; + struct mutex drop_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -554,7 +554,7 @@ struct btrfs_fs_info { struct completion kobj_unregister; int do_barriers; int closing; - unsigned long throttles; + atomic_t throttles; u64 total_pinned; struct list_head dirty_cowonly_roots; @@ -594,6 +594,7 @@ struct btrfs_root { struct inode *inode; struct kobject root_kobj; struct completion kobj_unregister; + struct mutex objectid_mutex; u64 objectid; u64 last_trans; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fe40bdd984ff..f638803549e0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -724,6 +724,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, INIT_LIST_HEAD(&root->dirty_list); spin_lock_init(&root->node_lock); + mutex_init(&root->objectid_mutex); memset(&root->root_key, 0, 
sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -1146,6 +1147,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); + atomic_set(&fs_info->throttles, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; @@ -1199,7 +1201,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); - mutex_init(&fs_info->fs_mutex); + mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); @@ -1278,8 +1280,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_sb_buffer; } - mutex_lock(&fs_info->fs_mutex); - mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); @@ -1342,7 +1342,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; - mutex_unlock(&fs_info->fs_mutex); return tree_root; fail_extent_root: @@ -1350,7 +1349,6 @@ fail_extent_root: fail_tree_root: free_extent_buffer(tree_root->node); fail_sys_array: - mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); @@ -1562,8 +1560,9 @@ int close_ctree(struct btrfs_root *root) struct btrfs_fs_info *fs_info = root->fs_info; fs_info->closing = 1; + smp_mb(); + btrfs_transaction_flush_work(root); - mutex_lock(&fs_info->fs_mutex); btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1574,7 +1573,6 @@ int close_ctree(struct btrfs_root *root) BUG_ON(ret); write_ctree_super(NULL, 
root); - mutex_unlock(&fs_info->fs_mutex); btrfs_transaction_flush_work(root); @@ -1679,7 +1677,8 @@ void btrfs_throttle(struct btrfs_root *root) struct backing_dev_info *bdi; bdi = &root->fs_info->bdi; - if (root->fs_info->throttles && bdi_write_congested(bdi)) { + if (atomic_read(&root->fs_info->throttles) && + bdi_write_congested(bdi)) { #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) congestion_wait(WRITE, HZ/20); #else diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7e40c516fe62..890b9e9d8e27 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1577,9 +1577,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root } /* block accounting for super block */ + spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used - num_bytes); + spin_unlock_irq(&info->delalloc_lock); /* block accounting for root item */ root_used = btrfs_root_used(&root->root_item); @@ -1968,8 +1970,10 @@ again: } /* block accounting for super block */ + spin_lock_irq(&info->delalloc_lock); super_used = btrfs_super_bytes_used(&info->super_copy); btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); + spin_unlock_irq(&info->delalloc_lock); /* block accounting for root item */ root_used = btrfs_root_used(&root->root_item); @@ -2172,12 +2176,12 @@ static void noinline reada_walk_down(struct btrfs_root *root, continue; } } - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); ret = readahead_tree_block(root, bytenr, blocksize, btrfs_node_ptr_generation(node, i)); last = bytenr + blocksize; cond_resched(); - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); if (ret) break; } @@ -2254,11 +2258,9 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, free_extent_buffer(next); reada_walk_down(root, cur, path->slots[*level]); - 
mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->alloc_mutex); next = read_tree_block(root, bytenr, blocksize, ptr_gen); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->alloc_mutex); /* we've dropped the lock, double check */ @@ -2381,6 +2383,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root int orig_level; struct btrfs_root_item *root_item = &root->root_item; + WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); path = btrfs_alloc_path(); BUG_ON(!path); @@ -2710,7 +2713,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, *last_file_root == ref_root) goto out; - mutex_unlock(&extent_root->fs_info->fs_mutex); inode = btrfs_iget_locked(extent_root->fs_info->sb, ref_objectid, found_root); if (inode->i_state & I_NEW) { @@ -2727,7 +2729,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, * the latest version of the tree root */ if (is_bad_inode(inode)) { - mutex_lock(&extent_root->fs_info->fs_mutex); goto out; } *last_file_objectid = inode->i_ino; @@ -2736,7 +2737,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root, relocate_inode_pages(inode, ref_offset, extent_key->offset); iput(inode); - mutex_lock(&extent_root->fs_info->fs_mutex); } else { struct btrfs_trans_handle *trans; struct extent_buffer *eb; @@ -3033,9 +3033,7 @@ next: if (progress && need_resched()) { memcpy(&key, &found_key, sizeof(key)); - mutex_unlock(&root->fs_info->fs_mutex); cond_resched(); - mutex_lock(&root->fs_info->fs_mutex); btrfs_release_path(root, path); btrfs_search_slot(NULL, root, &key, path, 0, 0); progress = 0; @@ -3068,9 +3066,7 @@ next: trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_clean_old_snapshots(tree_root); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(tree_root, 1); btrfs_commit_transaction(trans, tree_root); 
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 73c6d085bd90..18bbe108a0e6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -252,7 +252,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (!trans) { err = -ENOMEM; @@ -341,7 +340,6 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, failed: err = btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); return err; } @@ -905,9 +903,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(pages)); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, write_bytes, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (ret) goto out; @@ -998,9 +994,9 @@ static int btrfs_sync_file(struct file *file, * check the transaction that last modified this inode * and see if its already been committed */ - mutex_lock(&root->fs_info->fs_mutex); if (!BTRFS_I(inode)->last_trans) goto out; + mutex_lock(&root->fs_info->trans_mutex); if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { @@ -1023,7 +1019,6 @@ static int btrfs_sync_file(struct file *file, } ret = btrfs_commit_transaction(trans, root); out: - mutex_unlock(&root->fs_info->fs_mutex); return ret > 0 ? 
EIO : ret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index a0925eabdaa2..298346ae1481 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -69,6 +69,12 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, struct btrfs_key search_key; u64 search_start = dirid; + mutex_lock(&root->objectid_mutex); + if (root->last_inode_alloc) { + *objectid = ++root->last_inode_alloc; + mutex_unlock(&root->objectid_mutex); + return 0; + } path = btrfs_alloc_path(); BUG_ON(!path); search_start = root->last_inode_alloc; @@ -124,9 +130,11 @@ found: btrfs_release_path(root, path); btrfs_free_path(path); BUG_ON(*objectid < search_start); + mutex_unlock(&root->objectid_mutex); return 0; error: btrfs_release_path(root, path); btrfs_free_path(path); + mutex_unlock(&root->objectid_mutex); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 61bd8953a683..b2251e27ac84 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -79,12 +79,15 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del) { - u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy); - u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy); + u64 total; + u64 used; u64 thresh; unsigned long flags; int ret = 0; + spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); + total = btrfs_super_total_bytes(&root->fs_info->super_copy); + used = btrfs_super_bytes_used(&root->fs_info->super_copy); if (for_del) thresh = total * 90; else @@ -92,7 +95,6 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, do_div(thresh, 100); - spin_lock_irqsave(&root->fs_info->delalloc_lock, flags); if (used + root->fs_info->delalloc_bytes + num_required > thresh) ret = -ENOSPC; spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags); @@ -115,7 +117,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) trans = btrfs_start_transaction(root, 1); 
BUG_ON(!trans); btrfs_set_trans_block_group(trans, inode); - mutex_unlock(&root->fs_info->fs_mutex); num_bytes = (end - start + blocksize) & ~(blocksize - 1); num_bytes = max(blocksize, num_bytes); @@ -160,7 +161,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end) btrfs_add_ordered_inode(inode); btrfs_update_inode(trans, root, inode); out: - mutex_lock(&root->fs_info->fs_mutex); btrfs_end_transaction(trans, root); return ret; } @@ -269,14 +269,13 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; - mutex_lock(&root->fs_info->fs_mutex); + if (btrfs_test_opt(root, NODATACOW) || btrfs_test_flag(inode, NODATACOW)) ret = run_delalloc_nocow(inode, start, end); else ret = cow_file_range(inode, start, end); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -349,17 +348,13 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_csum_one_bio(root, bio, &sums); BUG_ON(ret); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_set_trans_block_group(trans, inode); btrfs_csum_file_blocks(trans, root, inode, bio, sums); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_end_transaction(trans, root); BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); kfree(sums); @@ -404,7 +399,6 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) btrfs_test_flag(inode, NODATASUM)) return 0; - mutex_lock(&root->fs_info->fs_mutex); path = btrfs_alloc_path(); item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); if (IS_ERR(item)) { @@ -422,7 +416,6 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) out: if (path) btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -616,7 +609,6 @@ void btrfs_read_locked_inode(struct inode *inode) path = btrfs_alloc_path(); BUG_ON(!path); - 
mutex_lock(&root->fs_info->fs_mutex); memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); ret = btrfs_lookup_inode(NULL, root, path, &location, 0); @@ -662,8 +654,6 @@ void btrfs_read_locked_inode(struct inode *inode) btrfs_free_path(path); inode_item = NULL; - mutex_unlock(&root->fs_info->fs_mutex); - switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; @@ -691,9 +681,7 @@ void btrfs_read_locked_inode(struct inode *inode) return; make_bad: - btrfs_release_path(root, path); btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); make_bad_inode(inode); } @@ -758,7 +746,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_last_trans(trans, inode); ret = 0; failed: - btrfs_release_path(root, path); btrfs_free_path(path); return ret; } @@ -849,7 +836,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) unsigned long nr = 0; root = BTRFS_I(dir)->root; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 1); if (ret) @@ -871,7 +857,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return ret; @@ -890,7 +875,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) return -ENOTEMPTY; } - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 1); if (ret) goto fail; @@ -907,7 +891,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) nr = trans->blocks_used; ret = btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); @@ -1129,7 +1112,6 @@ error: ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); } - btrfs_release_path(root, path); btrfs_free_path(path); inode->i_sb->s_dirt = 1; return ret; @@ -1234,9 +1216,7 @@ static int 
btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size <= hole_start) goto out; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (err) goto fail; @@ -1245,7 +1225,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); hole_size = block_end - hole_start; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, root, inode, @@ -1262,7 +1241,6 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) btrfs_check_file(root, inode); } btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); if (err) return err; @@ -1286,7 +1264,6 @@ void btrfs_delete_inode(struct inode *inode) } inode->i_size = 0; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -1298,7 +1275,6 @@ void btrfs_delete_inode(struct inode *inode) clear_inode(inode); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return; @@ -1306,7 +1282,6 @@ void btrfs_delete_inode(struct inode *inode) no_delete_lock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); no_delete: @@ -1402,7 +1377,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, path = btrfs_alloc_path(); BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); *sub_root = btrfs_read_fs_root(root->fs_info, location, dentry->d_name.name, @@ -1416,7 +1390,6 @@ static int fixup_tree_root_location(struct btrfs_root *root, location->offset = 0; btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); 
return 0; } @@ -1482,9 +1455,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_inode_by_name(dir, dentry, &location); - mutex_unlock(&root->fs_info->fs_mutex); if (ret < 0) return ERR_PTR(ret); @@ -1559,7 +1530,6 @@ static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir) filp->f_pos = 1; } - mutex_lock(&root->fs_info->fs_mutex); key.objectid = inode->i_ino; path = btrfs_alloc_path(); path->reada = 2; @@ -1668,9 +1638,7 @@ read_dir_items: nopos: ret = 0; err: - btrfs_release_path(root, path); btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -1681,11 +1649,9 @@ int btrfs_write_inode(struct inode *inode, int wait) int ret = 0; if (wait) { - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); } return ret; } @@ -1701,12 +1667,10 @@ void btrfs_dirty_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); } static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, @@ -1874,7 +1838,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -1912,8 +1875,6 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -1934,7 
+1895,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, unsigned long nr = 0; u64 objectid; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -1980,8 +1940,6 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -2009,7 +1967,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, #else inc_nlink(inode); #endif - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto fail; @@ -2032,8 +1989,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, nr = trans->blocks_used; btrfs_end_transaction(trans, root); fail: - mutex_unlock(&root->fs_info->fs_mutex); - if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -2053,7 +2008,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 objectid = 0; unsigned long nr = 1; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto out_unlock; @@ -2106,7 +2060,6 @@ out_fail: btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); @@ -2199,7 +2152,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, path = btrfs_alloc_path(); BUG_ON(!path); - mutex_lock(&root->fs_info->fs_mutex); again: spin_lock(&em_tree->lock); @@ -2402,7 +2354,6 @@ out: if (!err) err = ret; } - mutex_unlock(&root->fs_info->fs_mutex); if (err) { free_extent_map(em); WARN_ON(1); @@ -2584,9 +2535,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) int ret; u64 page_start; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (ret) goto out; @@ -2631,7 +2580,6 @@ static void 
btrfs_truncate(struct inode *inode) btrfs_truncate_page(inode->i_mapping, inode->i_size); - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -2643,7 +2591,6 @@ static void btrfs_truncate(struct inode *inode) ret = btrfs_end_transaction(trans, root); BUG_ON(ret); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); } @@ -2827,7 +2774,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; struct timespec ctime = CURRENT_TIME; - struct btrfs_path *path; int ret; if (S_ISDIR(old_inode->i_mode) && new_inode && @@ -2835,7 +2781,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, return -ENOTEMPTY; } - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) goto out_unlock; @@ -2843,11 +2788,6 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, new_dir); - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out_fail; - } old_dentry->d_inode->i_nlink++; old_dir->i_ctime = old_dir->i_mtime = ctime; @@ -2869,10 +2809,8 @@ static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry, goto out_fail; out_fail: - btrfs_free_path(path); btrfs_end_transaction(trans, root); out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -2898,7 +2836,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; - mutex_lock(&root->fs_info->fs_mutex); err = btrfs_check_free_space(root, 1, 0); if (err) goto out_fail; @@ -2979,7 +2916,6 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); out_fail: - mutex_unlock(&root->fs_info->fs_mutex); if 
(drop_inode) { inode_dec_link_count(inode); iput(inode); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3fbf74e93dba..6002eb64daf9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -63,7 +63,6 @@ static noinline int create_subvol(struct btrfs_root *root, char *name, u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; unsigned long nr = 1; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) goto fail_commit; @@ -164,7 +163,6 @@ fail: if (err && !ret) ret = err; fail_commit: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return ret; @@ -181,7 +179,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) if (!root->ref_cows) return -EINVAL; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, 1, 0); if (ret) goto fail_unlock; @@ -208,7 +205,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen) err = btrfs_commit_transaction(trans, root); fail_unlock: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_btree_balance_dirty(root, nr); btrfs_throttle(root); return ret; @@ -228,9 +224,7 @@ int btrfs_defrag_file(struct file *file) unsigned long i; int ret; - mutex_lock(&root->fs_info->fs_mutex); ret = btrfs_check_free_space(root, inode->i_size, 0); - mutex_unlock(&root->fs_info->fs_mutex); if (ret) return -ENOSPC; @@ -315,7 +309,8 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) goto out; } - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); sizestr = vol_args->name; devstr = strchr(sizestr, ':'); if (devstr) { @@ -385,7 +380,8 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) } out_unlock: - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); out: kfree(vol_args); return ret; @@ -428,11 +424,9 @@ static
noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, } root_dirid = root->fs_info->sb->s_root->d_inode->i_ino, - mutex_lock(&root->fs_info->fs_mutex); di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, root_dirid, vol_args->name, namelen, 0); - mutex_unlock(&root->fs_info->fs_mutex); btrfs_free_path(path); if (di && !IS_ERR(di)) { @@ -445,10 +439,12 @@ static noinline int btrfs_ioctl_snap_create(struct btrfs_root *root, goto out; } + mutex_lock(&root->fs_info->drop_mutex); if (root == root->fs_info->tree_root) ret = create_subvol(root, vol_args->name, namelen); else ret = create_snapshot(root, vol_args->name, namelen); + mutex_unlock(&root->fs_info->drop_mutex); out: kfree(vol_args); return ret; @@ -461,10 +457,8 @@ static int btrfs_ioctl_defrag(struct file *file) switch (inode->i_mode & S_IFMT) { case S_IFDIR: - mutex_lock(&root->fs_info->fs_mutex); btrfs_defrag_root(root, 0); btrfs_defrag_root(root->fs_info->extent_root, 0); - mutex_unlock(&root->fs_info->fs_mutex); break; case S_IFREG: btrfs_defrag_file(file); @@ -588,7 +582,6 @@ long btrfs_ioctl_clone(struct file *file, unsigned long src_fd) unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); } - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 0); path = btrfs_alloc_path(); if (!path) { @@ -685,7 +678,6 @@ out: unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); out_unlock: mutex_unlock(&src->i_mutex); @@ -711,7 +703,6 @@ long btrfs_ioctl_trans_start(struct file *file) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - mutex_lock(&root->fs_info->fs_mutex); if (file->private_data) { ret = -EINPROGRESS; goto out; @@ -723,7 +714,6 @@ long btrfs_ioctl_trans_start(struct file *file) ret = -ENOMEM; /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ out: - mutex_unlock(&root->fs_info->fs_mutex); return ret; } @@ -740,7 +730,6 @@ long btrfs_ioctl_trans_end(struct file 
*file) struct btrfs_trans_handle *trans; int ret = 0; - mutex_lock(&root->fs_info->fs_mutex); trans = file->private_data; if (!trans) { ret = -EINVAL; @@ -749,7 +738,6 @@ long btrfs_ioctl_trans_end(struct file *file) btrfs_end_transaction(trans, root); file->private_data = 0; out: - mutex_unlock(&root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 196d0e280b19..b61ded7a20c9 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -366,12 +366,10 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } btrfs_clean_old_snapshots(root); - mutex_lock(&root->fs_info->fs_mutex); btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; - mutex_unlock(&root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1ed433a71493..5a1ee0665ae8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -370,6 +370,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) struct btrfs_trans_handle *trans; unsigned long nr; + smp_mb(); if (root->defrag_running) return 0; trans = btrfs_start_transaction(root, 1); @@ -378,16 +379,15 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) ret = btrfs_defrag_leaves(trans, root, cacheonly); nr = trans->blocks_used; btrfs_end_transaction(trans, root); - mutex_unlock(&info->fs_mutex); btrfs_btree_balance_dirty(info->tree_root, nr); cond_resched(); - mutex_lock(&info->fs_mutex); trans = btrfs_start_transaction(root, 1); if (ret != -EAGAIN) break; } root->defrag_running = 0; + smp_mb(); radix_tree_tag_clear(&info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_DEFRAG_TAG); @@ -435,14 +435,14 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, while(!list_empty(list)) { struct btrfs_root *root; - mutex_lock(&tree_root->fs_info->fs_mutex); dirty = list_entry(list->next, struct dirty_root, list); 
list_del_init(&dirty->list); num_bytes = btrfs_root_used(&dirty->root->root_item); root = dirty->latest_root; - root->fs_info->throttles++; + atomic_inc(&root->fs_info->throttles); + mutex_lock(&root->fs_info->drop_mutex); while(1) { trans = btrfs_start_transaction(tree_root, 1); ret = btrfs_drop_snapshot(trans, dirty->root); @@ -459,14 +459,16 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); - mutex_unlock(&tree_root->fs_info->fs_mutex); + + mutex_unlock(&root->fs_info->drop_mutex); btrfs_btree_balance_dirty(tree_root, nr); cond_resched(); - mutex_lock(&tree_root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->drop_mutex); } BUG_ON(ret); - root->fs_info->throttles--; + atomic_dec(&root->fs_info->throttles); + mutex_lock(&root->fs_info->alloc_mutex); num_bytes -= btrfs_root_used(&dirty->root->root_item); bytes_used = btrfs_root_used(&root->root_item); if (num_bytes) { @@ -474,11 +476,15 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, btrfs_set_root_used(&root->root_item, bytes_used - num_bytes); } + mutex_unlock(&root->fs_info->alloc_mutex); + ret = btrfs_del_root(trans, tree_root, &dirty->root->root_key); if (ret) { BUG(); break; } + mutex_unlock(&root->fs_info->drop_mutex); + nr = trans->blocks_used; ret = btrfs_end_transaction(trans, tree_root); BUG_ON(ret); @@ -486,7 +492,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, free_extent_buffer(dirty->root->node); kfree(dirty->root); kfree(dirty); - mutex_unlock(&tree_root->fs_info->fs_mutex); btrfs_btree_balance_dirty(tree_root, nr); cond_resched(); @@ -503,7 +508,7 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, u64 objectid = 0; int ret; - root->fs_info->throttles++; + atomic_inc(&root->fs_info->throttles); while(1) { ret = btrfs_find_first_ordered_inode( &cur_trans->ordered_inode_tree, @@ -512,7 +517,6 @@ int btrfs_write_ordered_inodes(struct 
btrfs_trans_handle *trans, break; mutex_unlock(&root->fs_info->trans_mutex); - mutex_unlock(&root->fs_info->fs_mutex); if (S_ISREG(inode->i_mode)) { atomic_inc(&BTRFS_I(inode)->ordered_writeback); @@ -521,7 +525,6 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, } iput(inode); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } while(1) { @@ -533,7 +536,6 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, if (!ret) break; mutex_unlock(&root->fs_info->trans_mutex); - mutex_unlock(&root->fs_info->fs_mutex); if (S_ISREG(inode->i_mode)) { atomic_inc(&BTRFS_I(inode)->ordered_writeback); @@ -543,10 +545,9 @@ int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans, atomic_dec(&inode->i_count); iput(inode); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); } - root->fs_info->throttles--; + atomic_dec(&root->fs_info->throttles); return 0; } @@ -661,7 +662,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->trans_mutex); btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); ret = wait_for_commit(root, cur_trans); BUG_ON(ret); @@ -669,7 +669,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); mutex_unlock(&root->fs_info->trans_mutex); - mutex_lock(&root->fs_info->fs_mutex); return 0; } @@ -687,12 +686,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction, list); if (!prev_trans->commit_done) { prev_trans->use_count++; - mutex_unlock(&root->fs_info->fs_mutex); mutex_unlock(&root->fs_info->trans_mutex); wait_for_commit(root, prev_trans); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); put_transaction(prev_trans); } @@ -709,12 +706,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, else timeout = 1; - mutex_unlock(&root->fs_info->fs_mutex); 
mutex_unlock(&root->fs_info->trans_mutex); schedule_timeout(timeout); - mutex_lock(&root->fs_info->fs_mutex); mutex_lock(&root->fs_info->trans_mutex); finish_wait(&cur_trans->writer_wait, &wait); ret = btrfs_write_ordered_inodes(trans, root); @@ -755,12 +750,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_copy_pinned(root, pinned_copy); mutex_unlock(&root->fs_info->trans_mutex); - mutex_unlock(&root->fs_info->fs_mutex); ret = btrfs_write_and_wait_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root); - mutex_lock(&root->fs_info->fs_mutex); btrfs_finish_extent_commit(trans, root, pinned_copy); mutex_lock(&root->fs_info->trans_mutex); @@ -781,9 +774,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); if (root->fs_info->closing) { - mutex_unlock(&root->fs_info->fs_mutex); drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots); - mutex_lock(&root->fs_info->fs_mutex); } return ret; } @@ -823,7 +814,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) unsigned long delay = HZ * 30; int ret; - mutex_lock(&root->fs_info->fs_mutex); + smp_mb(); if (root->fs_info->closing) goto out; @@ -844,7 +835,6 @@ void btrfs_transaction_cleaner(struct work_struct *work) trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); out: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_clean_old_snapshots(root); btrfs_transaction_queue_work(root, delay); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ba3968571024..869864ddcc29 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -866,7 +866,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) u64 devid; int ret = 0; - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); mutex_lock(&uuid_mutex); all_avail = root->fs_info->avail_data_alloc_bits | @@ -984,7 +985,8 @@ error_close: 
close_bdev_excl(bdev); out: mutex_unlock(&uuid_mutex); - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); return ret; } @@ -1003,7 +1005,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (!bdev) { return -EIO; } - mutex_lock(&root->fs_info->fs_mutex); + + mutex_lock(&root->fs_info->alloc_mutex); + mutex_lock(&root->fs_info->chunk_mutex); + trans = btrfs_start_transaction(root, 1); devices = &root->fs_info->fs_devices->devices; list_for_each(cur, devices) { @@ -1057,7 +1062,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->open_devices++; out: btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); + mutex_unlock(&root->fs_info->alloc_mutex); + return ret; out_close_bdev: @@ -1297,9 +1304,10 @@ int btrfs_balance(struct btrfs_root *dev_root) struct btrfs_key found_key; + BUG(); /* FIXME, needs locking */ + dev_root = dev_root->fs_info->dev_root; - mutex_lock(&dev_root->fs_info->fs_mutex); /* step one make some room on all the devices */ list_for_each(cur, devices) { device = list_entry(cur, struct btrfs_device, dev_list); @@ -1368,7 +1376,6 @@ int btrfs_balance(struct btrfs_root *dev_root) ret = 0; error: btrfs_free_path(path); - mutex_unlock(&dev_root->fs_info->fs_mutex); return ret; } diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index f63488dc2f16..3e9eb91c3c89 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -153,7 +153,6 @@ ssize_t btrfs_xattr_get(struct inode *inode, int name_index, return -ENOMEM; } - mutex_lock(&root->fs_info->fs_mutex); /* lookup the xattr by name */ di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, strlen(name), 0); @@ -181,7 +180,6 @@ ssize_t btrfs_xattr_get(struct inode *inode, int name_index, ret = btrfs_dir_data_len(leaf, di); out: - mutex_unlock(&root->fs_info->fs_mutex); kfree(name); 
btrfs_free_path(path); return ret; @@ -210,7 +208,6 @@ int btrfs_xattr_set(struct inode *inode, int name_index, return -ENOMEM; } - mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -260,7 +257,6 @@ out: } btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->fs_mutex); kfree(name); btrfs_free_path(path); @@ -297,8 +293,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) return -ENOMEM; path->reada = 2; - mutex_lock(&root->fs_info->fs_mutex); - /* search for our xattrs */ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) @@ -379,15 +373,13 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ret = total_size; err: - mutex_unlock(&root->fs_info->fs_mutex); btrfs_free_path(path); return ret; } /* - * delete all the xattrs associated with the inode. fs_mutex should be - * held when we come into here + * delete all the xattrs associated with the inode. */ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) -- cgit v1.2.3 From a74a4b97b61beede185b4b3ad359d7d378b0d312 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Replace the transaction work queue with kthreads This creates one kthread for commits and one kthread for deleting old snapshots. All the work queues are removed. 
Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 5 ++- fs/btrfs/ctree.h | 13 ++---- fs/btrfs/disk-io.c | 116 +++++++++++++++++++++++++++++++++++++++++++++---- fs/btrfs/extent-tree.c | 10 ++--- fs/btrfs/super.c | 16 +++---- fs/btrfs/transaction.c | 72 +----------------------------- fs/btrfs/transaction.h | 10 ----- fs/btrfs/volumes.c | 12 +++-- 8 files changed, 136 insertions(+), 118 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5edbcc09b3cc..40f0e0cb804b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1352,6 +1352,8 @@ again: free_extent_buffer(tmp); goto again; } else { + if (tmp) + free_extent_buffer(tmp); b = read_node_slot(root, b, slot); } } @@ -3048,7 +3050,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; - path->locks[level] = 1; + if (!path->skip_locking) + path->locks[level] = 1; if (!level) break; if (level == 1 && path->locks[1] && path->reada) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e9bbb53eda63..244fe86bcc55 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -519,15 +518,14 @@ struct btrfs_fs_info { struct backing_dev_info bdi; spinlock_t hash_lock; struct mutex trans_mutex; + struct mutex transaction_kthread_mutex; + struct mutex cleaner_mutex; struct mutex alloc_mutex; struct mutex chunk_mutex; struct mutex drop_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; - struct list_head end_io_work_list; - struct work_struct end_io_work; - spinlock_t end_io_work_lock; atomic_t nr_async_submits; /* @@ -543,13 +541,10 @@ struct btrfs_fs_info { struct btrfs_workers workers; struct btrfs_workers endio_workers; struct btrfs_workers submit_workers; + struct task_struct *transaction_kthread; + struct task_struct *cleaner_kthread; int thread_pool_size; -#if 
LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct work_struct trans_work; -#else - struct delayed_work trans_work; -#endif struct kobject super_kobj; struct completion kobj_unregister; int do_barriers; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 52569b57692d..31ca9f89388d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -24,6 +25,12 @@ #include #include // for block_sync_page #include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +# include +#else +# include +#endif #include "crc32c.h" #include "ctree.h" #include "disk-io.h" @@ -1100,6 +1107,87 @@ static void end_workqueue_fn(struct btrfs_work *work) #endif } +static int cleaner_kthread(void *arg) +{ + struct btrfs_root *root = arg; + + do { + smp_mb(); + if (root->fs_info->closing) + break; + + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + mutex_lock(&root->fs_info->cleaner_mutex); +printk("cleaner awake\n"); + btrfs_clean_old_snapshots(root); +printk("cleaner done\n"); + mutex_unlock(&root->fs_info->cleaner_mutex); + + if (freezing(current)) { + refrigerator(); + } else { + smp_mb(); + if (root->fs_info->closing) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + +static int transaction_kthread(void *arg) +{ + struct btrfs_root *root = arg; + struct btrfs_trans_handle *trans; + struct btrfs_transaction *cur; + unsigned long now; + unsigned long delay; + int ret; + + do { + smp_mb(); + if (root->fs_info->closing) + break; + + delay = HZ * 30; + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + cur = root->fs_info->running_transaction; + if (!cur) { + mutex_unlock(&root->fs_info->trans_mutex); + goto sleep; + } + now = get_seconds(); + if (now < cur->start_time || 
now - cur->start_time < 30) { + mutex_unlock(&root->fs_info->trans_mutex); + delay = HZ * 5; + goto sleep; + } + mutex_unlock(&root->fs_info->trans_mutex); + btrfs_defrag_dirty_roots(root->fs_info); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); +sleep: + wake_up_process(root->fs_info->cleaner_kthread); + mutex_unlock(&root->fs_info->transaction_kthread_mutex); + + if (freezing(current)) { + refrigerator(); + } else { + if (root->fs_info->closing) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(delay); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options) @@ -1189,11 +1277,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); -#else - INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); -#endif BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -1204,6 +1287,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); + mutex_init(&fs_info->transaction_kthread_mutex); + mutex_init(&fs_info->cleaner_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1247,7 +1332,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); - err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", @@ -1341,9 +1425,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, 
fs_info->data_alloc_profile = (u64)-1; fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, + "btrfs-cleaner"); + if (!fs_info->cleaner_kthread) + goto fail_extent_root; + + fs_info->transaction_kthread = kthread_run(transaction_kthread, + tree_root, + "btrfs-transaction"); + if (!fs_info->transaction_kthread) + goto fail_trans_kthread; + return tree_root; +fail_trans_kthread: + kthread_stop(fs_info->cleaner_kthread); fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: @@ -1562,8 +1659,11 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); - btrfs_transaction_flush_work(root); + kthread_stop(root->fs_info->transaction_kthread); + kthread_stop(root->fs_info->cleaner_kthread); + btrfs_defrag_dirty_roots(root->fs_info); + btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -1574,8 +1674,6 @@ int close_ctree(struct btrfs_root *root) write_ctree_super(NULL, root); - btrfs_transaction_flush_work(root); - if (fs_info->delalloc_bytes) { printk("btrfs: at unmount delalloc count %Lu\n", fs_info->delalloc_bytes); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 6274f30031db..89cc4f611869 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1216,15 +1216,16 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, if (ret == -ENOSPC) { printk("space info full %Lu\n", flags); space_info->full = 1; - goto out; + goto out_unlock; } BUG_ON(ret); ret = btrfs_make_block_group(trans, extent_root, 0, flags, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); BUG_ON(ret); -out: +out_unlock: mutex_unlock(&extent_root->fs_info->chunk_mutex); +out: return 0; } @@ -2274,7 +2275,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, 
free_extent_buffer(next); mutex_unlock(&root->fs_info->alloc_mutex); - reada_walk_down(root, cur, path->slots[*level]); + if (path->slots[*level] == 0) + reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); @@ -2446,8 +2448,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root break; if (wret < 0) ret = wret; - ret = -EAGAIN; - break; } for (i = 0; i <= orig_level; i++) { if (path->nodes[i]) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b61ded7a20c9..726d6871fa13 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -340,7 +340,6 @@ static int btrfs_fill_super(struct super_block * sb, goto fail_close; sb->s_root = root_dentry; - btrfs_transaction_queue_work(tree_root, HZ * 30); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) save_mount_options(sb, data); @@ -416,9 +415,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, goto error_free_subvol_name; bdev = fs_devices->latest_bdev; - btrfs_lock_volumes(); s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); - btrfs_unlock_volumes(); if (IS_ERR(s)) goto error_s; @@ -530,13 +527,15 @@ out: static void btrfs_write_super_lockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); - btrfs_transaction_flush_work(root); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + mutex_lock(&root->fs_info->cleaner_mutex); } static void btrfs_unlockfs(struct super_block *sb) { struct btrfs_root *root = btrfs_sb(sb); - btrfs_transaction_queue_work(root, HZ * 30); + mutex_unlock(&root->fs_info->cleaner_mutex); + mutex_unlock(&root->fs_info->transaction_kthread_mutex); } static struct super_operations btrfs_super_ops = { @@ -589,10 +588,9 @@ static int __init init_btrfs_fs(void) if (err) return err; - btrfs_init_transaction_sys(); err = btrfs_init_cachep(); if (err) - goto free_transaction_sys; + goto free_sysfs; err = extent_io_init(); if (err) @@ -618,15 +616,13 @@ free_extent_io: 
extent_io_exit(); free_cachep: btrfs_destroy_cachep(); -free_transaction_sys: - btrfs_exit_transaction_sys(); +free_sysfs: btrfs_exit_sysfs(); return err; } static void __exit exit_btrfs_fs(void) { - btrfs_exit_transaction_sys(); btrfs_destroy_cachep(); extent_map_exit(); extent_io_exit(); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 69ed5f85a387..0c53ff775b92 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -29,8 +29,6 @@ static int total_trans = 0; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; -static struct workqueue_struct *trans_wq; - #define BTRFS_ROOT_TRANS_TAG 0 #define BTRFS_ROOT_DEFRAG_TAG 1 @@ -807,81 +805,15 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) { struct list_head dirty_roots; INIT_LIST_HEAD(&dirty_roots); - +again: mutex_lock(&root->fs_info->trans_mutex); list_splice_init(&root->fs_info->dead_roots, &dirty_roots); mutex_unlock(&root->fs_info->trans_mutex); if (!list_empty(&dirty_roots)) { drop_dirty_roots(root, &dirty_roots); + goto again; } return 0; } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -void btrfs_transaction_cleaner(void *p) -#else -void btrfs_transaction_cleaner(struct work_struct *work) -#endif -{ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct btrfs_fs_info *fs_info = p; -#else - struct btrfs_fs_info *fs_info = container_of(work, - struct btrfs_fs_info, - trans_work.work); - -#endif - struct btrfs_root *root = fs_info->tree_root; - struct btrfs_transaction *cur; - struct btrfs_trans_handle *trans; - unsigned long now; - unsigned long delay = HZ * 30; - int ret; - - smp_mb(); - if (root->fs_info->closing) - goto out; - - mutex_lock(&root->fs_info->trans_mutex); - cur = root->fs_info->running_transaction; - if (!cur) { - mutex_unlock(&root->fs_info->trans_mutex); - goto out; - } - now = get_seconds(); - if (now < cur->start_time || now - cur->start_time < 30) { - mutex_unlock(&root->fs_info->trans_mutex); 
- delay = HZ * 5; - goto out; - } - mutex_unlock(&root->fs_info->trans_mutex); - btrfs_defrag_dirty_roots(root->fs_info); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_commit_transaction(trans, root); -out: - btrfs_clean_old_snapshots(root); - btrfs_transaction_queue_work(root, delay); -} - -void btrfs_transaction_queue_work(struct btrfs_root *root, int delay) -{ - if (!root->fs_info->closing) - queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay); -} - -void btrfs_transaction_flush_work(struct btrfs_root *root) -{ - cancel_delayed_work(&root->fs_info->trans_work); - flush_workqueue(trans_wq); -} - -void __init btrfs_init_transaction_sys(void) -{ - trans_wq = create_workqueue("btrfs-transaction"); -} - -void btrfs_exit_transaction_sys(void) -{ - destroy_workqueue(trans_wq); -} diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 52559b51b181..e1e5a06b65f4 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -82,16 +82,6 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -void btrfs_transaction_cleaner(void *p); -#else -void btrfs_transaction_cleaner(struct work_struct *work); -#endif - -void btrfs_transaction_flush_work(struct btrfs_root *root); -void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); -void btrfs_init_transaction_sys(void); -void btrfs_exit_transaction_sys(void); int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list); int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 869864ddcc29..4e7cee27aab5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -271,13 +271,17 @@ again: list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); if (!device->in_fs_metadata) { - if (device->bdev) 
{ - close_bdev_excl(device->bdev); - fs_devices->open_devices--; - } + struct block_device *bdev; list_del(&device->dev_list); list_del(&device->dev_alloc_list); fs_devices->num_devices--; + if (device->bdev) { + bdev = device->bdev; + fs_devices->open_devices--; + mutex_unlock(&uuid_mutex); + close_bdev_excl(bdev); + mutex_lock(&uuid_mutex); + } kfree(device->name); kfree(device); goto again; -- cgit v1.2.3 From 3f157a2fd2ad731e1ed9964fecdc5f459f04a4a4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Online btree defragmentation fixes The btree defragger wasn't making forward progress because the new key wasn't being saved by the btrfs_search_forward function. This also disables the automatic btree defrag, it wasn't scaling well to huge filesystems. The auto-defrag needs to be done differently. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 170 ++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/ctree.h | 7 +- fs/btrfs/disk-io.c | 61 +----------------- fs/btrfs/disk-io.h | 6 -- fs/btrfs/extent-tree.c | 2 - fs/btrfs/super.c | 1 - fs/btrfs/transaction.c | 35 +--------- fs/btrfs/transaction.h | 1 - fs/btrfs/tree-defrag.c | 36 ++++++----- 9 files changed, 190 insertions(+), 129 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7f4cc2b88d09..0cb80f32a9c7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -63,10 +63,9 @@ void btrfs_free_path(struct btrfs_path *p) void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; - int keep = p->keep_locks; - int skip = p->skip_locking; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + p->slots[i] = 0; if (!p->nodes[i]) continue; if (p->locks[i]) { @@ -74,10 +73,8 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) p->locks[i] = 0; } free_extent_buffer(p->nodes[i]); + p->nodes[i] = NULL; } - memset(p, 0, sizeof(*p)); - p->keep_locks = keep; - p->skip_locking = skip; } struct 
extent_buffer *btrfs_root_node(struct btrfs_root *root) @@ -463,8 +460,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, search_start = cur->start; last_block = cur->start; *last_ret = search_start; - if (parent_level == 1) - btrfs_clear_buffer_defrag(cur); btrfs_tree_unlock(cur); free_extent_buffer(cur); } @@ -2969,8 +2964,138 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) return 1; } +/* + * A helper function to walk down the tree starting at min_key, and looking + * for nodes or leaves that are either in cache or have a minimum + * transaction id. This is used by the btree defrag code, but could + * also be used to search for blocks that have changed since a given + * transaction id. + * + * This does not cow, but it does stuff the starting key it finds back + * into min_key, so you can call btrfs_search_slot with cow=1 on the + * key and get a writable path. + * + * This does lock as it descends, and path->keep_locks should be set + * to 1 by the caller. + * + * This honors path->lowest_level to prevent descent past a given level + * of the tree. + * + * returns zero if something useful was found, < 0 on error and 1 if there + * was nothing in the tree that matched the search criteria. 
+ */ +int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_path *path, int cache_only, + u64 min_trans) +{ + struct extent_buffer *cur; + struct btrfs_key found_key; + int slot; + u32 nritems; + int level; + int ret = 1; + +again: + cur = btrfs_lock_root_node(root); + level = btrfs_header_level(cur); + path->nodes[level] = cur; + path->locks[level] = 1; + + if (btrfs_header_generation(cur) < min_trans) { + ret = 1; + goto out; + } + while(1) { + nritems = btrfs_header_nritems(cur); + level = btrfs_header_level(cur); + bin_search(cur, min_key, level, &slot); + + /* at level = 0, we're done, setup the path and exit */ + if (level == 0) { + ret = 0; + path->slots[level] = slot; + btrfs_item_key_to_cpu(cur, &found_key, slot); + goto out; + } + /* + * check this node pointer against the cache_only and + * min_trans parameters. If it isn't in cache or is too + * old, skip to the next one. + */ + while(slot < nritems) { + u64 blockptr; + u64 gen; + struct extent_buffer *tmp; + blockptr = btrfs_node_blockptr(cur, slot); + gen = btrfs_node_ptr_generation(cur, slot); + if (gen < min_trans) { + slot++; + continue; + } + if (!cache_only) + break; + + tmp = btrfs_find_tree_block(root, blockptr, + btrfs_level_size(root, level - 1)); + + if (tmp && btrfs_buffer_uptodate(tmp, gen)) { + free_extent_buffer(tmp); + break; + } + if (tmp) + free_extent_buffer(tmp); + slot++; + } + /* + * we didn't find a candidate key in this node, walk forward + * and find another one + */ + if (slot >= nritems) { + ret = btrfs_find_next_key(root, path, min_key, level, + cache_only, min_trans); + if (ret == 0) { + btrfs_release_path(root, path); + goto again; + } else { + goto out; + } + } + /* save our key for returning back */ + btrfs_node_key_to_cpu(cur, &found_key, slot); + path->slots[level] = slot; + if (level == path->lowest_level) { + ret = 0; + unlock_up(path, level, 1); + goto out; + } + cur = read_node_slot(root, cur, slot); + + btrfs_tree_lock(cur); + 
path->locks[level - 1] = 1; + path->nodes[level - 1] = cur; + unlock_up(path, level, 1); + } +out: + if (ret == 0) + memcpy(min_key, &found_key, sizeof(found_key)); + return ret; +} + +/* + * this is similar to btrfs_next_leaf, but does not try to preserve + * and fixup the path. It looks for and returns the next key in the + * tree based on the current path and the cache_only and min_trans + * parameters. + * + * 0 is returned if another key is found, < 0 if there are any errors + * and 1 is returned if there are no higher keys in the tree + * + * path->keep_locks should be set to 1 on the search made before + * calling this function. + */ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int lowest_level) + struct btrfs_key *key, int lowest_level, + int cache_only, u64 min_trans) { int level = lowest_level; int slot; @@ -2982,6 +3107,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, slot = path->slots[level] + 1; c = path->nodes[level]; +next: if (slot >= btrfs_header_nritems(c)) { level++; if (level == BTRFS_MAX_LEVEL) { @@ -2991,8 +3117,28 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, } if (level == 0) btrfs_item_key_to_cpu(c, key, slot); - else + else { + u64 blockptr = btrfs_node_blockptr(c, slot); + u64 gen = btrfs_node_ptr_generation(c, slot); + + if (cache_only) { + struct extent_buffer *cur; + cur = btrfs_find_tree_block(root, blockptr, + btrfs_level_size(root, level - 1)); + if (!cur || !btrfs_buffer_uptodate(cur, gen)) { + slot++; + if (cur) + free_extent_buffer(cur); + goto next; + } + free_extent_buffer(cur); + } + if (gen < min_trans) { + slot++; + goto next; + } btrfs_node_key_to_cpu(c, key, slot); + } return 0; } return 1; @@ -3095,6 +3241,12 @@ done: return 0; } +/* + * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps + * searching until it gets past min_objectid or finds an item of 'type' + * + * returns 0 if something is 
found, 1 if nothing was found and < 0 on error + */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ca8e6f15859e..a28796482b4a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -609,6 +609,7 @@ struct btrfs_root { u64 last_inode_alloc; int ref_cows; int track_dirty; + u64 defrag_trans_start; struct btrfs_key defrag_progress; struct btrfs_key defrag_max; int defrag_running; @@ -1412,7 +1413,11 @@ int btrfs_previous_item(struct btrfs_root *root, struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int lowest_level); + struct btrfs_key *key, int lowest_level, + int cache_only, u64 min_trans); +int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, + struct btrfs_path *path, int cache_only, + u64 min_trans); int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 31ca9f89388d..4cdc0b6a2672 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -295,7 +295,6 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); - btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", @@ -355,7 +354,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { ret = -EIO; @@ -736,6 +734,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, 
memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); + root->defrag_trans_start = fs_info->generation; init_completion(&root->kobj_unregister); root->defrag_running = 0; root->defrag_level = 0; @@ -1168,7 +1167,6 @@ static int transaction_kthread(void *arg) goto sleep; } mutex_unlock(&root->fs_info->trans_mutex); - btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sleep: @@ -1434,12 +1432,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root, "btrfs-transaction"); if (!fs_info->transaction_kthread) - goto fail_trans_kthread; + goto fail_cleaner; return tree_root; -fail_trans_kthread: +fail_cleaner: kthread_stop(fs_info->cleaner_kthread); fail_extent_root: free_extent_buffer(extent_root->node); @@ -1662,7 +1660,6 @@ int close_ctree(struct btrfs_root *root) kthread_stop(root->fs_info->transaction_kthread); kthread_stop(root->fs_info->cleaner_kthread); - btrfs_defrag_dirty_roots(root->fs_info); btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1794,58 +1791,6 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) return; } -void btrfs_set_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, - buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); -} - -void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, - buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, - GFP_NOFS); -} - -int 
btrfs_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); -} - -int btrfs_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG_DONE, 0); -} - -int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG_DONE, GFP_NOFS); -} - -int btrfs_clear_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG, GFP_NOFS); -} - int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index deff6b4815a7..353c3c50c957 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -61,12 +61,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf); -void btrfs_set_buffer_defrag(struct extent_buffer *buf); -void btrfs_set_buffer_defrag_done(struct extent_buffer *buf); -int btrfs_buffer_defrag(struct extent_buffer *buf); -int 
btrfs_buffer_defrag_done(struct extent_buffer *buf); -int btrfs_clear_buffer_defrag(struct extent_buffer *buf); -int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index dc3c03c6612d..5e0857ffbc35 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2095,8 +2095,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, set_extent_dirty(&trans->transaction->dirty_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); - if (!btrfs_test_opt(root, SSD)) - btrfs_set_buffer_defrag(buf); trans->blocks_used++; return buf; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 726d6871fa13..5e28cf5c2e85 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -365,7 +365,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } btrfs_clean_old_snapshots(root); - btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sb->s_dirt = 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8e909cb97c6d..98f422d9ab07 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -30,7 +30,6 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; #define BTRFS_ROOT_TRANS_TAG 0 -#define BTRFS_ROOT_DEFRAG_TAG 1 static noinline void put_transaction(struct btrfs_transaction *transaction) { @@ -92,9 +91,6 @@ static noinline int record_root_in_trans(struct btrfs_root *root) radix_tree_tag_set(&root->fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); - radix_tree_tag_set(&root->fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); root->commit_root = 
btrfs_root_node(root); } else { WARN_ON(1); @@ -403,44 +399,15 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) cond_resched(); trans = btrfs_start_transaction(root, 1); - if (ret != -EAGAIN) + if (root->fs_info->closing || ret != -EAGAIN) break; } root->defrag_running = 0; smp_mb(); - radix_tree_tag_clear(&info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - BTRFS_ROOT_DEFRAG_TAG); btrfs_end_transaction(trans, root); return 0; } -int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info) -{ - struct btrfs_root *gang[1]; - struct btrfs_root *root; - int i; - int ret; - int err = 0; - u64 last = 0; - - while(1) { - ret = radix_tree_gang_lookup_tag(&info->fs_roots_radix, - (void **)gang, last, - ARRAY_SIZE(gang), - BTRFS_ROOT_DEFRAG_TAG); - if (ret == 0) - break; - for (i = 0; i < ret; i++) { - root = gang[i]; - last = root->root_key.objectid + 1; - btrfs_defrag_root(root, 1); - } - } - btrfs_defrag_root(info->extent_root, 1); - return err; -} - static noinline int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index e1e5a06b65f4..9ccd5a5b170f 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -84,7 +84,6 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, struct list_head *dead_list); -int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b17693f61fbc..cc2650b06952 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -32,10 +32,13 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int wret; int level; int orig_level; - int i; int is_extent = 0; int next_key_ret = 0; u64 
last_ret = 0; + u64 min_trans = 0; + + if (cache_only) + goto out; if (root->fs_info->extent_root == root) { /* @@ -43,10 +46,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, * we can't defrag the extent root without deadlock */ goto out; -#if 0 - mutex_lock(&root->fs_info->alloc_mutex); - is_extent = 1; -#endif } if (root->ref_cows == 0 && !is_extent) @@ -84,6 +83,17 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, path->lowest_level = 1; path->keep_locks = 1; + if (cache_only) + min_trans = root->defrag_trans_start; + + ret = btrfs_search_forward(root, &key, path, cache_only, min_trans); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + goto out; + } + btrfs_release_path(root, path); wret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (wret < 0) { @@ -95,7 +105,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } path->slots[1] = btrfs_header_nritems(path->nodes[1]); - next_key_ret = btrfs_find_next_key(root, path, &key, 1); + next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only, + min_trans); ret = btrfs_realloc_node(trans, root, path->nodes[1], 0, cache_only, &last_ret, @@ -106,19 +117,9 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, ret = -EAGAIN; } - for (i = 1; i < BTRFS_MAX_LEVEL; i++) { - if (path->locks[i]) { - btrfs_tree_unlock(path->nodes[i]); - path->locks[i] = 0; - } - if (path->nodes[i]) { - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - } - } + btrfs_release_path(root, path); if (is_extent) btrfs_extent_post_op(trans, root); - out: if (is_extent) mutex_unlock(&root->fs_info->alloc_mutex); @@ -138,6 +139,7 @@ done: if (ret != -EAGAIN) { memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + root->defrag_trans_start = trans->transid; } return ret; } -- cgit v1.2.3 From b3c3da71ed88c06bc18a4d8919c6e4af3b933566 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 23 Jul 2008 12:12:13 -0400 Subject: Btrfs: Add version strings on module 
load Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 6 +++++- fs/btrfs/super.c | 3 +++ fs/btrfs/version.sh | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/version.sh (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 0e2dcc718de2..f8a38b0da7fb 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -14,8 +14,12 @@ else # Normal Makefile KERNELDIR := /lib/modules/`uname -r`/build -all: +all: version $(MAKE) -C $(KERNELDIR) M=`pwd` modules + +version: + bash version.sh + modules_install: $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install clean: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5e28cf5c2e85..4cb6aac5122e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -45,6 +45,7 @@ #include "print-tree.h" #include "xattr.h" #include "volumes.h" +#include "version.h" #define BTRFS_SUPER_MAGIC 0x9123683E @@ -605,6 +606,8 @@ static int __init init_btrfs_fs(void) err = register_filesystem(&btrfs_fs_type); if (err) goto unregister_ioctl; + + printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION); return 0; unregister_ioctl: diff --git a/fs/btrfs/version.sh b/fs/btrfs/version.sh new file mode 100644 index 000000000000..fd9b53d39860 --- /dev/null +++ b/fs/btrfs/version.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# +# determine-version -- report a useful version for releases +# +# Copyright 2008, Aron Griffis +# Copyright 2008, Oracle +# Released under the GNU GPLv2 + +v="Btrfs v0.15" + +which hg > /dev/null +if [ $? == 0 ]; then + last=$(hg tags | grep -m1 -o '^v[0-9.]\+') + + # now check if the repo has commits since then... + if [[ $(hg id -t) == $last || \ + $(hg di -r "$last:." 
| awk '/^diff/{print $NF}' | sort -u) == .hgtags ]] + then + # check if it's dirty + if [[ $(hg id | cut -d' ' -f1) == *+ ]]; then + v=$last+ + else + v=$last + fi + else + # includes dirty flag + v=$last+$(hg id -i) + fi +fi + +echo "#ifndef __BUILD_VERSION" > .build-version.h +echo "#define __BUILD_VERSION" >> .build-version.h +echo "#define BTRFS_BUILD_VERSION \"Btrfs $v\"" >> .build-version.h +echo "#endif" >> .build-version.h + +diff -q version.h .build-version.h >& /dev/null + +if [ $? == 0 ]; then + rm .build-version.h + exit 0 +fi + +mv .build-version.h version.h -- cgit v1.2.3 From 33268eaf0b3db5e2bd12c0ada81a8e8f87a46d68 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Jul 2008 12:16:36 -0400 Subject: Btrfs: Add ACL support Signed-off-by: Chris Mason --- fs/btrfs/acl.c | 296 +++++++++++++++++++++++++++++++++++++++++++------ fs/btrfs/btrfs_inode.h | 3 + fs/btrfs/ctree.h | 7 ++ fs/btrfs/inode.c | 42 ++++++- fs/btrfs/super.c | 9 +- fs/btrfs/xattr.c | 26 +++-- 6 files changed, 335 insertions(+), 48 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index ff0fcc72848a..b192659b4f5f 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -20,76 +20,302 @@ #include #include #include +#include #include + #include "ctree.h" +#include "btrfs_inode.h" #include "xattr.h" -#ifndef is_owner_or_cap -#define is_owner_or_cap(inode) \ - ((current->fsuid == (inode)->i_uid) || capable(CAP_FOWNER)) -#endif + +static void btrfs_update_cached_acl(struct inode *inode, + struct posix_acl **p_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED) + posix_acl_release(*p_acl); + *p_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) +{ + int size, name_index; + char *value = NULL; + struct posix_acl *acl = NULL, **p_acl; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = 
BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS; + p_acl = &BTRFS_I(inode)->i_acl; + break; + case ACL_TYPE_DEFAULT: + name_index = BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT; + p_acl = &BTRFS_I(inode)->i_default_acl; + break; + default: + return ERR_PTR(-EINVAL); + } + + spin_lock(&inode->i_lock); + if (*p_acl != BTRFS_ACL_NOT_CACHED) + acl = posix_acl_dup(*p_acl); + spin_unlock(&inode->i_lock); + + if (acl) + return acl; + + + size = btrfs_xattr_get(inode, name_index, "", NULL, 0); + if (size > 0) { + value = kzalloc(size, GFP_NOFS); + if (!value) + return ERR_PTR(-ENOMEM); + size = btrfs_xattr_get(inode, name_index, "", value, size); + if (size > 0) { + acl = posix_acl_from_xattr(value, size); + btrfs_update_cached_acl(inode, p_acl, acl); + } + kfree(value); + } else if (size == -ENOENT) { + acl = NULL; + btrfs_update_cached_acl(inode, p_acl, acl); + } + + return acl; +} + +static int btrfs_xattr_get_acl(struct inode *inode, int type, + void *value, size_t size) +{ + struct posix_acl *acl; + int ret = 0; + + acl = btrfs_get_acl(inode, type); + + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + ret = posix_acl_to_xattr(acl, value, size); + posix_acl_release(acl); + + return ret; +} + +/* + * Needs to be called with fs_mutex held + */ +static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + int ret, name_index = 0, size = 0; + struct posix_acl **p_acl; + char *value = NULL; + mode_t mode; + + if (acl) { + ret = posix_acl_valid(acl); + if (ret < 0) + return ret; + ret = 0; + } + + switch (type) { + case ACL_TYPE_ACCESS: + mode = inode->i_mode; + ret = posix_acl_equiv_mode(acl, &mode); + if (ret < 0) + return ret; + ret = 0; + inode->i_mode = mode; + name_index = BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS; + p_acl = &BTRFS_I(inode)->i_acl; + break; + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) + return acl ? 
-EINVAL : 0; + name_index = BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT; + p_acl = &BTRFS_I(inode)->i_default_acl; + break; + default: + return -EINVAL; + } + + if (acl) { + size = posix_acl_xattr_size(acl->a_count); + value = kmalloc(size, GFP_NOFS); + if (!value) { + ret = -ENOMEM; + goto out; + } + + ret = posix_acl_to_xattr(acl, value, size); + if (ret < 0) + goto out; + } + + ret = btrfs_xattr_set(inode, name_index, "", value, size, 0); + +out: + if (value) + kfree(value); + + if (!ret) + btrfs_update_cached_acl(inode, p_acl, acl); + + return ret; +} static int btrfs_xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) { int ret = 0; - struct posix_acl *acl; + struct posix_acl *acl = NULL; - if (!is_owner_or_cap(inode)) - return -EPERM; if (value) { acl = posix_acl_from_xattr(value, size); if (acl == NULL) { value = NULL; size = 0; } else if (IS_ERR(acl)) { - ret = PTR_ERR(acl); - } else { - ret = posix_acl_valid(acl); - posix_acl_release(acl); + return PTR_ERR(acl); } - if (ret) - return ret; } - return btrfs_xattr_set(inode, type, "", value, size, 0); -} -static int btrfs_xattr_get_acl(struct inode *inode, int type, - void *value, size_t size) -{ - return btrfs_xattr_get(inode, type, "", value, size); + ret = btrfs_set_acl(inode, acl, type); + + posix_acl_release(acl); + + return ret; } + + static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name, void *value, size_t size) { - if (*name != '\0') - return -EINVAL; - return btrfs_xattr_get_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, - value, size); + return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size); } + static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - if (*name != '\0') - return -EINVAL; - return btrfs_xattr_set_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS, - value, size); + return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); } + static int 
btrfs_xattr_acl_default_get(struct inode *inode, const char *name, void *value, size_t size) { - if (*name != '\0') - return -EINVAL; - return btrfs_xattr_get_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, - value, size); + return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size); } + static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - if (*name != '\0') - return -EINVAL; - return btrfs_xattr_set_acl(inode, BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT, - value, size); + return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); +} + +int btrfs_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl; + int error = -EAGAIN; + + acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); + + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + } + + return error; +} + +/* + * btrfs_init_acl is already generally called under fs_mutex, so the locking + * stuff has been fixed to work with that. If the locking stuff changes, we + * need to re-evaluate the acl locking stuff. 
+ */ +int btrfs_init_acl(struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + int ret = 0; + + /* this happens with subvols */ + if (!dir) + return 0; + + if (!S_ISLNK(inode->i_mode)) { + if (IS_POSIXACL(dir)) { + acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + + if (!acl) + inode->i_mode &= ~current->fs->umask; + } + + if (IS_POSIXACL(dir) && acl) { + struct posix_acl *clone; + mode_t mode; + + if (S_ISDIR(inode->i_mode)) { + ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT); + if (ret) + goto failed; + } + clone = posix_acl_clone(acl, GFP_NOFS); + ret = -ENOMEM; + if (!clone) + goto failed; + + mode = inode->i_mode; + ret = posix_acl_create_masq(clone, &mode); + if (ret >= 0) { + inode->i_mode = mode; + if (ret > 0) { + /* we need an acl */ + ret = btrfs_set_acl(inode, clone, + ACL_TYPE_ACCESS); + } + } + } +failed: + posix_acl_release(acl); + + return ret; +} + +int btrfs_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl, *clone; + int ret = 0; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + if (!IS_POSIXACL(inode)) + return 0; + + acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + + ret = posix_acl_chmod_masq(clone, inode->i_mode); + if (!ret) + ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS); + + posix_acl_release(clone); + + return ret; } + struct xattr_handler btrfs_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, .list = btrfs_xattr_generic_list, diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b7bd60e4fdd7..9f2a4ef944a7 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -36,6 +36,9 @@ struct btrfs_inode { struct inode vfs_inode; struct btrfs_ordered_inode_tree ordered_tree; + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; + /* * transid of the trans_handle 
that last modified this inode */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 53d315bdd16d..f87d7263f2d7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -42,6 +42,8 @@ struct btrfs_ordered_sum; #define BTRFS_MAGIC "_B5RfS_M" +#define BTRFS_ACL_NOT_CACHED ((void *)-1) + #ifdef CONFIG_LOCKDEP # define BTRFS_MAX_LEVEL 7 #else @@ -1694,4 +1696,9 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); u64 btrfs_parse_size(char *str); int btrfs_parse_options(struct btrfs_root *root, char *options); int btrfs_sync_fs(struct super_block *sb, int wait); + +/* acl.c */ +int btrfs_check_acl(struct inode *inode, int mask); +int btrfs_init_acl(struct inode *inode, struct inode *dir); +int btrfs_acl_chmod(struct inode *inode); #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8d371d6fe551..2d8853543a71 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -1478,6 +1479,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) } out: err = inode_setattr(inode, attr); + + if (!err && ((attr->ia_valid & ATTR_MODE))) + err = btrfs_acl_chmod(inode); fail: return err; } @@ -2184,6 +2188,12 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; + err = btrfs_init_acl(inode, dir); + if (err) { + drop_inode = 1; + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dentry, inode, 0); if (err) @@ -2239,6 +2249,12 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; + err = btrfs_init_acl(inode, dir); + if (err) { + drop_inode = 1; + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dentry, inode, 0); if (err) @@ -2366,6 +2382,11 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) } 
drop_on_err = 1; + + err = btrfs_init_acl(inode, dir); + if (err) + goto out_fail; + inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; btrfs_set_trans_block_group(trans, inode); @@ -3023,6 +3044,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return NULL; ei->last_trans = 0; btrfs_ordered_inode_tree_init(&ei->ordered_tree); + ei->i_acl = BTRFS_ACL_NOT_CACHED; + ei->i_default_acl = BTRFS_ACL_NOT_CACHED; return &ei->vfs_inode; } @@ -3032,6 +3055,13 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); + if (BTRFS_I(inode)->i_acl && + BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED) + posix_acl_release(BTRFS_I(inode)->i_acl); + if (BTRFS_I(inode)->i_default_acl && + BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) + posix_acl_release(BTRFS_I(inode)->i_default_acl); + while(1) { ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); if (!ordered) @@ -3230,6 +3260,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; + err = btrfs_init_acl(inode, dir); + if (err) { + drop_inode = 1; + goto out_unlock; + } + btrfs_set_trans_block_group(trans, inode); err = btrfs_add_nondir(trans, dentry, inode, 0); if (err) @@ -3310,7 +3346,7 @@ static int btrfs_permission(struct inode *inode, int mask, { if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE)) return -EACCES; - return generic_permission(inode, mask, NULL); + return generic_permission(inode, mask, btrfs_check_acl); } static struct inode_operations btrfs_dir_inode_operations = { @@ -3392,6 +3428,10 @@ static struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, .setattr = btrfs_setattr, .permission = btrfs_permission, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = btrfs_listxattr, + .removexattr = generic_removexattr, }; static struct inode_operations 
btrfs_symlink_inode_operations = { .readlink = generic_readlink, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 4cb6aac5122e..a6a418b6894b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -68,7 +68,7 @@ static void btrfs_put_super (struct super_block * sb) enum { Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, - Opt_ssd, Opt_thread_pool, Opt_err, + Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_err, }; static match_table_t tokens = { @@ -83,7 +83,8 @@ static match_table_t tokens = { {Opt_alloc_start, "alloc_start=%s"}, {Opt_thread_pool, "thread_pool=%d"}, {Opt_ssd, "ssd"}, - {Opt_err, NULL} + {Opt_noacl, "noacl"}, + {Opt_err, NULL}, }; u64 btrfs_parse_size(char *str) @@ -215,6 +216,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) info->alloc_start); } break; + case Opt_noacl: + root->fs_info->sb->s_flags &= ~MS_POSIXACL; + break; default: break; } @@ -301,6 +305,7 @@ static int btrfs_fill_super(struct super_block * sb, sb->s_op = &btrfs_super_ops; sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; + sb->s_flags |= MS_POSIXACL; tree_root = open_ctree(sb, fs_devices, (char *)data); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6730b5958844..121c9550314f 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -26,25 +26,27 @@ #include "transaction.h" #include "xattr.h" #include "disk-io.h" + static struct xattr_handler *btrfs_xattr_handler_map[] = { [BTRFS_XATTR_INDEX_USER] = &btrfs_xattr_user_handler, #ifdef CONFIG_FS_POSIX_ACL -// [BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &btrfs_xattr_acl_access_handler, -// [BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &btrfs_xattr_acl_default_handler, + [BTRFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &btrfs_xattr_acl_access_handler, + [BTRFS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &btrfs_xattr_acl_default_handler, #endif [BTRFS_XATTR_INDEX_TRUSTED] = &btrfs_xattr_trusted_handler, [BTRFS_XATTR_INDEX_SECURITY] = 
&btrfs_xattr_security_handler, -// [BTRFS_XATTR_INDEX_SYSTEM] = &btrfs_xattr_system_handler, + [BTRFS_XATTR_INDEX_SYSTEM] = &btrfs_xattr_system_handler, }; + struct xattr_handler *btrfs_xattr_handlers[] = { &btrfs_xattr_user_handler, #ifdef CONFIG_FS_POSIX_ACL -// &btrfs_xattr_acl_access_handler, -// &btrfs_xattr_acl_default_handler, + &btrfs_xattr_acl_access_handler, + &btrfs_xattr_acl_default_handler, #endif &btrfs_xattr_trusted_handler, &btrfs_xattr_security_handler, -// &btrfs_xattr_system_handler, + &btrfs_xattr_system_handler, NULL, }; @@ -237,10 +239,14 @@ int btrfs_xattr_set(struct inode *inode, int name_index, mod = 1; goto out; } - } else if (flags & XATTR_REPLACE) { - /* we couldn't find the attr to replace, so error out */ - ret = -ENODATA; - goto out; + } else { + btrfs_release_path(root, path); + + if (flags & XATTR_REPLACE) { + /* we couldn't find the attr to replace */ + ret = -ENODATA; + goto out; + } } /* ok we have to create a completely new xattr */ -- cgit v1.2.3 From b48652c101cce7a54379a49cc0cf854cec2c94e2 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 4 Aug 2008 23:23:47 -0400 Subject: Btrfs: Various small fixes. This trivial patch contains two locking fixes and a off by one fix. 
--- Signed-off-by: Chris Mason --- fs/btrfs/ordered-data.c | 4 ++-- fs/btrfs/root-tree.c | 3 +-- fs/btrfs/super.c | 2 ++ fs/btrfs/transaction.c | 9 +++++---- fs/btrfs/transaction.h | 3 +-- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 676e4bd65c52..db200e6baf7e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -548,7 +548,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, */ test = rb_entry(node, struct btrfs_ordered_extent, rb_node); if (test->file_offset > entry_end(ordered)) { - i_size_test = test->file_offset - 1; + i_size_test = test->file_offset; } } else { i_size_test = i_size_read(inode); @@ -561,7 +561,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, * disk_i_size to the end of the region. */ if (i_size_test > entry_end(ordered) && - !test_range_bit(io_tree, entry_end(ordered), i_size_test, + !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, EXTENT_DELALLOC, 0)) { new_i_size = min_t(u64, i_size_test, i_size_read(inode)); } diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index a5c0e98b5aeb..36726696e58b 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -209,8 +209,7 @@ again: goto err; } - ret = btrfs_add_dead_root(dead_root, latest, - &root->fs_info->dead_roots); + ret = btrfs_add_dead_root(dead_root, latest); if (ret) goto err; goto again; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a6a418b6894b..eb4b357d05e1 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -449,7 +449,9 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; } + mutex_lock(&s->s_root->d_inode->i_mutex); root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); + mutex_unlock(&s->s_root->d_inode->i_mutex); if (IS_ERR(root)) { up_write(&s->s_umount); deactivate_super(s); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 
a2c821e3c3a7..ebf5362da1d2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -389,9 +389,7 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } -int btrfs_add_dead_root(struct btrfs_root *root, - struct btrfs_root *latest, - struct list_head *dead_list) +int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) { struct btrfs_dirty_root *dirty; @@ -400,7 +398,10 @@ int btrfs_add_dead_root(struct btrfs_root *root, return -ENOMEM; dirty->root = root; dirty->latest_root = latest; - list_add(&dirty->list, dead_list); + + mutex_lock(&root->fs_info->trans_mutex); + list_add(&dirty->list, &latest->fs_info->dead_roots); + mutex_unlock(&root->fs_info->trans_mutex); return 0; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 2c73caeebb2c..598baa312417 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -90,8 +90,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest, - struct list_head *dead_list); +int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest); int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); int btrfs_clean_old_snapshots(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, -- cgit v1.2.3 From be6e8dc0ba84029997075a1ec77b4ddb863cbe15 Mon Sep 17 00:00:00 2001 From: Balaji Rao Date: Mon, 21 Jul 2008 02:01:56 +0530 Subject: NFS support for btrfs - v3 Date: Mon, 21 Jul 2008 02:01:56 +0530 Here's an implementation of NFS support for btrfs. It relies on the fixes which are going in to 2.6.28 for the NFS readdir/lookup deadlock. This uses the btrfs_iget helper introduced previously. 
[dwmw2: Tidy up a little, switch to d_obtain_alias() w/compat routine, change fh_type, store parent's root object ID where needed, fix some get_parent() and fs_to_dentry() bugs] Signed-off-by: Balaji Rao Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/compat.h | 17 +++++ fs/btrfs/export.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/export.h | 19 +++++ fs/btrfs/super.c | 2 + 5 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 fs/btrfs/export.c create mode 100644 fs/btrfs/export.h (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index a4b38177abdb..75f8818cbfee 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,7 +7,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o bit-radix.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o + ref-cache.o export.o btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o else diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index b3349a66999d..d45fb37887bc 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -5,6 +5,23 @@ #define trylock_page(page) (!TestSetPageLocked(page)) #endif +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27) +static inline struct dentry *d_obtain_alias(struct inode *inode) +{ + struct dentry *d; + + if (!inode) + return NULL; + if (IS_ERR(inode)) + return ERR_CAST(inode); + + d = d_alloc_anon(inode); + if (!d) + iput(inode); + return d; +} +#endif + /* * Even if AppArmor isn't enabled, it still has different prototypes. * Add more distro/version pairs here to declare which has AppArmor applied. 
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c new file mode 100644 index 000000000000..797b4cbc3786 --- /dev/null +++ b/fs/btrfs/export.c @@ -0,0 +1,208 @@ +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "btrfs_inode.h" +#include "print-tree.h" +#include "export.h" +#include "compat.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) +#define FILEID_BTRFS_WITHOUT_PARENT 0x4d +#define FILEID_BTRFS_WITH_PARENT 0x4e +#define FILEID_BTRFS_WITH_PARENT_ROOT 0x4f +#endif + +#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, parent_objectid)/4) +#define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, parent_root_objectid)/4) +#define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid)/4) + +static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, + int connectable) +{ + struct btrfs_fid *fid = (struct btrfs_fid *)fh; + struct inode *inode = dentry->d_inode; + int len = *max_len; + int type; + + if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || + (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) + return 255; + + len = BTRFS_FID_SIZE_NON_CONNECTABLE; + type = FILEID_BTRFS_WITHOUT_PARENT; + + fid->objectid = BTRFS_I(inode)->location.objectid; + fid->root_objectid = BTRFS_I(inode)->root->objectid; + fid->gen = inode->i_generation; + + if (connectable && !S_ISDIR(inode->i_mode)) { + struct inode *parent; + u64 parent_root_id; + + spin_lock(&dentry->d_lock); + + parent = dentry->d_parent->d_inode; + fid->parent_objectid = BTRFS_I(parent)->location.objectid; + fid->parent_gen = parent->i_generation; + parent_root_id = BTRFS_I(parent)->root->objectid; + + spin_unlock(&dentry->d_lock); + + if (parent_root_id != fid->root_objectid) { + fid->parent_root_objectid = parent_root_id; + len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; + type = FILEID_BTRFS_WITH_PARENT_ROOT; + } else { + len = BTRFS_FID_SIZE_CONNECTABLE; + type = FILEID_BTRFS_WITH_PARENT; + } + } + + *max_len = len; + return type; +} + +static struct dentry 
*btrfs_get_dentry(struct super_block *sb, u64 objectid, + u64 root_objectid, u32 generation) +{ + struct btrfs_root *root; + struct inode *inode; + struct dentry *result; + struct btrfs_key key; + + key.objectid = objectid; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + + root = btrfs_lookup_fs_root(btrfs_sb(sb)->fs_info, root_objectid); + inode = btrfs_iget(sb, &key, root, NULL); + if (IS_ERR(inode)) + return (void *)inode; + + if (generation != inode->i_generation) { + iput(inode); + return ERR_PTR(-ESTALE); + } + + result = d_obtain_alias(inode); + if (!result) + return ERR_PTR(-ENOMEM); + + return result; +} + +static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, + int fh_len, int fh_type) +{ + struct btrfs_fid *fid = (struct btrfs_fid *) fh; + u64 objectid, root_objectid; + u32 generation; + + if (fh_type == FILEID_BTRFS_WITH_PARENT) { + if (fh_len != BTRFS_FID_SIZE_CONNECTABLE) + return NULL; + root_objectid = fid->root_objectid; + } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) { + if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) + return NULL; + root_objectid = fid->parent_root_objectid; + } else + return NULL; + + objectid = fid->parent_objectid; + generation = fid->parent_gen; + + return btrfs_get_dentry(sb, objectid, root_objectid, generation); +} + +static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, + int fh_len, int fh_type) +{ + struct btrfs_fid *fid = (struct btrfs_fid *) fh; + u64 objectid, root_objectid; + u32 generation; + + if ((fh_type != FILEID_BTRFS_WITH_PARENT || + fh_len != BTRFS_FID_SIZE_CONNECTABLE) && + (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT || + fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) && + (fh_type != FILEID_BTRFS_WITHOUT_PARENT || + fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE)) + return NULL; + + objectid = fid->objectid; + root_objectid = fid->root_objectid; + generation = fid->gen; + + return btrfs_get_dentry(sb, objectid, root_objectid, 
generation); +} + +static struct dentry *btrfs_get_parent(struct dentry *child) +{ + struct inode *dir = child->d_inode; + struct inode *inode; + struct dentry *parent; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_key key; + struct btrfs_path *path; + struct extent_buffer *leaf; + u32 nritems; + int slot; + u64 objectid; + int ret; + + path = btrfs_alloc_path(); + + key.objectid = dir->i_ino; + btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY); + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + BUG_ON(ret == 0); + ret = 0; + + leaf = path->nodes[0]; + slot = path->slots[0]; + nritems = btrfs_header_nritems(leaf); + if (slot >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) { + btrfs_free_path(path); + goto out; + } + leaf = path->nodes[0]; + slot = path->slots[0]; + } + + btrfs_free_path(path); + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY) + goto out; + + objectid = key.offset; + + /* Build a new key for the inode item */ + key.objectid = objectid; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + + inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); + + parent = d_obtain_alias(inode); + if (!parent) + parent = ERR_PTR(-ENOMEM); + + return parent; + +out: + btrfs_free_path(path); + return ERR_PTR(-EINVAL); +} + +const struct export_operations btrfs_export_ops = { + .encode_fh = btrfs_encode_fh, + .fh_to_dentry = btrfs_fh_to_dentry, + .fh_to_parent = btrfs_fh_to_parent, + .get_parent = btrfs_get_parent, +}; diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h new file mode 100644 index 000000000000..074348a95841 --- /dev/null +++ b/fs/btrfs/export.h @@ -0,0 +1,19 @@ +#ifndef BTRFS_EXPORT_H +#define BTRFS_EXPORT_H + +#include + +extern const struct export_operations btrfs_export_ops; + +struct btrfs_fid { + u64 objectid; + u64 root_objectid; + u32 gen; + + u64 parent_objectid; + u32 parent_gen; + + u64 parent_root_objectid; +} 
__attribute__ ((packed)); + +#endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index eb4b357d05e1..e830e0ed409a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -46,6 +46,7 @@ #include "xattr.h" #include "volumes.h" #include "version.h" +#include "export.h" #define BTRFS_SUPER_MAGIC 0x9123683E @@ -303,6 +304,7 @@ static int btrfs_fill_super(struct super_block * sb, sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; + sb->s_export_op = &btrfs_export_ops; sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; sb->s_flags |= MS_POSIXACL; -- cgit v1.2.3 From 9d03632e26e1a0a9e4a632cf426a7c0566768a7d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 18 Aug 2008 12:01:52 +0100 Subject: Fill f_fsid field in btrfs_statfs() Date: Mon, 18 Aug 2008 12:01:52 +0100 Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e830e0ed409a..6446ab73f35a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -489,6 +489,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct btrfs_root *root = btrfs_sb(dentry->d_sb); struct btrfs_super_block *disk_super = &root->fs_info->super_copy; int bits = dentry->d_sb->s_blocksize_bits; + __be32 *fsid = (__be32 *)root->fs_info->fsid; buf->f_namelen = BTRFS_NAME_LEN; buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; @@ -497,6 +498,11 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail = buf->f_bfree; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_type = BTRFS_SUPER_MAGIC; + /* We treat it as constant endianness (it doesn't matter _which_) + because we want the fsid to come out the same whether mounted + on a big-endian or little-endian host */ + buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); + buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ 
be32_to_cpu(fsid[3]); return 0; } -- cgit v1.2.3 From 32d48fa1af1fe066a6a4798e6f5a50ac6a3ce4a3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 18 Aug 2008 13:10:20 +0100 Subject: Mask root object ID into f_fsid in btrfs_statfs() Date: Mon, 18 Aug 2008 13:10:20 +0100 This means that subvolumes get a different fsid, and NFS exporting them works properly. Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/super.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6446ab73f35a..55f4d00fda3a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -503,6 +503,10 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) on a big-endian or little-endian host */ buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); + /* Mask in the root object ID too, to disambiguate subvols */ + buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32; + buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid; + return 0; } -- cgit v1.2.3 From 76fcef19c40328499a2f6d59d76b72fd03d2cc82 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 19 Aug 2008 16:49:35 +0100 Subject: Btrfs: Reinstate '-osubvol=.' option to mount entire tree Date: Tue, 19 Aug 2008 16:49:35 +0100 This disappeared when I removed the special case for '.' 
in btrfs_lookup() Signed-off-by: David Woodhouse Signed-off-by: Chris Mason --- fs/btrfs/super.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 55f4d00fda3a..f7b3eac7ac6d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -451,21 +451,25 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; } - mutex_lock(&s->s_root->d_inode->i_mutex); - root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); - mutex_unlock(&s->s_root->d_inode->i_mutex); - if (IS_ERR(root)) { - up_write(&s->s_umount); - deactivate_super(s); - error = PTR_ERR(root); - goto error; - } - if (!root->d_inode) { - dput(root); - up_write(&s->s_umount); - deactivate_super(s); - error = -ENXIO; - goto error; + if (!strcmp(subvol_name, ".")) + root = dget(s->s_root); + else { + mutex_lock(&s->s_root->d_inode->i_mutex); + root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name)); + mutex_unlock(&s->s_root->d_inode->i_mutex); + if (IS_ERR(root)) { + up_write(&s->s_umount); + deactivate_super(s); + error = PTR_ERR(root); + goto error; + } + if (!root->d_inode) { + dput(root); + up_write(&s->s_umount); + deactivate_super(s); + error = -ENXIO; + goto error; + } } mnt->mnt_sb = s; -- cgit v1.2.3 From 2b1f55b0f0d0d1a66470ef4ea2696cd5dd741a12 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 24 Sep 2008 11:48:04 -0400 Subject: Remove Btrfs compat code for older kernels Btrfs had compatibility code for kernels back to 2.6.18. These have been removed, and will be maintained in a separate backport git tree from now on. 
Signed-off-by: Chris Mason --- fs/btrfs/async-thread.c | 6 --- fs/btrfs/compat.h | 40 +------------- fs/btrfs/crc32c.h | 6 --- fs/btrfs/ctree.h | 7 +-- fs/btrfs/disk-io.c | 28 ---------- fs/btrfs/export.c | 6 --- fs/btrfs/extent_io.c | 66 ++--------------------- fs/btrfs/file.c | 24 +-------- fs/btrfs/inode.c | 135 +----------------------------------------------- fs/btrfs/super.c | 7 --- fs/btrfs/sysfs.c | 33 ------------ fs/btrfs/volumes.c | 20 ------- 12 files changed, 10 insertions(+), 368 deletions(-) (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 2ee301740195..4e780b279de6 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -20,13 +20,7 @@ #include #include #include - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) # include -#else -# include -#endif - #include "async-thread.h" /* diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h index b0ed1887d9b1..cd6598b169df 100644 --- a/fs/btrfs/compat.h +++ b/fs/btrfs/compat.h @@ -1,9 +1,8 @@ #ifndef _COMPAT_H_ #define _COMPAT_H_ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,26) -#define trylock_page(page) (!TestSetPageLocked(page)) -#endif +#define btrfs_drop_nlink(inode) drop_nlink(inode) +#define btrfs_inc_nlink(inode) inc_nlink(inode) #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,27) static inline struct dentry *d_obtain_alias(struct inode *inode) @@ -22,39 +21,4 @@ static inline struct dentry *d_obtain_alias(struct inode *inode) } #endif -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -static inline void btrfs_drop_nlink(struct inode *inode) -{ - inode->i_nlink--; -} - -static inline void btrfs_inc_nlink(struct inode *inode) -{ - inode->i_nlink++; -} -#else -# define btrfs_drop_nlink(inode) drop_nlink(inode) -# define btrfs_inc_nlink(inode) inc_nlink(inode) -#endif - -/* - * Even if AppArmor isn't enabled, it still has different prototypes. - * Add more distro/version pairs here to declare which has AppArmor applied. 
- */ -#if defined(CONFIG_SUSE_KERNEL) -# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) -# define REMOVE_SUID_PATH 1 -# endif -#endif - -/* - * catch any other distros that have patched in apparmor. This isn't - * 100% reliable because it won't catch people that hand compile their - * own distro kernels without apparmor compiled in. But, it is better - * than nothing. - */ -#ifdef CONFIG_SECURITY_APPARMOR -# define REMOVE_SUID_PATH 1 -#endif - #endif /* _COMPAT_H_ */ diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h index bf6c12e85730..4f0fefed132a 100644 --- a/fs/btrfs/crc32c.h +++ b/fs/btrfs/crc32c.h @@ -96,13 +96,7 @@ static inline u32 __btrfs_crc32c(u32 crc, unsigned char const *address, * We must workaround older implementations of crc32c_le() * found on older kernel versions. */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) -#define btrfs_crc32c(seed, data, length) \ - __cpu_to_le32( __btrfs_crc32c( __le32_to_cpu(seed), \ - (unsigned char const *)data, length) ) -#else #define btrfs_crc32c(seed, data, length) \ __btrfs_crc32c(seed, (unsigned char const *)data, length) #endif -#endif diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 138c157bbc45..3b3c1ca50c5d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1472,12 +1472,9 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) { ((unsigned long)(btrfs_leaf_data(leaf) + \ btrfs_item_offset_nr(leaf, slot))) -static inline struct dentry *fdentry(struct file *file) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - return file->f_dentry; -#else +static inline struct dentry *fdentry(struct file *file) +{ return file->f_path.dentry; -#endif } /* extent-tree.c */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d35ca6a3f513..dffb8dabd533 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -26,11 +26,7 @@ #include // for block_sync_page #include #include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) # include -#else -# include -#endif #include "crc32c.h" 
#include "ctree.h" #include "disk-io.h" @@ -373,21 +369,11 @@ out: return ret; } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_workqueue_bio(struct bio *bio, int err) -#else -static int end_workqueue_bio(struct bio *bio, - unsigned int bytes_done, int err) -#endif { struct end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - fs_info = end_io_wq->info; end_io_wq->error = err; end_io_wq->work.func = end_workqueue_fn; @@ -397,10 +383,6 @@ static int end_workqueue_bio(struct bio *bio, &end_io_wq->work); else btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, @@ -1161,9 +1143,7 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_init(bdi); -#endif bdi->ra_pages = default_backing_dev_info.ra_pages; bdi->state = 0; bdi->capabilities = default_backing_dev_info.capabilities; @@ -1242,11 +1222,7 @@ static void end_workqueue_fn(struct btrfs_work *work) bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kfree(end_io_wq); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, error); -#else bio_endio(bio, error); -#endif } static int cleaner_kthread(void *arg) @@ -1673,9 +1649,7 @@ fail: kfree(extent_root); kfree(tree_root); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); -#endif kfree(fs_info); return ERR_PTR(err); } @@ -1936,9 +1910,7 @@ int close_ctree(struct btrfs_root *root) btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); -#endif 
kfree(fs_info->extent_root); kfree(fs_info->tree_root); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 2b357a6d2407..48b82cd7583c 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -7,12 +7,6 @@ #include "export.h" #include "compat.h" -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -#define FILEID_BTRFS_WITHOUT_PARENT 0x4d -#define FILEID_BTRFS_WITH_PARENT 0x4e -#define FILEID_BTRFS_WITH_PARENT_ROOT 0x4f -#endif - #define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, parent_objectid)/4) #define BTRFS_FID_SIZE_CONNECTABLE (offsetof(struct btrfs_fid, parent_root_objectid)/4) #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid)/4) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 58ad25838a41..e3a25be5c663 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1397,12 +1397,7 @@ static int check_page_writeback(struct extent_io_tree *tree, * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. 
*/ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_bio_extent_writepage(struct bio *bio, int err) -#else -static int end_bio_extent_writepage(struct bio *bio, - unsigned int bytes_done, int err) -#endif { int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1412,10 +1407,6 @@ static int end_bio_extent_writepage(struct bio *bio, int whole_page; int ret; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif do { struct page *page = bvec->bv_page; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -1461,10 +1452,8 @@ static int end_bio_extent_writepage(struct bio *bio, else check_page_writeback(tree, page); } while (bvec >= bio->bi_io_vec); + bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } /* @@ -1478,12 +1467,7 @@ static int end_bio_extent_writepage(struct bio *bio, * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_bio_extent_readpage(struct bio *bio, int err) -#else -static int end_bio_extent_readpage(struct bio *bio, - unsigned int bytes_done, int err) -#endif { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1493,11 +1477,6 @@ static int end_bio_extent_readpage(struct bio *bio, int whole_page; int ret; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - do { struct page *page = bvec->bv_page; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -1556,9 +1535,6 @@ static int end_bio_extent_readpage(struct bio *bio, } while (bvec >= bio->bi_io_vec); bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } /* @@ -1566,12 +1542,7 @@ static int end_bio_extent_readpage(struct bio *bio, * the structs in the extent tree when done, and set the uptodate bits * as appropriate. 
*/ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_bio_extent_preparewrite(struct bio *bio, int err) -#else -static int end_bio_extent_preparewrite(struct bio *bio, - unsigned int bytes_done, int err) -#endif { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; @@ -1579,11 +1550,6 @@ static int end_bio_extent_preparewrite(struct bio *bio, u64 start; u64 end; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - do { struct page *page = bvec->bv_page; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -1607,9 +1573,6 @@ static int end_bio_extent_preparewrite(struct bio *bio, } while (bvec >= bio->bi_io_vec); bio_put(bio); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } static struct bio * @@ -2079,12 +2042,6 @@ done: return 0; } -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) -/* Taken directly from 2.6.23 with a mod for a lockpage hook */ -typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, - void *data); -#endif - /** * write_cache_pages - walk the list of dirty pages of the given address space and write all of them. * @mapping: address space structure to write @@ -2201,10 +2158,9 @@ retry: } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = index; -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + if (wbc->range_cont) wbc->range_start = index << PAGE_CACHE_SHIFT; -#endif return ret; } EXPORT_SYMBOL(extent_write_cache_pages); @@ -2560,18 +2516,10 @@ static inline struct page *extent_buffer_page(struct extent_buffer *eb, * by increasing the reference count. So we know the page must * be in the radix tree. 
*/ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) rcu_read_lock(); -#else - read_lock_irq(&mapping->tree_lock); -#endif p = radix_tree_lookup(&mapping->page_tree, i); - -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) rcu_read_unlock(); -#else - read_unlock_irq(&mapping->tree_lock); -#endif + return p; } @@ -2773,21 +2721,13 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, } } clear_page_dirty_for_io(page); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) spin_lock_irq(&page->mapping->tree_lock); -#else - read_lock_irq(&page->mapping->tree_lock); -#endif if (!PageDirty(page)) { radix_tree_tag_clear(&page->mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) spin_unlock_irq(&page->mapping->tree_lock); -#else - read_unlock_irq(&page->mapping->tree_lock); -#endif unlock_page(page); } return 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 48a702d41c8c..8856570a0ebd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -871,15 +871,8 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, goto out_nolock; if (count == 0) goto out_nolock; -#ifdef REMOVE_SUID_PATH - err = remove_suid(&file->f_path); -#else -# if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) + err = file_remove_suid(file); -# else - err = remove_suid(fdentry(file)); -# endif -#endif if (err) goto out_nolock; file_update_time(file); @@ -1003,17 +996,10 @@ out_nolock: btrfs_commit_transaction(trans, root); } } else if (num_written > 0 && (file->f_flags & O_DIRECT)) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - do_sync_file_range(file, start_pos, - start_pos + num_written - 1, - SYNC_FILE_RANGE_WRITE | - SYNC_FILE_RANGE_WAIT_AFTER); -#else do_sync_mapping_range(inode->i_mapping, start_pos, start_pos + num_written - 1, SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER); -#endif invalidate_mapping_pages(inode->i_mapping, start_pos >> PAGE_CACHE_SHIFT, (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); 
@@ -1097,12 +1083,7 @@ out: } static struct vm_operations_struct btrfs_file_vm_ops = { -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - .nopage = filemap_nopage, - .populate = filemap_populate, -#else .fault = filemap_fault, -#endif .page_mkwrite = btrfs_page_mkwrite, }; @@ -1118,9 +1099,6 @@ struct file_operations btrfs_file_operations = { .read = do_sync_read, .aio_read = generic_file_aio_read, .splice_read = generic_file_splice_read, -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - .sendfile = generic_file_sendfile, -#endif .write = btrfs_file_write, .mmap = btrfs_file_mmap, .open = generic_file_open, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index adb169d739ce..48a3dc030807 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2073,104 +2073,6 @@ err: return ret; } -/* Kernels earlier than 2.6.28 still have the NFS deadlock where nfsd - will call the file system's ->lookup() method from within its - filldir callback, which in turn was called from the file system's - ->readdir() method. And will deadlock for many file systems. 
*/ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - -struct nfshack_dirent { - u64 ino; - loff_t offset; - int namlen; - unsigned int d_type; - char name[]; -}; - -struct nfshack_readdir { - char *dirent; - size_t used; - int full; -}; - - - -static int btrfs_nfshack_filldir(void *__buf, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) -{ - struct nfshack_readdir *buf = __buf; - struct nfshack_dirent *de = (void *)(buf->dirent + buf->used); - unsigned int reclen; - - reclen = ALIGN(sizeof(struct nfshack_dirent) + namlen, sizeof(u64)); - if (buf->used + reclen > PAGE_SIZE) { - buf->full = 1; - return -EINVAL; - } - - de->namlen = namlen; - de->offset = offset; - de->ino = ino; - de->d_type = d_type; - memcpy(de->name, name, namlen); - buf->used += reclen; - - return 0; -} - -static int btrfs_nfshack_readdir(struct file *file, void *dirent, - filldir_t filldir) -{ - struct nfshack_readdir buf; - struct nfshack_dirent *de; - int err; - int size; - loff_t offset; - - buf.dirent = (void *)__get_free_page(GFP_KERNEL); - if (!buf.dirent) - return -ENOMEM; - - offset = file->f_pos; - - do { - unsigned int reclen; - - buf.used = 0; - buf.full = 0; - err = btrfs_real_readdir(file, &buf, btrfs_nfshack_filldir); - if (err) - break; - - size = buf.used; - - if (!size) - break; - - de = (struct nfshack_dirent *)buf.dirent; - while (size > 0) { - offset = de->offset; - - if (filldir(dirent, de->name, de->namlen, de->offset, - de->ino, de->d_type)) - goto done; - offset = file->f_pos; - - reclen = ALIGN(sizeof(*de) + de->namlen, - sizeof(u64)); - size -= reclen; - de = (struct nfshack_dirent *)((char *)de + reclen); - } - } while (buf.full); - - done: - free_page((unsigned long)buf.dirent); - file->f_pos = offset; - - return err; -} -#endif - int btrfs_write_inode(struct inode *inode, int wait) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -3311,13 +3213,8 @@ unsigned long btrfs_force_ra(struct address_space *mapping, { pgoff_t req_size = 
last_index - offset + 1; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - offset = page_cache_readahead(mapping, ra, file, offset, req_size); - return offset; -#else page_cache_sync_readahead(mapping, ra, file, offset, req_size); return offset + req_size; -#endif } struct inode *btrfs_alloc_inode(struct super_block *sb) @@ -3373,14 +3270,7 @@ void btrfs_destroy_inode(struct inode *inode) kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) static void init_once(void *foo) -#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) -static void init_once(struct kmem_cache * cachep, void *foo) -#else -static void init_once(void * foo, struct kmem_cache * cachep, - unsigned long flags) -#endif { struct btrfs_inode *ei = (struct btrfs_inode *) foo; @@ -3403,22 +3293,10 @@ void btrfs_destroy_cachep(void) struct kmem_cache *btrfs_cache_create(const char *name, size_t size, unsigned long extra_flags, -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) - void (*ctor)(void *) -#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) - void (*ctor)(struct kmem_cache *, void *) -#else - void (*ctor)(void *, struct kmem_cache *, - unsigned long) -#endif - ) + void (*ctor)(void *)) { return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT | - SLAB_MEM_SPREAD | extra_flags), ctor -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - ,NULL -#endif - ); + SLAB_MEM_SPREAD | extra_flags), ctor); } int btrfs_init_cachep(void) @@ -3666,12 +3544,7 @@ static int btrfs_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) static int btrfs_permission(struct inode *inode, int mask) -#else -static int btrfs_permission(struct inode *inode, int mask, - struct nameidata *nd) -#endif { if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE)) return -EACCES; @@ -3702,11 +3575,7 @@ static struct inode_operations btrfs_dir_ro_inode_operations = { static struct file_operations 
btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - .readdir = btrfs_nfshack_readdir, -#else /* NFSd readdir/lookup deadlock is fixed */ .readdir = btrfs_real_readdir, -#endif .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_ioctl, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f7b3eac7ac6d..8399d6d05d63 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -349,10 +349,7 @@ static int btrfs_fill_super(struct super_block * sb, sb->s_root = root_dentry; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) save_mount_options(sb, data); -#endif - return 0; fail_close: @@ -566,11 +563,7 @@ static struct super_operations btrfs_super_ops = { .put_super = btrfs_put_super, .write_super = btrfs_write_super, .sync_fs = btrfs_sync_fs, -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) - .read_inode = btrfs_read_locked_inode, -#else .show_options = generic_show_options, -#endif .write_inode = btrfs_write_inode, .dirty_inode = btrfs_dirty_inode, .alloc_inode = btrfs_alloc_inode, diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 61af5d8446e3..300076e66765 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -28,7 +28,6 @@ #include "disk-io.h" #include "transaction.h" -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf) { return snprintf(buf, PAGE_SIZE, "%llu\n", @@ -267,35 +266,3 @@ void btrfs_exit_sysfs(void) kset_unregister(btrfs_kset); } -#else - -int btrfs_sysfs_add_super(struct btrfs_fs_info *fs) -{ - return 0; -} - -int btrfs_sysfs_add_root(struct btrfs_root *root) -{ - return 0; -} - -void btrfs_sysfs_del_root(struct btrfs_root *root) -{ - return; -} - -void btrfs_sysfs_del_super(struct btrfs_fs_info *fs) -{ - return; -} - -int btrfs_init_sysfs(void) -{ - return 0; -} - -void btrfs_exit_sysfs(void) -{ - return; -} -#endif diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 
b9e5c2d82dde..ddf89626498a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2080,20 +2080,11 @@ int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree, } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_bio_multi_stripe(struct bio *bio, int err) -#else -static int end_bio_multi_stripe(struct bio *bio, - unsigned int bytes_done, int err) -#endif { struct btrfs_multi_bio *multi = bio->bi_private; int is_orig_bio = 0; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif if (err) atomic_inc(&multi->error); @@ -2122,17 +2113,10 @@ static int end_bio_multi_stripe(struct bio *bio, } kfree(multi); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, err); -#else bio_endio(bio, err); -#endif } else if (!is_orig_bio) { bio_put(bio); } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } struct async_sched { @@ -2248,11 +2232,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, } else { bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; bio->bi_sector = logical >> 9; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, -EIO); -#else bio_endio(bio, -EIO); -#endif } dev_nr++; } -- cgit v1.2.3 From d352ac68148b69937d39ca5d48bcc4478e118dbf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Sep 2008 15:18:18 -0400 Subject: Btrfs: add and improve comments This improves the comments at the top of many functions. It didn't dive into the guts of functions because I was trying to avoid merging problems with the new allocator and back reference work. extent-tree.c and volumes.c were both skipped, and there is definitely more work to do in cleaning and commenting the code. 
Signed-off-by: Chris Mason --- fs/btrfs/Makefile | 2 +- fs/btrfs/TODO | 20 ----- fs/btrfs/async-thread.c | 10 ++- fs/btrfs/async-thread.h | 7 +- fs/btrfs/bit-radix.c | 130 --------------------------------- fs/btrfs/bit-radix.h | 33 --------- fs/btrfs/btrfs_inode.h | 54 +++++++++++++- fs/btrfs/crc32c.h | 18 +++++ fs/btrfs/ctree.c | 127 +++++++++++++++++++++++++++++--- fs/btrfs/ctree.h | 1 - fs/btrfs/dir-item.c | 41 +++++++++++ fs/btrfs/disk-io.c | 33 ++++++++- fs/btrfs/extent_io.c | 34 ++++++++- fs/btrfs/extent_map.c | 10 +++ fs/btrfs/file.c | 44 ++++++++++- fs/btrfs/inode.c | 189 +++++++++++++++++++++++++++++++----------------- fs/btrfs/locking.c | 13 ++++ fs/btrfs/ordered-data.c | 19 ++++- fs/btrfs/ref-cache.c | 26 +++++++ fs/btrfs/ref-cache.h | 3 + fs/btrfs/root-tree.c | 21 +++++- fs/btrfs/struct-funcs.c | 21 ++++++ fs/btrfs/super.c | 3 + fs/btrfs/transaction.c | 67 ++++++++++++++++- fs/btrfs/tree-defrag.c | 4 + 25 files changed, 653 insertions(+), 277 deletions(-) delete mode 100644 fs/btrfs/TODO delete mode 100644 fs/btrfs/bit-radix.c delete mode 100644 fs/btrfs/bit-radix.h (limited to 'fs/btrfs/super.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d5c28557fba9..48b7909ca8d1 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) obj-m := btrfs.o btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ file-item.o inode-item.o inode-map.o disk-io.o \ - transaction.o bit-radix.o inode.o file.o tree-defrag.o \ + transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ ref-cache.o export.o tree-log.o acl.o free-space-cache.o diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO deleted file mode 100644 index d9b6d38c603a..000000000000 --- a/fs/btrfs/TODO +++ /dev/null @@ -1,20 +0,0 @@ -* cleanup, add more error checking, get rid of BUG_ONs -* Fix ENOSPC handling -* Make allocator smarter 
-* add a block group to struct inode -* Do actual block accounting -* Check compat and incompat flags on the inode -* Get rid of struct ctree_path, limiting tree levels held at one time -* Add generation number to key pointer in nodes -* Add generation number to inode -* forbid cross subvolume renames and hardlinks -* Release -* Do real tree locking -* Add extent mirroring (backup copies of blocks) -* Add fancy interface to get access to incremental backups -* Add fancy striped extents to make big reads faster -* Use relocation to try and fix write errors -* Make allocator much smarter -* xattrs (directory streams for regular files) -* Scrub & defrag - diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 4e780b279de6..04fb9702d14c 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -231,17 +231,25 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) /* * if we pick a busy task, move the task to the end of the list. - * hopefully this will keep things somewhat evenly balanced + * hopefully this will keep things somewhat evenly balanced. + * Do the move in batches based on the sequence number. This groups + * requests submitted at roughly the same time onto the same worker. */ next = workers->worker_list.next; worker = list_entry(next, struct btrfs_worker_thread, worker_list); atomic_inc(&worker->num_pending); worker->sequence++; + if (worker->sequence % workers->idle_thresh == 0) list_move_tail(next, &workers->worker_list); return worker; } +/* + * selects a worker thread to take the next job. This will either find + * an idle worker, start a new worker up to the max count, or just return + * one of the existing busy workers. 
+ */ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) { struct btrfs_worker_thread *worker; diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 43e44d115dd1..4ec9a2ee0f9d 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -63,14 +63,17 @@ struct btrfs_workers { /* once a worker has this many requests or fewer, it is idle */ int idle_thresh; - /* list with all the work threads */ + /* list with all the work threads. The workers on the idle thread + * may be actively servicing jobs, but they haven't yet hit the + * idle thresh limit above. + */ struct list_head worker_list; struct list_head idle_list; /* lock for finding the next worker thread to queue on */ spinlock_t lock; - /* extra name for this worker */ + /* extra name for this worker, used for current->name */ char *name; }; diff --git a/fs/btrfs/bit-radix.c b/fs/btrfs/bit-radix.c deleted file mode 100644 index e8bf876db393..000000000000 --- a/fs/btrfs/bit-radix.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- */ - -#include "bit-radix.h" - -#define BIT_ARRAY_BYTES 256 -#define BIT_RADIX_BITS_PER_ARRAY ((BIT_ARRAY_BYTES - sizeof(unsigned long)) * 8) - -extern struct kmem_cache *btrfs_bit_radix_cachep; -int set_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - int ret; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) { - bits = kmem_cache_alloc(btrfs_bit_radix_cachep, GFP_NOFS); - if (!bits) - return -ENOMEM; - memset(bits + 1, 0, BIT_ARRAY_BYTES - sizeof(unsigned long)); - bits[0] = slot; - ret = radix_tree_insert(radix, slot, bits); - if (ret) - return ret; - } - ret = test_and_set_bit(bit_slot, bits + 1); - if (ret < 0) - ret = 1; - return ret; -} - -int test_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) - return 0; - return test_bit(bit_slot, bits + 1); -} - -int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit) -{ - unsigned long *bits; - unsigned long slot; - int bit_slot; - int i; - int empty = 1; - - slot = bit / BIT_RADIX_BITS_PER_ARRAY; - bit_slot = bit % BIT_RADIX_BITS_PER_ARRAY; - - bits = radix_tree_lookup(radix, slot); - if (!bits) - return 0; - clear_bit(bit_slot, bits + 1); - for (i = 1; i < BIT_ARRAY_BYTES / sizeof(unsigned long); i++) { - if (bits[i]) { - empty = 0; - break; - } - } - if (empty) { - bits = radix_tree_delete(radix, slot); - BUG_ON(!bits); - kmem_cache_free(btrfs_bit_radix_cachep, bits); - } - return 0; -} - -int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - unsigned long start, int nr) -{ - unsigned long *bits; - unsigned long *gang[4]; - int found; - int ret; - int i; - int total_found = 0; - unsigned 
long slot; - - slot = start / BIT_RADIX_BITS_PER_ARRAY; - ret = radix_tree_gang_lookup(radix, (void **)gang, slot, - ARRAY_SIZE(gang)); - found = start % BIT_RADIX_BITS_PER_ARRAY; - for (i = 0; i < ret && nr > 0; i++) { - bits = gang[i]; - while(nr > 0) { - found = find_next_bit(bits + 1, - BIT_RADIX_BITS_PER_ARRAY, - found); - if (found < BIT_RADIX_BITS_PER_ARRAY) { - *retbits = bits[0] * - BIT_RADIX_BITS_PER_ARRAY + found; - retbits++; - nr--; - total_found++; - found++; - } else - break; - } - found = 0; - } - return total_found; -} diff --git a/fs/btrfs/bit-radix.h b/fs/btrfs/bit-radix.h deleted file mode 100644 index c100f54d5c32..000000000000 --- a/fs/btrfs/bit-radix.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (C) 2007 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. 
- */ - -#ifndef __BIT_RADIX__ -#define __BIT_RADIX__ -#include - -int set_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int test_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int clear_radix_bit(struct radix_tree_root *radix, unsigned long bit); -int find_first_radix_bit(struct radix_tree_root *radix, unsigned long *retbits, - unsigned long start, int nr); - -static inline void init_bit_radix(struct radix_tree_root *radix) -{ - INIT_RADIX_TREE(radix, GFP_NOFS); -} -#endif diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0577fda2168a..0b2e623cf421 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -25,27 +25,58 @@ /* in memory btrfs inode */ struct btrfs_inode { + /* which subvolume this inode belongs to */ struct btrfs_root *root; + + /* the block group preferred for allocations. This pointer is buggy + * and needs to be replaced with a bytenr instead + */ struct btrfs_block_group_cache *block_group; + + /* key used to find this inode on disk. 
This is used by the code + * to read in roots of subvolumes + */ struct btrfs_key location; + + /* the extent_tree has caches of all the extent mappings to disk */ struct extent_map_tree extent_tree; + + /* the io_tree does range state (DIRTY, LOCKED etc) */ struct extent_io_tree io_tree; + + /* special utility tree used to record which mirrors have already been + * tried when checksums fail for a given block + */ struct extent_io_tree io_failure_tree; + + /* held while inserting checksums to avoid races */ struct mutex csum_mutex; + + /* held while inserting or deleting extents from files */ struct mutex extent_mutex; + + /* held while logging the inode in tree-log.c */ struct mutex log_mutex; - struct inode vfs_inode; + + /* used to order data wrt metadata */ struct btrfs_ordered_inode_tree ordered_tree; + /* standard acl pointers */ struct posix_acl *i_acl; struct posix_acl *i_default_acl; /* for keeping track of orphaned inodes */ struct list_head i_orphan; + /* list of all the delalloc inodes in the FS. There are times we need + * to write all the delalloc pages to disk, and this list is used + * to walk them all. + */ struct list_head delalloc_inodes; - /* full 64 bit generation number */ + /* full 64 bit generation number, struct vfs_inode doesn't have a big + * enough field for this. + */ u64 generation; /* @@ -57,10 +88,25 @@ struct btrfs_inode { */ u64 logged_trans; - /* trans that last made a change that should be fully fsync'd */ + /* + * trans that last made a change that should be fully fsync'd. This + * gets reset to zero each time the inode is logged + */ u64 log_dirty_trans; + + /* total number of bytes pending delalloc, used by stat to calc the + * real block usage of the file + */ u64 delalloc_bytes; + + /* + * the size of the file stored in the metadata on disk. data=ordered + * means the in-memory i_size might be larger than the size on disk + * because not all the blocks are written yet.
+ */ u64 disk_i_size; + + /* flags field from the on disk inode */ u32 flags; /* @@ -68,6 +114,8 @@ struct btrfs_inode { * number for new files that are created */ u64 index_cnt; + + struct inode vfs_inode; }; static inline struct btrfs_inode *BTRFS_I(struct inode *inode) diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h index 4f0fefed132a..1eaf11d334fd 100644 --- a/fs/btrfs/crc32c.h +++ b/fs/btrfs/crc32c.h @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #ifndef __BTRFS_CRC32C__ #define __BTRFS_CRC32C__ #include diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50e81f43e6d4..ff3261ff2e19 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Oracle. All rights reserved. + * Copyright (C) 2007,2008 Oracle. All rights reserved. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public @@ -54,12 +54,19 @@ struct btrfs_path *btrfs_alloc_path(void) return path; } +/* this also releases the path */ void btrfs_free_path(struct btrfs_path *p) { btrfs_release_path(NULL, p); kmem_cache_free(btrfs_path_cachep, p); } +/* + * path release drops references on the extent buffers in the path + * and it drops any locks held by this path + * + * It is safe to call this on paths that have no locks or extent buffers held. + */ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) { int i; @@ -77,6 +84,16 @@ void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) } } +/* + * safely gets a reference on the root node of a tree. A lock + * is not taken, so a concurrent writer may put a different node + * at the root of the tree. See btrfs_lock_root_node for the + * looping required. + * + * The extent buffer returned by this has a reference taken, so + * it won't disappear. It may stop being the root of the tree + * at any time because there are no locks held. + */ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) { struct extent_buffer *eb; @@ -87,6 +104,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root) return eb; } +/* loop around taking references on and locking the root node of the + * tree until you end up with a lock on the root. A locked buffer + * is returned, with a reference held. + */ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) { struct extent_buffer *eb; @@ -108,6 +129,10 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) return eb; } +/* cowonly root (everything not a reference counted cow subvolume), just get + * put onto a simple dirty list. transaction.c walks this to make sure they + * get properly updated on disk.
+ */ static void add_root_to_dirty_list(struct btrfs_root *root) { if (root->track_dirty && list_empty(&root->dirty_list)) { @@ -116,6 +141,11 @@ static void add_root_to_dirty_list(struct btrfs_root *root) } } +/* + * used by snapshot creation to make a copy of a root for a tree with + * a given objectid. The buffer with the new root node is returned in + * cow_ret, and this func returns zero on success or a negative error code. + */ int btrfs_copy_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -167,6 +197,22 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, return 0; } +/* + * does the dirty work in cow of a single block. The parent block + * (if supplied) is updated to point to the new cow copy. The new + * buffer is marked dirty and returned locked. If you modify the block + * it needs to be marked dirty again. + * + * search_start -- an allocation hint for the new block + * + * empty_size -- a hint that you plan on doing more cow. This is the size in bytes + * the allocator should try to find free next to the block it returns. This is + * just a hint and may be ignored by the allocator. + * + * prealloc_dest -- if you have already reserved a destination for the cow, + * this uses that block instead of allocating a new one. btrfs_alloc_reserved_extent + * is used to finish the allocation. + */ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, @@ -311,6 +357,11 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } +/* + * cows a single block, see __btrfs_cow_block for the real work. 
+ * This version of it has extra checks so that a block isn't cow'd more than + * once per transaction, as long as it hasn't been written yet + */ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf, struct extent_buffer *parent, int parent_slot, @@ -347,6 +398,10 @@ int noinline btrfs_cow_block(struct btrfs_trans_handle *trans, return ret; } +/* + * helper function for defrag to decide if two blocks pointed to by a + * node are actually close by + */ static int close_blocks(u64 blocknr, u64 other, u32 blocksize) { if (blocknr < other && other - (blocknr + blocksize) < 32768) @@ -381,6 +436,11 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) } +/* + * this is used by the defrag code to go through all the + * leaves pointed to by a node and reallocate them so that + * disk order is close to key order + */ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, int start_slot, int cache_only, u64 *last_ret, @@ -521,6 +581,10 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root, return btrfs_item_offset_nr(leaf, nr - 1); } +/* + * extra debugging checks to make sure all the items in a key are + * well formed and in the proper order + */ static int check_node(struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -561,6 +625,10 @@ static int check_node(struct btrfs_root *root, struct btrfs_path *path, return 0; } +/* + * extra checking to make sure all the items in a leaf are + * well formed and in the proper order + */ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, int level) { @@ -782,6 +850,10 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, return -1; } +/* given a node and slot number, this reads the blocks it points to. The + * extent buffer is returned with a reference taken (but unlocked). + * NULL is returned on error. 
+ */ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, struct extent_buffer *parent, int slot) { @@ -798,6 +870,11 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, btrfs_node_ptr_generation(parent, slot)); } +/* + * node level balancing, used to make sure nodes are in proper order for + * item deletion. We balance from the top down, so we have to make sure + * that a deletion won't leave an node completely empty later on. + */ static noinline int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) @@ -1024,7 +1101,10 @@ enospc: return ret; } -/* returns zero if the push worked, non-zero otherwise */ +/* Node balancing for insertion. Here we only split or push nodes around + * when they are completely full. This is also done top down, so we + * have to be pessimistic. + */ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level) @@ -1150,7 +1230,8 @@ static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans, } /* - * readahead one full node of leaves + * readahead one full node of leaves, finding things that are close + * to the block in 'slot', and triggering ra on them. */ static noinline void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, @@ -1226,6 +1307,19 @@ static noinline void reada_for_search(struct btrfs_root *root, } } +/* + * when we walk down the tree, it is usually safe to unlock the higher layers in + * the tree. The exceptions are when our path goes through slot 0, because operations + * on the tree might require changing key pointers higher up in the tree. + * + * callers might also have set path->keep_locks, which tells this code to + * keep the lock if the path points to the last slot in the block. This is + * part of walking through the tree, and selecting the next slot in the higher + * block. 
+ * + * lowest_unlock sets the lowest level in the tree we're allowed to unlock. + * so if lowest_unlock is 1, level 0 won't be unlocked + */ static noinline void unlock_up(struct btrfs_path *path, int level, int lowest_unlock) { @@ -2705,6 +2799,12 @@ again: return ret; } +/* + * make the item pointed to by the path smaller. new_size indicates + * how small to make it, and from_end tells us if we just chop bytes + * off the end of the item or if we shift the item to chop bytes off + * the front. + */ int btrfs_truncate_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -2818,6 +2918,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, return ret; } +/* + * make the item pointed to by the path bigger, data_size is the new size. + */ int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u32 data_size) @@ -2897,7 +3000,7 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, } /* - * Given a key and some data, insert an item into the tree. + * Given a key and some data, insert items into the tree. * This does all the path init required, making room in the tree if needed. */ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, @@ -3046,9 +3149,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root /* * delete the pointer from a given node. * - * If the delete empties a node, the node is removed from the tree, - * continuing all the way the root if required. The root is converted into - * a leaf if all the nodes are emptied. + * the tree should have been previously balanced so the deletion does not + * empty a node. 
*/ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int level, int slot) @@ -3233,6 +3335,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * search the tree again to find a leaf with lesser keys * returns 0 if it found something or 1 if there are no lesser leaves. * returns < 0 on io errors. + * + * This may release the path, and so you may lose any locks held at the + * time you call it. */ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) { @@ -3265,9 +3370,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) /* * A helper function to walk down the tree starting at min_key, and looking * for nodes or leaves that are either in cache or have a minimum - * transaction id. This is used by the btree defrag code, but could - * also be used to search for blocks that have changed since a given - * transaction id. + * transaction id. This is used by the btree defrag code, and tree logging * * This does not cow, but it does stuff the starting key it finds back * into min_key, so you can call btrfs_search_slot with cow=1 on the @@ -3279,6 +3382,10 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) * This honors path->lowest_level to prevent descent past a given level * of the tree. * + * min_trans indicates the oldest transaction that you are interested + * in walking through. Any nodes or leaves older than min_trans are + * skipped over (without reading them). + * * returns zero if something useful was found, < 0 on error and 1 if there * was nothing in the tree that matched the search criteria. 
*/ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0079b60b18f3..ded1643c0273 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -27,7 +27,6 @@ #include #include #include -#include "bit-radix.h" #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index e4f30090d640..5040b71f1900 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -21,6 +21,14 @@ #include "hash.h" #include "transaction.h" +/* + * insert a name into a directory, doing overflow properly if there is a hash + * collision. data_size indicates how big the item inserted should be. On + * success a struct btrfs_dir_item pointer is returned, otherwise it is + * an ERR_PTR. + * + * The name is not copied into the dir item, you have to do that yourself. + */ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -55,6 +63,10 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle return (struct btrfs_dir_item *)ptr; } +/* + * xattrs work a lot like directories, this inserts an xattr item + * into the tree + */ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, u16 name_len, const void *data, u16 data_len, @@ -109,6 +121,13 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, return ret; } +/* + * insert a directory item in the tree, doing all the magic for + * both indexes. 'dir' indicates which objectid to insert it into, + * 'location' is the key to stuff into the directory item, 'type' is the + * type of the inode we're pointing to, and 'index' is the sequence number + * to use for the second index (if one is created). 
+ */ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 dir, struct btrfs_key *location, u8 type, u64 index) @@ -184,6 +203,11 @@ out: return 0; } +/* + * lookup a directory item based on name. 'dir' is the objectid + * we're searching in, and 'mod' tells us if you plan on deleting the + * item (use mod < 0) or changing the options (use mod > 0) + */ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 dir, @@ -222,6 +246,14 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +/* + * lookup a directory item based on index. 'dir' is the objectid + * we're searching in, and 'mod' tells us if you plan on deleting the + * item (use mod < 0) or changing the options (use mod > 0) + * + * The name is used to make sure the index really points to the name you were + * looking for. + */ struct btrfs_dir_item * btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -282,6 +314,11 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +/* + * helper function to look at the directory item pointed to by 'path' + * this walks through all the entries in a dir item and finds one + * for a specific name. + */ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, struct btrfs_path *path, const char *name, int name_len) @@ -313,6 +350,10 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, return NULL; } +/* + * given a pointer into a directory item, delete it. This + * handles items that have more than one entry in them. 
+ */ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 45b4f7285275..5ee10d3136f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -55,6 +55,11 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); +/* + * end_io_wq structs are used to do processing in task context when an IO is + * complete. This is used during reads to verify checksums, and it is used + * by writes to insert metadata for new file extents after IO is complete. + */ struct end_io_wq { struct bio *bio; bio_end_io_t *end_io; @@ -66,6 +71,11 @@ struct end_io_wq { struct btrfs_work work; }; +/* + * async submit bios are used to offload expensive checksumming + * onto the worker threads. They checksum file and metadata bios + * just before they are sent down the IO stack. + */ struct async_submit_bio { struct inode *inode; struct bio *bio; @@ -76,6 +86,10 @@ struct async_submit_bio { struct btrfs_work work; }; +/* + * extents on the btree inode are pretty simple, there's one extent + * that covers the entire device + */ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, int create) @@ -151,6 +165,10 @@ void btrfs_csum_final(u32 crc, char *result) *(__le32 *)result = ~cpu_to_le32(crc); } +/* + * compute the csum for a btree block, and either verify it or write it + * into the csum field of the block. + */ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { @@ -204,6 +222,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, return 0; } +/* + * we can't consider a given block up to date unless the transid of the + * block matches the transid in the parent node's pointer. 
This is how we + * detect blocks that either didn't get written at all or got written + * in the wrong place. + */ static int verify_parent_transid(struct extent_io_tree *io_tree, struct extent_buffer *eb, u64 parent_transid) { @@ -228,9 +252,12 @@ out: unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); return ret; - } +/* + * helper to read a given tree block, doing retries as required when + * the checksums don't match and we have alternate mirrors to try. + */ static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, u64 start, u64 parent_transid) @@ -260,6 +287,10 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror return -EIO; } +/* + * checksum a dirty tree block before IO. This has extra checks to make + * sure we only fill in the checksum field in the first page of a multi-page block + */ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_io_tree *tree; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8bd1b402f3fd..563b2d12f4f2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -914,6 +914,10 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(wait_on_extent_writeback); +/* + * either insert or lock state struct between start and end use mask to tell + * us if waiting is desired. + */ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { int err; @@ -982,6 +986,13 @@ int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(set_range_writeback); +/* + * find the first offset in the io tree with 'bits' set. zero is + * returned if we find something, and *start_ret and *end_ret are + * set to reflect the state struct that was found. 
+ * + * If nothing was found, 1 is returned, < 0 on error + */ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, int bits) { @@ -1017,6 +1028,10 @@ out: } EXPORT_SYMBOL(find_first_extent_bit); +/* find the first state struct with 'bits' set after 'start', and + * return it. tree->lock must be held. NULL will be returned if + * nothing was found after 'start' + */ struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree, u64 start, int bits) { @@ -1046,8 +1061,14 @@ out: } EXPORT_SYMBOL(find_first_extent_bit_state); -u64 find_lock_delalloc_range(struct extent_io_tree *tree, - u64 *start, u64 *end, u64 max_bytes) +/* + * find a contiguous range of bytes in the file marked as delalloc, not + * more than 'max_bytes'. start and end are used to return the range, + * + * 1 is returned if we find something, 0 if nothing was in the tree + */ +static noinline u64 find_lock_delalloc_range(struct extent_io_tree *tree, + u64 *start, u64 *end, u64 max_bytes) { struct rb_node *node; struct extent_state *state; @@ -1130,6 +1151,11 @@ out: return found; } +/* + * count the number of bytes in the tree that have a given bit(s) + * set. This can be fairly slow, except for EXTENT_DIRTY which is + * cached. The total number found is returned. + */ u64 count_range_bits(struct extent_io_tree *tree, u64 *start, u64 search_end, u64 max_bytes, unsigned long bits) @@ -1245,6 +1271,10 @@ int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) } EXPORT_SYMBOL(unlock_range); +/* + * set the private field for a given byte offset in the tree. If there isn't + * an extent_state there already, this does nothing.
+ */ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) { struct rb_node *node; diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 78ced11d18c7..74b2a29880d3 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -114,6 +114,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, return NULL; } +/* + * search through the tree for an extent_map with a given offset. If + * it can't be found, try to find some neighboring extents + */ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, struct rb_node **prev_ret, struct rb_node **next_ret) @@ -160,6 +164,10 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, return NULL; } +/* + * look for an offset in the tree, and if it can't be found, return + * the first offset we can find smaller than 'offset'. + */ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) { struct rb_node *prev; @@ -170,6 +178,7 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) return ret; } +/* check to see if two extent_map structs are adjacent and safe to merge */ static int mergable_maps(struct extent_map *prev, struct extent_map *next) { if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) @@ -250,6 +259,7 @@ out: } EXPORT_SYMBOL(add_extent_mapping); +/* simple helper to do math around the end of an extent, handling wrap */ static u64 range_end(u64 start, u64 len) { if (start + len < start) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 1b7e51a9db0f..3088a1184483 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -41,6 +41,9 @@ #include "compat.h" +/* simple helper to fault in pages and copy. This should go away + * and be replaced with calls into generic code. + */ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes, struct page **prepared_pages, @@ -72,12 +75,19 @@ static int noinline btrfs_copy_from_user(loff_t pos, int num_pages, return page_fault ? 
-EFAULT : 0; } +/* + * unlocks pages after btrfs_file_write is done with them + */ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) { size_t i; for (i = 0; i < num_pages; i++) { if (!pages[i]) break; + /* page checked is some magic around finding pages that + * have been modified without going through btrfs_set_page_dirty + * clear it here + */ ClearPageChecked(pages[i]); unlock_page(pages[i]); mark_page_accessed(pages[i]); @@ -85,6 +95,10 @@ static void noinline btrfs_drop_pages(struct page **pages, size_t num_pages) } } +/* this does all the hard work for inserting an inline extent into + * the btree. Any existing inline extent is extended as required to make room, + * otherwise things are inserted as required into the btree + */ static int noinline insert_inline_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, u64 offset, size_t size, @@ -228,6 +242,14 @@ fail: return err; } +/* + * after copy_from_user, pages need to be dirtied and we need to make + * sure holes are created between the current EOF and the start of + * any next extents (if required). + * + * this also makes the decision about creating an inline extent vs + * doing real data extents, marking pages dirty and delalloc as required. + */ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct file *file, @@ -362,6 +384,10 @@ out_unlock: return err; } +/* + * this drops all the extents in the cache that intersect the range + * [start, end]. Existing extents are split as required. + */ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned) { @@ -536,6 +562,9 @@ out: * If an extent intersects the range but is not entirely inside the range * it is either truncated or split. Anything entirely inside the range * is deleted from the tree. + * + * inline_limit is used to tell this code which offsets in the file to keep + * if they contain inline extents. 
*/ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, @@ -796,7 +825,9 @@ out: } /* - * this gets pages into the page cache and locks them down + * this gets pages into the page cache and locks them down, it also properly + * waits for data=ordered extents to finish before allowing the pages to be + * modified. */ static int noinline prepare_pages(struct btrfs_root *root, struct file *file, struct page **pages, size_t num_pages, @@ -1034,6 +1065,17 @@ int btrfs_release_file(struct inode * inode, struct file * filp) return 0; } +/* + * fsync call for both files and directories. This logs the inode into + * the tree log instead of forcing full commits whenever possible. + * + * It needs to call filemap_fdatawait so that all ordered extent updates are + * in the metadata btree are up to date for copying to the log. + * + * It drops the inode mutex before doing the tree log commit. This is an + * important optimization for directories because holding the mutex prevents + * new operations on the dir while we write to disk. + */ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 404704d26822..f3abecc2d14c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -83,6 +83,10 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { static void btrfs_truncate(struct inode *inode); +/* + * a very lame attempt at stopping writes when the FS is 85% full. There + * are countless ways this is incorrect, but it is better than nothing. + */ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, int for_del) { @@ -108,6 +112,12 @@ int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, return ret; } +/* + * when extent_io.c finds a delayed allocation range in the file, + * the call backs end up in this code. 
The basic idea is to + * allocate extents on disk for the range, and create ordered data structs + * in ram to track those extents. + */ static int cow_file_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -185,6 +195,13 @@ out: return ret; } +/* + * the nocow writeback call back. This checks for snapshots or COW copies + * of the extents that exist in the file, and COWs the file as required. + * + * If no cow copies or snapshots exist, we write directly to the existing + * blocks on disk + */ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end) { u64 extent_start; @@ -291,6 +308,9 @@ out: return err; } +/* + * extent_io.c call back to do delayed allocation processing + */ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -305,6 +325,11 @@ static int run_delalloc_range(struct inode *inode, u64 start, u64 end) return ret; } +/* + * extent_io.c set_bit_hook, used to track delayed allocation + * bytes in this file, and to maintain the list of inodes that + * have pending delalloc work to be done.
+ */ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { @@ -323,6 +348,9 @@ int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } +/* + * extent_io.c clear_bit_hook, see set_bit_hook for why + */ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { @@ -349,6 +377,10 @@ int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, return 0; } +/* + * extent_io.c merge_bio_hook, this must check the chunk tree to make sure + * we don't create bios that span stripes or chunks + */ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, size_t size, struct bio *bio) { @@ -371,6 +403,14 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, return 0; } +/* + * in order to insert checksums into the metadata in large chunks, + * we wait until bio submission time. All the pages in the bio are + * checksummed and sums are attached onto the ordered extent record. + * + * At IO completion time the csums attached on the ordered extent record + * are inserted into the btree + */ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { @@ -383,6 +423,10 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, return btrfs_map_bio(root, rw, bio, mirror_num, 1); } +/* + * extent_io.c submission hook. This does the right thing for csum calculation on write, + * or reading the csums from the tree before a read + */ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { @@ -408,6 +452,10 @@ mapit: return btrfs_map_bio(root, rw, bio, mirror_num, 0); } +/* + * given a list of ordered sums record them in the inode. This happens + * at IO completion time based on sums calculated at bio submission time.
+ */ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_offset, struct list_head *list) @@ -430,12 +478,12 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) GFP_NOFS); } +/* see btrfs_writepage_start_hook for details on why this is required */ struct btrfs_writepage_fixup { struct page *page; struct btrfs_work work; }; -/* see btrfs_writepage_start_hook for details on why this is required */ void btrfs_writepage_fixup_worker(struct btrfs_work *work) { struct btrfs_writepage_fixup *fixup; @@ -522,6 +570,10 @@ int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) return -EAGAIN; } +/* as ordered data IO finishes, this gets called so we can finish + * an ordered extent if the range of bytes in the file it covers are + * fully written. + */ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -631,6 +683,14 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, return btrfs_finish_ordered_io(page->mapping->host, start, end); } +/* + * When IO fails, either with EIO or csum verification fails, we + * try other mirrors that might have a good copy of the data. This + * io_failure_record is used to record state as we go through all the + * mirrors. If another mirror has good data, the page is set up to date + * and things continue. If a good mirror can't be found, the original + * bio end_io callback is called to indicate things have failed. 
+ */ struct io_failure_record { struct page *page; u64 start; @@ -725,6 +785,10 @@ int btrfs_io_failed_hook(struct bio *failed_bio, return 0; } +/* + * each time an IO finishes, we do a fast check in the IO failure tree + * to see if we need to process or clean up an io_failure_record + */ int btrfs_clean_io_failures(struct inode *inode, u64 start) { u64 private; @@ -753,6 +817,11 @@ int btrfs_clean_io_failures(struct inode *inode, u64 start) return 0; } +/* + * when reads are done, we need to check csums to verify the data is correct + * if there's a match, we allow the bio to finish. If not, we go through + * the io_failure_record routines to find good copies + */ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -990,6 +1059,9 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) btrfs_free_path(path); } +/* + * read an inode from the btree into the in-memory inode + */ void btrfs_read_locked_inode(struct inode *inode) { struct btrfs_path *path; @@ -1083,6 +1155,9 @@ make_bad: make_bad_inode(inode); } +/* + * given a leaf and an inode, copy the inode fields into the leaf + */ static void fill_inode_item(struct btrfs_trans_handle *trans, struct extent_buffer *leaf, struct btrfs_inode_item *item, @@ -1118,6 +1193,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, BTRFS_I(inode)->block_group->key.objectid); } +/* + * copy everything in the in-memory inode into the btree. + */ int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) @@ -1151,6 +1229,11 @@ failed: } +/* + * unlink helper that gets used here in inode.c and in the tree logging + * recovery code. 
It removes a link in a directory with a given name, and + * also drops the back refs in the inode to the directory + */ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, struct inode *inode, @@ -1309,7 +1392,7 @@ fail: /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find - * any higher than i_size. + * any higher than new_size * * csum items that cross the new i_size are truncated to the new size * as well. @@ -2123,6 +2206,11 @@ void btrfs_dirty_inode(struct inode *inode) btrfs_end_transaction(trans, root); } +/* + * find the highest existing sequence number in a directory + * and then set the in-memory index_cnt variable to reflect + * free sequence numbers + */ static int btrfs_set_inode_index_count(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -2175,6 +2263,10 @@ out: return ret; } +/* + * helper to find a free sequence number in a given directory. This current + * code is very simple, later versions will do smarter things in the btree + */ static int btrfs_set_inode_index(struct inode *dir, struct inode *inode, u64 *index) { @@ -2305,6 +2397,12 @@ static inline u8 btrfs_inode_type(struct inode *inode) return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT]; } +/* + * utility function to add 'inode' into 'parent_inode' with + * a given name and a given sequence number. + * if 'add_backref' is true, also insert a backref from the + * inode to the parent directory. + */ int btrfs_add_link(struct btrfs_trans_handle *trans, struct inode *parent_inode, struct inode *inode, const char *name, int name_len, int add_backref, u64 index) @@ -2611,6 +2709,10 @@ out_unlock: return err; } +/* helper for btrfs_get_extent. Given an existing extent in the tree, + * and an extent that you want to insert, deal with overlap and insert + * the new extent into the tree.
+ */ static int merge_extent_mapping(struct extent_map_tree *em_tree, struct extent_map *existing, struct extent_map *em, @@ -2627,6 +2729,14 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, return add_extent_mapping(em_tree, em); } +/* + * a bit scary, this does extent mapping from logical file offset to the disk. + * the ugly parts come from merging extents from the disk with the + * in-ram representation. This gets more complex because of the data=ordered code, + * where the in-ram extents might be locked pending data=ordered completion. + * + * This also copies inline extents directly into the page. + */ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) @@ -2869,76 +2979,11 @@ out: return em; } -#if 0 /* waiting for O_DIRECT reads */ -static int btrfs_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - struct extent_map *em; - u64 start = (u64)iblock << inode->i_blkbits; - struct btrfs_multi_bio *multi = NULL; - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 len; - u64 logical; - u64 map_length; - int ret = 0; - - em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0); - - if (!em || IS_ERR(em)) - goto out; - - if (em->start > start || em->start + em->len <= start) { - goto out; - } - - if (em->block_start == EXTENT_MAP_INLINE) { - ret = -EINVAL; - goto out; - } - - len = em->start + em->len - start; - len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size))); - - if (em->block_start == EXTENT_MAP_HOLE || - em->block_start == EXTENT_MAP_DELALLOC) { - bh_result->b_size = len; - goto out; - } - - logical = start - em->start; - logical = em->block_start + logical; - - map_length = len; - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - logical, &map_length, &multi, 0); - BUG_ON(ret); - bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits; - bh_result->b_size = min(map_length, 
len); - - bh_result->b_bdev = multi->stripes[0].dev->bdev; - set_buffer_mapped(bh_result); - kfree(multi); -out: - free_extent_map(em); - return ret; -} -#endif - static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { return -EINVAL; -#if 0 - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - - if (rw == WRITE) - return -EINVAL; - - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, btrfs_get_block, NULL); -#endif } static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) @@ -3202,6 +3247,9 @@ void btrfs_invalidate_dcache_root(struct btrfs_root *root, char *name, } } +/* + * create a new subvolume directory/inode (helper for the ioctl). + */ int btrfs_create_subvol_root(struct btrfs_root *new_root, struct btrfs_trans_handle *trans, u64 new_dirid, struct btrfs_block_group_cache *block_group) @@ -3223,6 +3271,9 @@ int btrfs_create_subvol_root(struct btrfs_root *new_root, return btrfs_update_inode(trans, new_root, inode); } +/* helper function for file defrag and space balancing. This + * forces readahead on a given range of bytes in an inode + */ unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index) @@ -3424,6 +3475,10 @@ out_unlock: return ret; } +/* + * some fairly slow code that needs optimization. This walks the list + * of all the inodes with pending delalloc and forces them to disk. + */ int btrfs_start_delalloc_inodes(struct btrfs_root *root) { struct list_head *head = &root->fs_info->delalloc_inodes; diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 0cc314c10d66..e30aa6e2958f 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -25,6 +25,15 @@ #include "extent_io.h" #include "locking.h" +/* + * locks the per buffer mutex in an extent buffer. 
This uses adaptive locks + * and the spin is not tuned very extensively. The spinning does make a big + * difference in almost every workload, but spinning for the right amount of + * time needs some help. + * + * In general, we want to spin as long as the lock holder is doing btree searches, + * and we should give up if they are in more expensive code. + */ int btrfs_tree_lock(struct extent_buffer *eb) { int i; @@ -57,6 +66,10 @@ int btrfs_tree_locked(struct extent_buffer *eb) return mutex_is_locked(&eb->mutex); } +/* + * btrfs_search_slot uses this to decide if it should drop its locks + * before doing something expensive like allocating free blocks for cow. + */ int btrfs_path_lock_waiting(struct btrfs_path *path, int level) { int i; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 951eacff2420..dcc1730dd837 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -26,7 +26,6 @@ #include "btrfs_inode.h" #include "extent_io.h" - static u64 entry_end(struct btrfs_ordered_extent *entry) { if (entry->file_offset + entry->len < entry->file_offset) @@ -34,6 +33,9 @@ static u64 entry_end(struct btrfs_ordered_extent *entry) return entry->file_offset + entry->len; } +/* returns NULL if the insertion worked, or it returns the node it did find + * in the tree + */ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, struct rb_node *node) { @@ -58,6 +60,10 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, return NULL; } +/* + * look for a given offset in the tree, and if it can't be found return the + * first lesser offset + */ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, struct rb_node **prev_ret) { @@ -108,6 +114,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, return NULL; } +/* + * helper to check if a given offset is inside a given entry + */ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) { if 
(file_offset < entry->file_offset || @@ -116,6 +125,10 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) return 1; } +/* + * find the first ordered struct that has this offset, otherwise + * the first one less than this offset + */ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, u64 file_offset) { @@ -305,6 +318,10 @@ int btrfs_remove_ordered_extent(struct inode *inode, return 0; } +/* + * wait for all the ordered extents in a root. This is done when balancing + * space between drives. + */ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) { struct list_head splice; diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index 30fcb7aea5b5..a50ebb67055d 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -21,6 +21,16 @@ #include "ref-cache.h" #include "transaction.h" +/* + * leaf refs are used to cache the information about which extents + * a given leaf has references on. This allows us to process that leaf + * in btrfs_drop_snapshot without needing to read it back from disk. + */ + +/* + * kmalloc a leaf reference struct and update the counters for the + * total ref cache size + */ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, int nr_extents) { @@ -40,6 +50,10 @@ struct btrfs_leaf_ref *btrfs_alloc_leaf_ref(struct btrfs_root *root, return ref; } +/* + * free a leaf reference struct and update the counters for the + * total ref cache size + */ void btrfs_free_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { if (!ref) @@ -135,6 +149,10 @@ int btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, return 0; } +/* + * find the leaf ref for a given extent.
This returns the ref struct with + * a usage reference incremented + */ struct btrfs_leaf_ref *btrfs_lookup_leaf_ref(struct btrfs_root *root, u64 bytenr) { @@ -160,6 +178,10 @@ again: return NULL; } +/* + * add a fully filled in leaf ref struct + * remove all the refs older than a given root generation + */ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, int shared) { @@ -184,6 +206,10 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, return ret; } +/* + * remove a single leaf ref from the tree. This drops the ref held by the tree + * only + */ int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref) { struct btrfs_leaf_ref_tree *tree; diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index 617564787f52..16f3183d7c59 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h @@ -19,8 +19,11 @@ #define __REFCACHE__ struct btrfs_extent_info { + /* bytenr and num_bytes find the extent in the extent allocation tree */ u64 bytenr; u64 num_bytes; + + /* objectid and offset find the back reference for the file */ u64 objectid; u64 offset; }; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 0091c01abb06..eb7f7655e9d5 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -22,8 +22,10 @@ #include "print-tree.h" /* - * returns 0 on finding something, 1 if no more roots are there - * and < 0 on error + * search forward for a root, starting with objectid 'search_start' + * if a root key is found, the objectid we find is filled into 'found_objectid' + * and 0 is returned. < 0 is returned on error, 1 if there is nothing + * left in the tree. */ int btrfs_search_root(struct btrfs_root *root, u64 search_start, u64 *found_objectid) @@ -66,6 +68,11 @@ out: return ret; } +/* + * lookup the root with the highest offset for a given objectid. The key we do + * find is copied into 'key'. If we find something return 0, otherwise 1, < 0 + * on error. 
+ */ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct btrfs_root_item *item, struct btrfs_key *key) { @@ -104,6 +111,9 @@ out: return ret; } +/* + * copy the data in 'item' into the btree + */ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key, struct btrfs_root_item *item) @@ -147,6 +157,12 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } +/* + * at mount time we want to find all the old transaction snapshots that were in + * the process of being deleted if we crashed. This is any root item with an offset + * lower than the latest root. They need to be queued for deletion to finish + * what was happening when we crashed. + */ int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid, struct btrfs_root *latest) { @@ -227,6 +243,7 @@ err: return ret; } +/* drop the root item for 'key' from 'root' */ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_key *key) { diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index ad03a32d1116..cdedbe144d45 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -17,6 +17,27 @@ */ #include + +/* this is some deeply nasty code. ctree.h has a different + * definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef + * + * The end result is that anyone who #includes ctree.h gets a + * declaration for the btrfs_set_foo functions and btrfs_foo functions + * + * This file declares the macros and then #includes ctree.h, which results + * in cpp creating the function here based on the template below. + * + * These setget functions do all the extent_buffer related mapping + * required to efficiently read and write specific fields in the extent + * buffers. Every pointer to metadata items in btrfs is really just + * an unsigned long offset into the extent buffer which has been + * cast to a specific type. This gives us all the gcc type checking. 
+ * + * The extent buffer api is used to do all the kmapping and page + * spanning work required to get extent buffers in highmem and have + * a metadata blocksize different from the page size. + */ + #define BTRFS_SETGET_FUNCS(name, type, member, bits) \ u##bits btrfs_##name(struct extent_buffer *eb, \ type *s) \ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8399d6d05d63..2e6039825b7b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -519,6 +519,9 @@ static struct file_system_type btrfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +/* + * used by btrfsctl to scan devices when no FS is mounted + */ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 444abe0796ae..11266d68a6c9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -46,6 +46,9 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) } } +/* + * either allocate a new transaction or hop into the existing one + */ static noinline int join_transaction(struct btrfs_root *root) { struct btrfs_transaction *cur_trans; @@ -85,6 +88,12 @@ static noinline int join_transaction(struct btrfs_root *root) return 0; } +/* + * this does all the record keeping required to make sure that a + * reference counted root is properly recorded in a given transaction. + * This is required to make sure the old root from before we joined the transaction + * is deleted when the transaction commits + */ noinline int btrfs_record_root_in_trans(struct btrfs_root *root) { struct btrfs_dirty_root *dirty; @@ -127,6 +136,10 @@ noinline int btrfs_record_root_in_trans(struct btrfs_root *root) return 0; } +/* wait for commit against the current transaction to become unblocked + * when this is done, it is safe to start a new transaction, but the current + * transaction might not be fully on disk. 
+ */ static void wait_current_trans(struct btrfs_root *root) { struct btrfs_transaction *cur_trans; @@ -198,7 +211,7 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, return start_transaction(r, num_blocks, 2); } - +/* wait for a transaction commit to be fully complete */ static noinline int wait_for_commit(struct btrfs_root *root, struct btrfs_transaction *commit) { @@ -218,6 +231,10 @@ static noinline int wait_for_commit(struct btrfs_root *root, return 0; } +/* + * rate limit against the drop_snapshot code. This helps to slow down new operations + * if the drop_snapshot code isn't able to keep up. + */ static void throttle_on_drops(struct btrfs_root *root) { struct btrfs_fs_info *info = root->fs_info; @@ -302,7 +319,11 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, return __btrfs_end_transaction(trans, root, 1); } - +/* + * when btree blocks are allocated, they have some corresponding bits set for + * them in one of two extent_io trees. This is used to make sure all of + * those extents are on disk for transaction or log commit + */ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, struct extent_io_tree *dirty_pages) { @@ -393,6 +414,16 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, &trans->transaction->dirty_pages); } +/* + * this is used to update the root pointer in the tree of tree roots. + * + * But, in the case of the extent allocation tree, updating the root + * pointer may allocate blocks which may change the root of the extent + * allocation tree. + * + * So, this loops and repeats and makes sure the cowonly root didn't + * change while the root pointer was being updated in the metadata. 
+ */ static int update_cowonly_root(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -418,6 +449,9 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, return 0; } +/* + * update all the cowonly tree roots on disk + */ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -433,6 +467,11 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, return 0; } +/* + * dead roots are old snapshots that need to be deleted. This allocates + * a dirty root struct and adds it into the list of dead roots that need to + * be deleted + */ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) { struct btrfs_dirty_root *dirty; @@ -449,6 +488,12 @@ int btrfs_add_dead_root(struct btrfs_root *root, struct btrfs_root *latest) return 0; } +/* + * at transaction commit time we need to schedule the old roots for + * deletion via btrfs_drop_snapshot. This runs through all the + * reference counted roots that were modified in the current + * transaction and puts them into the drop list + */ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, struct radix_tree_root *radix, struct list_head *list) @@ -541,6 +586,10 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans, return err; } +/* + * defrag a given btree. If cacheonly == 1, this won't read from the disk, + * otherwise every leaf in the btree is read and defragged. 
+ */ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) { struct btrfs_fs_info *info = root->fs_info; @@ -570,6 +619,10 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; } +/* + * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on + * all of them + */ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, struct list_head *list) { @@ -664,6 +717,10 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, return ret; } +/* + * new snapshots need to be created at a very specific time in the + * transaction commit. This does the actual creation + */ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_pending_snapshot *pending) @@ -734,6 +791,9 @@ fail: return ret; } +/* + * create all the snapshots we've scheduled for creation + */ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { @@ -944,6 +1004,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; } +/* + * interface function to delete all the snapshots we have scheduled for deletion + */ int btrfs_clean_old_snapshots(struct btrfs_root *root) { struct list_head dirty_roots; diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b3bb5bbad76e..6f57d0889b1e 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -23,6 +23,10 @@ #include "transaction.h" #include "locking.h" +/* defrag all the leaves in a given btree. If cache_only == 1, don't read things + * from disk, otherwise read all the leaves and try to get key order to + * better reflect disk order + */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, struct btrfs_root *root, int cache_only) { -- cgit v1.2.3