summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/async-thread.c2
-rw-r--r--fs/btrfs/backref.c10
-rw-r--r--fs/btrfs/compression.c6
-rw-r--r--fs/btrfs/delayed-inode.c3
-rw-r--r--fs/btrfs/delayed-inode.h2
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent_io.c45
-rw-r--r--fs/btrfs/extent_io.h3
-rw-r--r--fs/btrfs/free-space-tree.c18
-rw-r--r--fs/btrfs/inode.c52
-rw-r--r--fs/btrfs/ioctl.c119
-rw-r--r--fs/btrfs/relocation.c3
-rw-r--r--fs/btrfs/root-tree.c10
-rw-r--r--fs/btrfs/sysfs.c35
-rw-r--r--fs/btrfs/sysfs.h5
-rw-r--r--fs/btrfs/tests/btrfs-tests.c10
-rw-r--r--fs/btrfs/tests/extent-io-tests.c12
-rw-r--r--fs/btrfs/tests/inode-tests.c8
-rw-r--r--fs/btrfs/tree-log.c14
19 files changed, 255 insertions, 104 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 88d9af3d4581..5fb60ea7eee2 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
list_add_tail(&work->ordered_list, &wq->ordered_list);
spin_unlock_irqrestore(&wq->list_lock, flags);
}
- queue_work(wq->normal_wq, &work->normal_work);
trace_btrfs_work_queued(work);
+ queue_work(wq->normal_wq, &work->normal_work);
}
void btrfs_queue_work(struct btrfs_workqueue *wq,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b90cd3776f8e..f6dac40f87ff 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1406,7 +1406,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
read_extent_buffer(eb, dest + bytes_left,
name_off, name_len);
if (eb != eb_in) {
- btrfs_tree_read_unlock_blocking(eb);
+ if (!path->skip_locking)
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
}
ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1426,9 +1427,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
if (eb != eb_in) {
- atomic_inc(&eb->refs);
- btrfs_tree_read_lock(eb);
- btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+ if (!path->skip_locking)
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+ path->nodes[0] = NULL;
+ path->locks[0] = 0;
}
btrfs_release_path(path);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c473c42d7d6c..3346cd8f9910 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -637,11 +637,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
faili = nr_pages - 1;
cb->nr_pages = nr_pages;
- /* In the parent-locked case, we only locked the range we are
- * interested in. In all other cases, we can opportunistically
- * cache decompressed data that goes beyond the requested range. */
- if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
- add_ra_bio_pages(inode, em_start + em_len, cb);
+ add_ra_bio_pages(inode, em_start + em_len, cb);
/* include any pages we added in add_ra-bio_pages */
uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0be47e4b8136..b57daa895cea 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1689,7 +1689,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
*
*/
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
- struct list_head *ins_list)
+ struct list_head *ins_list, bool *emitted)
{
struct btrfs_dir_item *di;
struct btrfs_delayed_item *curr, *next;
@@ -1733,6 +1733,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
if (over)
return 1;
+ *emitted = true;
}
return 0;
}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index f70119f25421..0167853c84ae 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
int btrfs_should_delete_dir_index(struct list_head *del_list,
u64 index);
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
- struct list_head *ins_list);
+ struct list_head *ins_list, bool *emitted);
/* for init */
int __init btrfs_delayed_inode_init(void);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dd08e29f5117..4545e2e2ad45 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -182,6 +182,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
{ .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
+ { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
{ .id = 0, .name_stem = "tree" },
};
@@ -1787,7 +1788,6 @@ static int cleaner_kthread(void *arg)
int again;
struct btrfs_trans_handle *trans;
- set_freezable();
do {
again = 0;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e7c97a3f344..392592dc7010 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2897,12 +2897,11 @@ static int __do_readpage(struct extent_io_tree *tree,
struct block_device *bdev;
int ret;
int nr = 0;
- int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
size_t pg_offset = 0;
size_t iosize;
size_t disk_io_size;
size_t blocksize = inode->i_sb->s_blocksize;
- unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
+ unsigned long this_bio_flag = 0;
set_page_extent_mapped(page);
@@ -2942,18 +2941,16 @@ static int __do_readpage(struct extent_io_tree *tree,
kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
- if (!parent_locked)
- unlock_extent_cached(tree, cur,
- cur + iosize - 1,
- &cached, GFP_NOFS);
+ unlock_extent_cached(tree, cur,
+ cur + iosize - 1,
+ &cached, GFP_NOFS);
break;
}
em = __get_extent_map(inode, page, pg_offset, cur,
end - cur + 1, get_extent, em_cached);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
- if (!parent_locked)
- unlock_extent(tree, cur, end);
+ unlock_extent(tree, cur, end);
break;
}
extent_offset = cur - em->start;
@@ -3038,12 +3035,9 @@ static int __do_readpage(struct extent_io_tree *tree,
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
- if (parent_locked)
- free_extent_state(cached);
- else
- unlock_extent_cached(tree, cur,
- cur + iosize - 1,
- &cached, GFP_NOFS);
+ unlock_extent_cached(tree, cur,
+ cur + iosize - 1,
+ &cached, GFP_NOFS);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3052,8 +3046,7 @@ static int __do_readpage(struct extent_io_tree *tree,
if (test_range_bit(tree, cur, cur_end,
EXTENT_UPTODATE, 1, NULL)) {
check_page_uptodate(tree, page);
- if (!parent_locked)
- unlock_extent(tree, cur, cur + iosize - 1);
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3063,8 +3056,7 @@ static int __do_readpage(struct extent_io_tree *tree,
*/
if (block_start == EXTENT_MAP_INLINE) {
SetPageError(page);
- if (!parent_locked)
- unlock_extent(tree, cur, cur + iosize - 1);
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -3083,8 +3075,7 @@ static int __do_readpage(struct extent_io_tree *tree,
*bio_flags = this_bio_flag;
} else {
SetPageError(page);
- if (!parent_locked)
- unlock_extent(tree, cur, cur + iosize - 1);
+ unlock_extent(tree, cur, cur + iosize - 1);
}
cur = cur + iosize;
pg_offset += iosize;
@@ -3213,20 +3204,6 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent, int mirror_num)
-{
- struct bio *bio = NULL;
- unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
- int ret;
-
- ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
- &bio_flags, READ, NULL);
- if (bio)
- ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
- return ret;
-}
-
static noinline void update_nr_written(struct page *page,
struct writeback_control *wbc,
unsigned long nr_written)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0377413bd4b9..880d5292e972 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -29,7 +29,6 @@
*/
#define EXTENT_BIO_COMPRESSED 1
#define EXTENT_BIO_TREE_LOG 2
-#define EXTENT_BIO_PARENT_LOCKED 4
#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */
@@ -210,8 +209,6 @@ static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent, int mirror_num);
int __init extent_io_init(void);
void extent_io_exit(void);
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 393e36bd5845..53dbeaf6ce94 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
static unsigned long *alloc_bitmap(u32 bitmap_size)
{
+ void *mem;
+
+ /*
+ * The allocation size varies, observed numbers were < 4K up to 16K.
+ * Using vmalloc unconditionally would be too heavy, we'll try
+ * contiguous allocations first.
+ */
+ if (bitmap_size <= PAGE_SIZE)
+ return kzalloc(bitmap_size, GFP_NOFS);
+
+ mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+ if (mem)
+ return mem;
+
return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL);
}
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
ret = 0;
out:
- vfree(bitmap);
+ kvfree(bitmap);
if (ret)
btrfs_abort_transaction(trans, root, ret);
return ret;
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
ret = 0;
out:
- vfree(bitmap);
+ kvfree(bitmap);
if (ret)
btrfs_abort_transaction(trans, root, ret);
return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e28f3d4691af..d96f5cf38a2d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5717,6 +5717,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
char *name_ptr;
int name_len;
int is_curr = 0; /* ctx->pos points to the current index? */
+ bool emitted;
/* FIXME, use a real flag for deciding about the key type */
if (root->fs_info->tree_root == root)
@@ -5745,6 +5746,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (ret < 0)
goto err;
+ emitted = false;
while (1) {
leaf = path->nodes[0];
slot = path->slots[0];
@@ -5824,6 +5826,7 @@ skip:
if (over)
goto nopos;
+ emitted = true;
di_len = btrfs_dir_name_len(leaf, di) +
btrfs_dir_data_len(leaf, di) + sizeof(*di);
di_cur += di_len;
@@ -5836,11 +5839,20 @@ next:
if (key_type == BTRFS_DIR_INDEX_KEY) {
if (is_curr)
ctx->pos++;
- ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
+ ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
if (ret)
goto nopos;
}
+ /*
+ * If we haven't emitted any dir entry, we must not touch ctx->pos as
+ * it was was set to the termination value in previous call. We assume
+ * that "." and ".." were emitted if we reach this point and set the
+ * termination value as well for an empty directory.
+ */
+ if (ctx->pos > 2 && !emitted)
+ goto nopos;
+
/* Reached end of directory/root. Bump pos past the last item. */
ctx->pos++;
@@ -7116,21 +7128,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
if (ret)
return ERR_PTR(ret);
- em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
- ins.offset, ins.offset, ins.offset, 0);
- if (IS_ERR(em)) {
- btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
- return em;
- }
-
+ /*
+ * Create the ordered extent before the extent map. This is to avoid
+ * races with the fast fsync path that would lead to it logging file
+ * extent items that point to disk extents that were not yet written to.
+ * The fast fsync path collects ordered extents into a local list and
+ * then collects all the new extent maps, so we must create the ordered
+ * extent first and make sure the fast fsync path collects any new
+ * ordered extents after collecting new extent maps as well.
+ * The fsync path simply can not rely on inode_dio_wait() because it
+ * causes deadlock with AIO.
+ */
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
ins.offset, ins.offset, 0);
if (ret) {
btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
- free_extent_map(em);
return ERR_PTR(ret);
}
+ em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+ ins.offset, ins.offset, ins.offset, 0);
+ if (IS_ERR(em)) {
+ struct btrfs_ordered_extent *oe;
+
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+ oe = btrfs_lookup_ordered_extent(inode, start);
+ ASSERT(oe);
+ if (WARN_ON(!oe))
+ return em;
+ set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+ set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+ btrfs_remove_ordered_extent(inode, oe);
+ /* Once for our lookup and once for the ordered extents tree. */
+ btrfs_put_ordered_extent(oe);
+ btrfs_put_ordered_extent(oe);
+ }
return em;
}
@@ -7954,6 +7986,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
kfree(dip);
+ dio_bio->bi_error = bio->bi_error;
dio_end_io(dio_bio, bio->bi_error);
if (io_bio->end_io)
@@ -8008,6 +8041,7 @@ static void btrfs_endio_direct_write(struct bio *bio)
kfree(dip);
+ dio_bio->bi_error = bio->bi_error;
dio_end_io(dio_bio, bio->bi_error);
bio_put(bio);
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 952172ca7e45..48aee9846329 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2794,24 +2794,29 @@ out:
static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
{
struct page *page;
- struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
page = grab_cache_page(inode->i_mapping, index);
if (!page)
- return NULL;
+ return ERR_PTR(-ENOMEM);
if (!PageUptodate(page)) {
- if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
- 0))
- return NULL;
+ int ret;
+
+ ret = btrfs_readpage(NULL, page);
+ if (ret)
+ return ERR_PTR(ret);
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
page_cache_release(page);
- return NULL;
+ return ERR_PTR(-EIO);
+ }
+ if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
+ page_cache_release(page);
+ return ERR_PTR(-EAGAIN);
}
}
- unlock_page(page);
return page;
}
@@ -2823,17 +2828,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages,
pgoff_t index = off >> PAGE_CACHE_SHIFT;
for (i = 0; i < num_pages; i++) {
+again:
pages[i] = extent_same_get_page(inode, index + i);
- if (!pages[i])
- return -ENOMEM;
+ if (IS_ERR(pages[i])) {
+ int err = PTR_ERR(pages[i]);
+
+ if (err == -EAGAIN)
+ goto again;
+ pages[i] = NULL;
+ return err;
+ }
}
return 0;
}
-static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
+static int lock_extent_range(struct inode *inode, u64 off, u64 len,
+ bool retry_range_locking)
{
- /* do any pending delalloc/csum calc on src, one way or
- another, and lock file content */
+ /*
+ * Do any pending delalloc/csum calculations on inode, one way or
+ * another, and lock file content.
+ * The locking order is:
+ *
+ * 1) pages
+ * 2) range in the inode's io tree
+ */
while (1) {
struct btrfs_ordered_extent *ordered;
lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
@@ -2851,8 +2870,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
if (ordered)
btrfs_put_ordered_extent(ordered);
+ if (!retry_range_locking)
+ return -EAGAIN;
btrfs_wait_ordered_range(inode, off, len);
}
+ return 0;
}
static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
@@ -2877,15 +2899,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
}
-static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
- struct inode *inode2, u64 loff2, u64 len)
+static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len,
+ bool retry_range_locking)
{
+ int ret;
+
if (inode1 < inode2) {
swap(inode1, inode2);
swap(loff1, loff2);
}
- lock_extent_range(inode1, loff1, len);
- lock_extent_range(inode2, loff2, len);
+ ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
+ if (ret)
+ return ret;
+ ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
+ if (ret)
+ unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
+ loff1 + len - 1);
+ return ret;
}
struct cmp_pages {
@@ -2901,11 +2932,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
for (i = 0; i < cmp->num_pages; i++) {
pg = cmp->src_pages[i];
- if (pg)
+ if (pg) {
+ unlock_page(pg);
page_cache_release(pg);
+ }
pg = cmp->dst_pages[i];
- if (pg)
+ if (pg) {
+ unlock_page(pg);
page_cache_release(pg);
+ }
}
kfree(cmp->src_pages);
kfree(cmp->dst_pages);
@@ -2966,6 +3001,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
src_page = cmp->src_pages[i];
dst_page = cmp->dst_pages[i];
+ ASSERT(PageLocked(src_page));
+ ASSERT(PageLocked(dst_page));
addr = kmap_atomic(src_page);
dst_addr = kmap_atomic(dst_page);
@@ -3078,14 +3115,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
goto out_unlock;
}
+again:
ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
if (ret)
goto out_unlock;
if (same_inode)
- lock_extent_range(src, same_lock_start, same_lock_len);
+ ret = lock_extent_range(src, same_lock_start, same_lock_len,
+ false);
else
- btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+ ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
+ false);
+ /*
+ * If one of the inodes has dirty pages in the respective range or
+ * ordered extents, we need to flush dellaloc and wait for all ordered
+ * extents in the range. We must unlock the pages and the ranges in the
+ * io trees to avoid deadlocks when flushing delalloc (requires locking
+ * pages) and when waiting for ordered extents to complete (they require
+ * range locking).
+ */
+ if (ret == -EAGAIN) {
+ /*
+ * Ranges in the io trees already unlocked. Now unlock all
+ * pages before waiting for all IO to complete.
+ */
+ btrfs_cmp_data_free(&cmp);
+ if (same_inode) {
+ btrfs_wait_ordered_range(src, same_lock_start,
+ same_lock_len);
+ } else {
+ btrfs_wait_ordered_range(src, loff, len);
+ btrfs_wait_ordered_range(dst, dst_loff, len);
+ }
+ goto again;
+ }
+ ASSERT(ret == 0);
+ if (WARN_ON(ret)) {
+ /* ranges in the io trees already unlocked */
+ btrfs_cmp_data_free(&cmp);
+ return ret;
+ }
/* pass original length for comparison so we stay within i_size */
ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
@@ -3795,9 +3864,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
u64 lock_start = min_t(u64, off, destoff);
u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
- lock_extent_range(src, lock_start, lock_len);
+ ret = lock_extent_range(src, lock_start, lock_len, true);
} else {
- btrfs_double_extent_lock(src, off, inode, destoff, len);
+ ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
+ true);
+ }
+ ASSERT(ret == 0);
+ if (WARN_ON(ret)) {
+ /* ranges in the io trees already unlocked */
+ goto out_unlock;
}
ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index fd1c4d982463..2bd0011450df 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
root_objectid == BTRFS_TREE_LOG_OBJECTID ||
root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
root_objectid == BTRFS_UUID_TREE_OBJECTID ||
- root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+ root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+ root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
return 1;
return 0;
}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7cf8509deda7..2c849b08a91b 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -310,8 +310,16 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
err = btrfs_insert_fs_root(root->fs_info, root);
+ /*
+ * The root might have been inserted already, as before we look
+ * for orphan roots, log replay might have happened, which
+ * triggers a transaction commit and qgroup accounting, which
+ * in turn reads and inserts fs roots while doing backref
+ * walking.
+ */
+ if (err == -EEXIST)
+ err = 0;
if (err) {
- BUG_ON(err == -EEXIST);
btrfs_free_fs_root(root);
break;
}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index e0ac85949067..539e7b5e3f86 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(raid56),
BTRFS_FEAT_ATTR_PTR(skinny_metadata),
BTRFS_FEAT_ATTR_PTR(no_holes),
+ BTRFS_FEAT_ATTR_PTR(free_space_tree),
NULL
};
@@ -780,6 +782,39 @@ failure:
return error;
}
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+ u64 bit, enum btrfs_feature_set set)
+{
+ struct btrfs_fs_devices *fs_devs;
+ struct kobject *fsid_kobj;
+ u64 features;
+ int ret;
+
+ if (!fs_info)
+ return;
+
+ features = get_features(fs_info, set);
+ ASSERT(bit & supported_feature_masks[set]);
+
+ fs_devs = fs_info->fs_devices;
+ fsid_kobj = &fs_devs->fsid_kobj;
+
+ if (!fsid_kobj->state_initialized)
+ return;
+
+ /*
+ * FIXME: this is too heavy to update just one value, ideally we'd like
+ * to use sysfs_update_group but some refactoring is needed first.
+ */
+ sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+ ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
static int btrfs_init_debugfs(void)
{
#ifdef CONFIG_DEBUG_FS
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9c09522125a6..d7da1a4c2f6c 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
#define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
- BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+ BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
#define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
struct kobject *parent);
int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+ u64 bit, enum btrfs_feature_set set);
+
#endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index b1d920b30070..0e1e61a7ec23 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
{
struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
- GFP_NOFS);
+ GFP_KERNEL);
if (!fs_info)
return fs_info;
fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
- GFP_NOFS);
+ GFP_KERNEL);
if (!fs_info->fs_devices) {
kfree(fs_info);
return NULL;
}
fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
- GFP_NOFS);
+ GFP_KERNEL);
if (!fs_info->super_copy) {
kfree(fs_info->fs_devices);
kfree(fs_info);
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
{
struct btrfs_block_group_cache *cache;
- cache = kzalloc(sizeof(*cache), GFP_NOFS);
+ cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (!cache)
return NULL;
cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
- GFP_NOFS);
+ GFP_KERNEL);
if (!cache->free_space_ctl) {
kfree(cache);
return NULL;
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index e29fa297e053..669b58201e36 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
* test.
*/
for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
if (!page) {
test_msg("Failed to allocate test page\n");
ret = -ENOMEM;
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
* |--- delalloc ---|
* |--- search ---|
*/
- set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+ set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
start = 0;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
test_msg("Couldn't find the locked page\n");
goto out_bits;
}
- set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+ set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
*
* We are re-using our test_start from above since it works out well.
*/
- set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+ set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
start = test_start;
end = 0;
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
}
ret = 0;
out_bits:
- clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+ clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
out:
if (locked_page)
page_cache_release(locked_page);
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
test_msg("Running extent buffer bitmap tests\n");
- bitmap = kmalloc(len, GFP_NOFS);
+ bitmap = kmalloc(len, GFP_KERNEL);
if (!bitmap) {
test_msg("Couldn't allocate test bitmap\n");
return -ENOMEM;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 5de55fdd28bc..e2d3da02deee 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
(BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
EXTENT_DELALLOC | EXTENT_DIRTY |
EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
- NULL, GFP_NOFS);
+ NULL, GFP_KERNEL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
BTRFS_MAX_EXTENT_SIZE+8191,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
- NULL, GFP_NOFS);
+ NULL, GFP_KERNEL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
- NULL, GFP_NOFS);
+ NULL, GFP_KERNEL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@@ -1096,7 +1096,7 @@ out:
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
- NULL, GFP_NOFS);
+ NULL, GFP_KERNEL);
iput(inode);
btrfs_free_dummy_root(root);
return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 323e12cc9d2f..978c3a810893 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_path *path,
struct list_head *logged_list,
- struct btrfs_log_ctx *ctx)
+ struct btrfs_log_ctx *ctx,
+ const u64 start,
+ const u64 end)
{
struct extent_map *em, *n;
struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
}
list_sort(NULL, &extents, extent_cmp);
-
+ /*
+ * Collect any new ordered extents within the range. This is to
+ * prevent logging file extent items without waiting for the disk
+ * location they point to being written. We do this only to deal
+ * with races against concurrent lockless direct IO writes.
+ */
+ btrfs_get_logged_extents(inode, logged_list, start, end);
process:
while (!list_empty(&extents)) {
em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
goto out_unlock;
}
ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
- &logged_list, ctx);
+ &logged_list, ctx, start, end);
if (ret) {
err = ret;
goto out_unlock;