diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/Kconfig | 1 | ||||
-rw-r--r-- | fs/btrfs/backref.c | 4 | ||||
-rw-r--r-- | fs/btrfs/check-integrity.c | 4 | ||||
-rw-r--r-- | fs/btrfs/compression.c | 2 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 17 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 8 | ||||
-rw-r--r-- | fs/btrfs/dev-replace.c | 6 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 10 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 74 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 10 | ||||
-rw-r--r-- | fs/btrfs/file-item.c | 4 | ||||
-rw-r--r-- | fs/btrfs/file.c | 102 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 36 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 121 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 8 | ||||
-rw-r--r-- | fs/btrfs/math.h | 6 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 7 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 8 | ||||
-rw-r--r-- | fs/btrfs/raid56.c | 3 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 27 | ||||
-rw-r--r-- | fs/btrfs/send.c | 171 | ||||
-rw-r--r-- | fs/btrfs/super.c | 16 | ||||
-rw-r--r-- | fs/btrfs/sysfs.h | 22 | ||||
-rw-r--r-- | fs/btrfs/tests/inode-tests.c | 197 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 51 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 3 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 91 | ||||
-rw-r--r-- | fs/btrfs/xattr.c | 8 |
28 files changed, 753 insertions, 264 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index a66768ebc8d1..80e9c18ea64f 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -8,6 +8,7 @@ config BTRFS_FS select LZO_DECOMPRESS select RAID6_PQ select XOR_BLOCKS + select SRCU help Btrfs is a general purpose copy-on-write filesystem with extents, diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f55721ff9385..9de772ee0031 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1206,7 +1206,7 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans, struct ulist *roots = NULL; struct ulist_iterator uiter; struct ulist_node *node; - struct seq_list elem = {}; + struct seq_list elem = SEQ_LIST_INIT(elem); int ret = 0; tmp = ulist_alloc(GFP_NOFS); @@ -1610,7 +1610,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, struct ulist *roots = NULL; struct ulist_node *ref_node = NULL; struct ulist_node *root_node = NULL; - struct seq_list tree_mod_seq_elem = {}; + struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); struct ulist_iterator ref_uiter; struct ulist_iterator root_uiter; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 77ba1634c9ab..ce7dec88f4b8 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -2990,8 +2990,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio) (unsigned long long)bio->bi_iter.bi_sector, dev_bytenr, bio->bi_bdev); - mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, - GFP_NOFS); + mapped_datav = kmalloc_array(bio->bi_vcnt, + sizeof(*mapped_datav), GFP_NOFS); if (!mapped_datav) goto leave; cur_bytenr = dev_bytenr; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index e7a94f8cdfd7..ce62324c78e7 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -622,7 +622,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, cb->orig_bio = bio; nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE); - cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages, + cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!cb->compressed_pages) goto fail1; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e1a0981159ab..0f11ebc92f02 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -578,7 +578,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, if (!tree_mod_need_log(fs_info, eb)) return 0; - tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags); + tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags); if (!tm_list) return -ENOMEM; @@ -677,7 +677,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (log_removal && btrfs_header_level(old_root) > 0) { nritems = btrfs_header_nritems(old_root); - tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *), + tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), flags); if (!tm_list) { ret = -ENOMEM; @@ -814,7 +814,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) return 0; - tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *), + tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *), GFP_NOFS); if (!tm_list) return -ENOMEM; @@ -905,8 +905,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) return 0; nritems = btrfs_header_nritems(eb); - tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *), - GFP_NOFS); + tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS); if (!tm_list) return -ENOMEM; @@ -1645,14 +1644,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, parent_nritems = btrfs_header_nritems(parent); blocksize = root->nodesize; - end_slot = parent_nritems; + end_slot = parent_nritems - 1; - if (parent_nritems == 1) + if (parent_nritems <= 1) return 0; btrfs_set_lock_blocking(parent); - for (i = start_slot; i < end_slot; i++) { + for (i = start_slot; i <= end_slot; i++) { int close = 1; btrfs_node_key(parent, &disk_key, i); @@ -1669,7 +1668,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, other = btrfs_node_blockptr(parent, i - 1); close = close_blocks(blocknr, other, blocksize); } - if (!close && i < end_slot - 2) { + if (!close && i < end_slot) { other = btrfs_node_blockptr(parent, i + 1); close = close_blocks(blocknr, other, blocksize); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 216056c37940..95944b81ed5c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1176,6 +1176,7 @@ struct btrfs_space_info { struct percpu_counter total_bytes_pinned; struct list_head list; + /* Protected by the spinlock 'lock'. */ struct list_head ro_bgs; struct rw_semaphore groups_sem; @@ -1328,6 +1329,8 @@ struct seq_list { u64 seq; }; +#define SEQ_LIST_INIT(name) { .list = LIST_HEAD_INIT((name).list), .seq = 0 } + enum btrfs_orphan_cleanup_state { ORPHAN_CLEANUP_STARTED = 1, ORPHAN_CLEANUP_DONE = 2, @@ -3386,6 +3389,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); @@ -3909,6 +3914,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, loff_t actual_len, u64 *alloc_hint); int btrfs_inode_check_errors(struct inode *inode); extern const struct dentry_operations btrfs_dentry_operations; +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +void btrfs_test_inode_set_ops(struct inode *inode); +#endif /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 5ec03d999c37..0573848c7333 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -670,8 +670,8 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: srcdev = dev_replace->srcdev; - args->status.progress_1000 = div64_u64(dev_replace->cursor_left, - div64_u64(btrfs_device_get_total_bytes(srcdev), 1000)); + args->status.progress_1000 = div_u64(dev_replace->cursor_left, + div_u64(btrfs_device_get_total_bytes(srcdev), 1000)); break; } btrfs_dev_replace_unlock(dev_replace); @@ -806,7 +806,7 @@ static int btrfs_dev_replace_kthread(void *data) btrfs_dev_replace_status(fs_info, status_args); progress = status_args->status.progress_1000; kfree(status_args); - do_div(progress, 10); + progress = div_u64(progress, 10); printk_in_rcu(KERN_INFO "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n", dev_replace->srcdev->missing ? "<missing disk>" : diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f770e8b5cb86..23c49ab2de4c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -303,7 +303,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info, offset += cur_len; } if (csum_size > sizeof(inline_result)) { - result = kzalloc(csum_size * sizeof(char), GFP_NOFS); + result = kzalloc(csum_size, GFP_NOFS); if (!result) return 1; } else { @@ -1724,12 +1724,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { int err; - bdi->capabilities = BDI_CAP_MAP_COPY; - err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY); + err = bdi_setup_and_register(bdi, "btrfs"); if (err) return err; - bdi->ra_pages = default_backing_dev_info.ra_pages; + bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE; bdi->congested_fn = btrfs_congested_fn; bdi->congested_data = info; return 0; @@ -2181,7 +2180,6 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info, */ fs_info->btree_inode->i_size = OFFSET_MAX; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; - fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, @@ -2530,7 +2528,7 @@ int open_ctree(struct super_block *sb, fs_info->free_chunk_space = 0; fs_info->tree_mod_log = RB_ROOT; fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; - fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64); + fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); spin_lock_init(&fs_info->reada_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 262e16fe27d8..d8ce3cfe7ae0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2561,8 +2561,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, */ spin_lock(&delayed_refs->lock); avg = fs_info->avg_delayed_ref_runtime * 3 + runtime; - avg = div64_u64(avg, 4); - fs_info->avg_delayed_ref_runtime = avg; + fs_info->avg_delayed_ref_runtime = avg >> 2; /* div by 4 */ spin_unlock(&delayed_refs->lock); } return 0; @@ -2624,7 +2623,7 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads) * We don't ever fill up leaves all the way so multiply by 2 just to be * closer to what we're really going to want to ouse. */ - return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); + return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root)); } int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, @@ -3193,7 +3192,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, struct inode *inode = NULL; u64 alloc_hint = 0; int dcs = BTRFS_DC_ERROR; - int num_pages = 0; + u64 num_pages = 0; int retries = 0; int ret = 0; @@ -3208,6 +3207,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, return 0; } + if (trans->aborted) + return 0; again: inode = lookup_free_space_inode(root, block_group, path); if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { @@ -3243,6 +3244,20 @@ again: */ BTRFS_I(inode)->generation = 0; ret = btrfs_update_inode(trans, root, inode); + if (ret) { + /* + * So theoretically we could recover from this, simply set the + * super cache generation to 0 so we know to invalidate the + * cache, but then we'd have to keep track of the block groups + * that fail this way so we know we _have_ to reset this cache + * before the next commit or risk reading stale cache. So to + * limit our exposure to horrible edge cases lets just abort the + * transaction, this only happens in really bad situations + * anyway. + */ + btrfs_abort_transaction(trans, root, ret); + goto out_put; + } WARN_ON(ret); if (i_size_read(inode) > 0) { @@ -3277,7 +3292,7 @@ again: * taking up quite a bit since it's not folded into the other space * cache. */ - num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024); + num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024); if (!num_pages) num_pages = 1; @@ -3309,6 +3324,32 @@ out: return ret; } +int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache, *tmp; + struct btrfs_transaction *cur_trans = trans->transaction; + struct btrfs_path *path; + + if (list_empty(&cur_trans->dirty_bgs) || + !btrfs_test_opt(root, SPACE_CACHE)) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* Could add new block groups, use _safe just in case */ + list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs, + dirty_list) { + if (cache->disk_cache_state == BTRFS_DC_CLEAR) + cache_save_setup(cache, trans, path); + } + + btrfs_free_path(path); + return 0; +} + int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -4770,10 +4811,10 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * csum_size * 2; - num_bytes += div64_u64(data_used + meta_used, 50); + num_bytes += div_u64(data_used + meta_used, 50); if (num_bytes * 3 > meta_used) - num_bytes = div64_u64(meta_used, 3); + num_bytes = div_u64(meta_used, 3); return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10); } @@ -5033,16 +5074,16 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes, BTRFS_I(inode)->csum_bytes == 0) return 0; - old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); + old_csums = (int)div_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); if (reserve) BTRFS_I(inode)->csum_bytes += num_bytes; else BTRFS_I(inode)->csum_bytes -= num_bytes; csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item); - num_csums_per_leaf = (int)div64_u64(csum_size, + num_csums_per_leaf = (int)div_u64(csum_size, sizeof(struct btrfs_csum_item) + sizeof(struct btrfs_disk_key)); - num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); + num_csums = (int)div_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize); num_csums = num_csums + num_csums_per_leaf - 1; num_csums = num_csums / num_csums_per_leaf; @@ -5094,7 +5135,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, root->sectorsize); spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents++; + nr_extents = (unsigned)div64_u64(num_bytes + + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + BTRFS_I(inode)->outstanding_extents += nr_extents; + nr_extents = 0; if (BTRFS_I(inode)->outstanding_extents > BTRFS_I(inode)->reserved_extents) @@ -5239,6 +5284,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) if (dropped > 0) to_free += btrfs_calc_trans_metadata_size(root, dropped); + if (btrfs_test_is_dummy_root(root)) + return; + trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), to_free, 0); if (root->fs_info->quota_enabled) { @@ -8671,7 +8719,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) min_free <<= 1; } else if (index == BTRFS_RAID_RAID0) { dev_min = fs_devices->rw_devices; - do_div(min_free, dev_min); + min_free = div64_u64(min_free, dev_min); } /* We need to do this so that we can look at pending chunks */ @@ -9359,7 +9407,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * are still on the list after taking the semaphore */ list_del_init(&block_group->list); - list_del_init(&block_group->ro_list); if (list_empty(&block_group->space_info->block_groups[index])) { kobj = block_group->space_info->block_group_kobjs[index]; block_group->space_info->block_group_kobjs[index] = NULL; @@ -9408,6 +9455,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, btrfs_remove_free_space_cache(block_group); spin_lock(&block_group->space_info->lock); + list_del_init(&block_group->ro_list); block_group->space_info->total_bytes -= block_group->key.offset; block_group->space_info->bytes_readonly -= block_group->key.offset; block_group->space_info->disk_total -= block_group->key.offset * factor; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 29850d4a3827..d688cfe5d496 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1408,8 +1408,8 @@ int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) while (index <= end_index) { page = find_get_page(inode->i_mapping, index); BUG_ON(!page); /* Pages should be in the extent_io_tree */ - account_page_redirty(page); __set_page_dirty_nobuffers(page); + account_page_redirty(page); page_cache_release(page); index++; } @@ -2191,7 +2191,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end) next = next_state(state); - failrec = (struct io_failure_record *)state->private; + failrec = (struct io_failure_record *)(unsigned long)state->private; free_extent_state(state); kfree(failrec); @@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb) /* Should be safe to release our pages at this point */ btrfs_release_extent_buffer_page(eb); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) { + __free_extent_buffer(eb); + return 1; + } +#endif call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); return 1; } diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index fc003321bdd4..58ece6558430 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -185,8 +185,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits; if (!dst) { if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) { - btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size, - GFP_NOFS); + btrfs_bio->csum_allocated = kmalloc_array(nblocks, + csum_size, GFP_NOFS); if (!btrfs_bio->csum_allocated) { btrfs_free_path(path); return -ENOMEM; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6b796f03de10..7d4bb3b6fbc2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -273,11 +273,7 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info) defrag = rb_entry(node, struct inode_defrag, rb_node); kmem_cache_free(btrfs_inode_defrag_cachep, defrag); - if (need_resched()) { - spin_unlock(&fs_info->defrag_inodes_lock); - cond_resched(); - spin_lock(&fs_info->defrag_inodes_lock); - } + cond_resched_lock(&fs_info->defrag_inodes_lock); node = rb_first(&fs_info->defrag_inodes); } @@ -1485,7 +1481,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, PAGE_CACHE_SIZE / (sizeof(struct page *))); nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied); nrptrs = max(nrptrs, 8); - pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); + pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL); if (!pages) return -ENOMEM; @@ -1635,8 +1631,8 @@ again: btrfs_end_write_no_snapshoting(root); if (only_release_metadata && copied > 0) { - u64 lockstart = round_down(pos, root->sectorsize); - u64 lockend = lockstart + + lockstart = round_down(pos, root->sectorsize); + lockend = lockstart + (dirty_pages << PAGE_CACHE_SHIFT) - 1; set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, @@ -1746,7 +1742,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, mutex_lock(&inode->i_mutex); - current->backing_dev_info = inode->i_mapping->backing_dev_info; + current->backing_dev_info = inode_to_bdi(inode); err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) { mutex_unlock(&inode->i_mutex); @@ -1811,22 +1807,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, mutex_unlock(&inode->i_mutex); /* - * we want to make sure fsync finds this change - * but we haven't joined a transaction running right now. - * - * Later on, someone is sure to update the inode and get the - * real transid recorded. - * - * We set last_trans now to the fs_info generation + 1, - * this will either be one more than the running transaction - * or the generation used for the next transaction if there isn't - * one running right now. - * * We also have to set last_sub_trans to the current log transid, * otherwise subsequent syncs to a file that's been synced in this * transaction will appear to have already occured. */ - BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; BTRFS_I(inode)->last_sub_trans = root->log_transid; if (num_written > 0) { err = generic_write_sync(file, pos, num_written); @@ -1959,25 +1943,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); /* - * check the transaction that last modified this inode - * and see if its already been committed - */ - if (!BTRFS_I(inode)->last_trans) { - mutex_unlock(&inode->i_mutex); - goto out; - } - - /* - * if the last transaction that changed this file was before - * the current transaction, we can bail out now without any - * syncing + * If the last transaction that changed this file was before the current + * transaction and we have the full sync flag set in our inode, we can + * bail out now without any syncing. + * + * Note that we can't bail out if the full sync flag isn't set. This is + * because when the full sync flag is set we start all ordered extents + * and wait for them to fully complete - when they complete they update + * the inode's last_trans field through: + * + * btrfs_finish_ordered_io() -> + * btrfs_update_inode_fallback() -> + * btrfs_update_inode() -> + * btrfs_set_inode_last_trans() + * + * So we are sure that last_trans is up to date and can do this check to + * bail out safely. For the fast path, when the full sync flag is not + * set in our inode, we can not do it because we start only our ordered + * extents and don't wait for them to complete (that is when + * btrfs_finish_ordered_io runs), so here at this point their last_trans + * value might be less than or equals to fs_info->last_trans_committed, + * and setting a speculative last_trans for an inode when a buffered + * write is made (such as fs_info->generation + 1 for example) would not + * be reliable since after setting the value and before fsync is called + * any number of transactions can start and commit (transaction kthread + * commits the current transaction periodically), and a transaction + * commit does not start nor waits for ordered extents to complete. */ smp_mb(); if (btrfs_inode_in_log(inode, root->fs_info->generation) || - BTRFS_I(inode)->last_trans <= - root->fs_info->last_trans_committed) { - BTRFS_I(inode)->last_trans = 0; - + (full_sync && BTRFS_I(inode)->last_trans <= + root->fs_info->last_trans_committed)) { /* * We'v had everything committed since the last time we were * modified so clear this flag in case it was set for whatever @@ -2081,7 +2077,6 @@ static const struct vm_operations_struct btrfs_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = btrfs_page_mkwrite, - .remap_pages = generic_file_remap_pages, }; static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) @@ -2276,6 +2271,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) bool same_page; bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); u64 ino_size; + bool truncated_page = false; + bool updated_inode = false; ret = btrfs_wait_ordered_range(inode, offset, len); if (ret) @@ -2307,13 +2304,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) * entire page. */ if (same_page && len < PAGE_CACHE_SIZE) { - if (offset < ino_size) + if (offset < ino_size) { + truncated_page = true; ret = btrfs_truncate_page(inode, offset, len, 0); + } else { + ret = 0; + } goto out_only_mutex; } /* zero back part of the first page */ if (offset < ino_size) { + truncated_page = true; ret = btrfs_truncate_page(inode, offset, 0, 0); if (ret) { mutex_unlock(&inode->i_mutex); @@ -2349,6 +2351,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (!ret) { /* zero the front end of the last page */ if (tail_start + tail_len < ino_size) { + truncated_page = true; ret = btrfs_truncate_page(inode, tail_start + tail_len, 0, 1); if (ret) @@ -2358,8 +2361,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) } if (lockend < lockstart) { - mutex_unlock(&inode->i_mutex); - return 0; + ret = 0; + goto out_only_mutex; } while (1) { @@ -2507,6 +2510,7 @@ out_trans: trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); + updated_inode = true; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); out_free: @@ -2516,6 +2520,22 @@ out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state, GFP_NOFS); out_only_mutex: + if (!updated_inode && truncated_page && !ret && !err) { + /* + * If we only end up zeroing part of a page, we still need to + * update the inode item, so that all the time fields are + * updated as well as the necessary btrfs inode in memory fields + * for detecting, at fsync time, if the inode isn't yet in the + * log tree or it's there but not up to date. + */ + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + } else { + err = btrfs_update_inode(trans, root, inode); + ret = btrfs_end_transaction(trans, root); + } + } mutex_unlock(&inode->i_mutex); if (ret && !err) err = ret; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a71978578fa7..764528a4f6fd 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -298,7 +298,7 @@ static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, memset(io_ctl, 0, sizeof(struct io_ctl)); - io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); + io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS); if (!io_ctl->pages) return -ENOMEM; @@ -1298,11 +1298,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) { u64 bitmap_start; - u64 bytes_per_bitmap; + u32 bytes_per_bitmap; bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit; bitmap_start = offset - ctl->start; - bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); + bitmap_start = div_u64(bitmap_start, bytes_per_bitmap); bitmap_start *= bytes_per_bitmap; bitmap_start += ctl->start; @@ -1521,10 +1521,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) u64 bitmap_bytes; u64 extent_bytes; u64 size = block_group->key.offset; - u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; - int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); + u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; + u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg); - max_bitmaps = max(max_bitmaps, 1); + max_bitmaps = max_t(u32, max_bitmaps, 1); ASSERT(ctl->total_bitmaps <= max_bitmaps); @@ -1537,7 +1537,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) max_bytes = MAX_CACHE_BYTES_PER_GIG; else max_bytes = MAX_CACHE_BYTES_PER_GIG * - div64_u64(size, 1024 * 1024 * 1024); + div_u64(size, 1024 * 1024 * 1024); /* * we want to account for 1 more bitmap than what we have so we can make @@ -1552,14 +1552,14 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) } /* - * we want the extent entry threshold to always be at most 1/2 the maxw + * we want the extent entry threshold to always be at most 1/2 the max * bytes we can have, or whatever is less than that. */ extent_bytes = max_bytes - bitmap_bytes; - extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2)); + extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1); ctl->extents_thresh = - div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); + div_u64(extent_bytes, sizeof(struct btrfs_free_space)); } static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, @@ -1673,7 +1673,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, */ if (*bytes >= align) { tmp = entry->offset - ctl->start + align - 1; - do_div(tmp, align); + tmp = div64_u64(tmp, align); tmp = tmp * align + ctl->start; align_off = tmp - entry->offset; } else { @@ -2402,11 +2402,8 @@ static void __btrfs_remove_free_space_cache_locked( } else { free_bitmap(ctl, info); } - if (need_resched()) { - spin_unlock(&ctl->tree_lock); - cond_resched(); - spin_lock(&ctl->tree_lock); - } + + cond_resched_lock(&ctl->tree_lock); } } @@ -2431,11 +2428,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) WARN_ON(cluster->block_group != block_group); __btrfs_return_cluster_to_free_space(block_group, cluster); - if (need_resched()) { - spin_unlock(&ctl->tree_lock); - cond_resched(); - spin_lock(&ctl->tree_lock); - } + + cond_resched_lock(&ctl->tree_lock); } __btrfs_remove_free_space_cache_locked(ctl); spin_unlock(&ctl->tree_lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8564d8ce03de..bb088bd8f7f0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, static int btrfs_dirty_inode(struct inode *inode); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +void btrfs_test_inode_set_ops(struct inode *inode) +{ + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; +} +#endif + static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir, const struct qstr *qstr) @@ -463,7 +470,7 @@ again: */ if (inode_need_compress(inode)) { WARN_ON(pages); - pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); + pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) { /* just bail out to the uncompressed code */ goto cont; @@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode, u64 new_size; /* - * We need the largest size of the remaining extent to see if we - * need to add a new outstanding extent. Think of the following - * case - * - * [MEAX_EXTENT_SIZEx2 - 4k][4k] - * - * The new_size would just be 4k and we'd think we had enough - * outstanding extents for this if we only took one side of the - * split, same goes for the other direction. We need to see if - * the larger size still is the same amount of extents as the - * original size, because if it is we need to add a new - * outstanding extent. But if we split up and the larger size - * is less than the original then we are good to go since we've - * already accounted for the extra extent in our original - * accounting. + * See the explanation in btrfs_merge_extent_hook, the same + * applies here, just in reverse. */ new_size = orig->end - split + 1; - if ((split - orig->start) > new_size) - new_size = split - orig->start; - - num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, + num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); - if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE) < num_extents) + new_size = split - orig->start; + num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE) >= num_extents) return; } @@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode, if (!(other->state & EXTENT_DELALLOC)) return; - old_size = other->end - other->start + 1; - new_size = old_size + (new->end - new->start + 1); + if (new->start > other->start) + new_size = new->end - other->start + 1; + else + new_size = other->end - new->start + 1; /* we're not bigger than the max, unreserve the space and go */ if (new_size <= BTRFS_MAX_EXTENT_SIZE) { @@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode, } /* - * If we grew by another max_extent, just return, we want to keep that - * reserved amount. + * We have to add up either side to figure out how many extents were + * accounted for before we merged into one big extent. If the number of + * extents we accounted for is <= the amount we need for the new range + * then we can return, otherwise drop. Think of it like this + * + * [ 4k][MAX_SIZE] + * + * So we've grown the extent by a MAX_SIZE extent, this would mean we + * need 2 outstanding extents, on one side we have 1 and the other side + * we have 1 so they are == and we can return. But in this case + * + * [MAX_SIZE+4k][MAX_SIZE+4k] + * + * Each range on their own accounts for 2 extents, but merged together + * they are only 3 extents worth of accounting, so we need to drop in + * this case. */ + old_size = other->end - other->start + 1; num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); + old_size = new->end - new->start + 1; + num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE) > num_extents) + BTRFS_MAX_EXTENT_SIZE) >= num_extents) return; spin_lock(&BTRFS_I(inode)->lock); @@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode, spin_unlock(&BTRFS_I(inode)->lock); } + /* For sanity tests */ + if (btrfs_test_is_dummy_root(root)) + return; + __percpu_counter_add(&root->fs_info->delalloc_bytes, len, root->fs_info->delalloc_batch); spin_lock(&BTRFS_I(inode)->lock); @@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode, root != root->fs_info->tree_root) btrfs_delalloc_release_metadata(inode, len); + /* For sanity tests. */ + if (btrfs_test_is_dummy_root(root)) + return; + if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && do_list && !(state->state & EXTENT_NORESERVE)) btrfs_free_reserved_data_space(inode, len); @@ -3670,7 +3693,6 @@ cache_acl: switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_mapping->a_ops = &btrfs_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; @@ -3685,7 +3707,6 @@ cache_acl: case S_IFLNK: inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; break; default: inode->i_op = &btrfs_special_inode_operations; @@ -6167,7 +6188,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; inode->i_mapping->a_ops = &btrfs_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) @@ -7216,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; u64 len = bh_result->b_size; - u64 orig_len = len; + u64 *outstanding_extents = NULL; int unlock_bits = EXTENT_LOCKED; int ret = 0; @@ -7228,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, lockstart = start; lockend = start + len - 1; + if (current->journal_info) { + /* + * Need to pull our outstanding extents and set journal_info to NULL so + * that anything that needs to check if there's a transction doesn't get + * confused. + */ + outstanding_extents = current->journal_info; + current->journal_info = NULL; + } + /* * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered. @@ -7288,7 +7318,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && em->block_start != EXTENT_MAP_HOLE)) { int type; - int ret; u64 block_start, orig_start, orig_block_len, ram_bytes; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) @@ -7352,11 +7381,20 @@ unlock: if (start + len > i_size_read(inode)) i_size_write(inode, start + len); - if (len < orig_len) { + /* + * If we have an outstanding_extents count still set then we're + * within our reservation, otherwise we need to adjust our inode + * counter appropriately. + */ + if (*outstanding_extents) { + (*outstanding_extents)--; + } else { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); } + + current->journal_info = outstanding_extents; btrfs_free_reserved_data_space(inode, len); } @@ -7380,6 +7418,8 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, &cached_state, GFP_NOFS); + if (outstanding_extents) + current->journal_info = outstanding_extents; return ret; } @@ -8079,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + u64 outstanding_extents = 0; size_t count = 0; int flags = 0; bool wakeup = true; @@ -8116,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ret = btrfs_delalloc_reserve_space(inode, count); if (ret) goto out; + outstanding_extents = div64_u64(count + + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + + /* + * We need to know how many extents we reserved so that we can + * do the accounting properly if we go over the number we + * originally calculated. Abuse current->journal_info for this. + */ + current->journal_info = &outstanding_extents; } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { inode_dio_done(inode); @@ -8128,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, iter, offset, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (rw & WRITE) { + current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) btrfs_delalloc_release_space(inode, count); else if (ret >= 0 && (size_t)ret < count) @@ -9281,7 +9333,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; inode->i_mapping->a_ops = &btrfs_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); @@ -9325,7 +9376,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode->i_op = &btrfs_symlink_inode_operations; inode->i_mapping->a_ops = &btrfs_symlink_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; inode_set_bytes(inode, name_len); btrfs_i_size_write(inode, name_len); err = btrfs_update_inode(trans, root, inode); @@ -9537,7 +9587,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_op = &btrfs_file_inode_operations; inode->i_mapping->a_ops = &btrfs_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; ret = btrfs_init_inode_security(trans, inode, dir, NULL); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d49fe8a0f6b5..94cde83eb3da 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -776,11 +776,11 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) return -EPERM; if (isdir) { - if (!S_ISDIR(victim->d_inode->i_mode)) + if (!d_is_dir(victim)) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; - } else if (S_ISDIR(victim->d_inode->i_mode)) + } else if (d_is_dir(victim)) return -EISDIR; if (IS_DEADDIR(dir)) return -ENOENT; @@ -1564,7 +1564,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, goto out_free; } - do_div(new_size, root->sectorsize); + new_size = div_u64(new_size, root->sectorsize); new_size *= root->sectorsize; printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n", @@ -3039,7 +3039,7 @@ out: static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 disko) { - struct seq_list tree_mod_seq_elem = {}; + struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); struct ulist *roots; struct ulist_iterator uiter; struct ulist_node *root_node = NULL; diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h index b7816cefbd13..1b10a3cd1195 100644 --- a/fs/btrfs/math.h +++ b/fs/btrfs/math.h @@ -28,8 +28,7 @@ static inline u64 div_factor(u64 num, int factor) if (factor == 10) return num; num *= factor; - do_div(num, 10); - return num; + return div_u64(num, 10); } static inline u64 div_factor_fine(u64 num, int factor) @@ -37,8 +36,7 @@ static inline u64 div_factor_fine(u64 num, int factor) if (factor == 100) return num; num *= factor; - do_div(num, 100); - return num; + return div_u64(num, 100); } #endif diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 534544e08f76..157cc54fc634 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode, continue; if (entry_end(ordered) <= start) break; - if (!list_empty(&ordered->log_list)) - continue; - if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) + if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) continue; list_add(&ordered->log_list, logged_list); atomic_inc(&ordered->refs); @@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)); - if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) - list_add_tail(&ordered->trans_list, &trans->ordered); + list_add_tail(&ordered->trans_list, &trans->ordered); spin_lock_irq(&log->log_extents_lock[index]); } spin_unlock_irq(&log->log_extents_lock[index]); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 50ee58614f72..33f31375259a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1, if (oper1->seq < oper2->seq) return -1; if (oper1->seq > oper2->seq) - return -1; + return 1; if (oper1->ref_root < oper2->ref_root) return -1; if (oper1->ref_root > oper2->ref_root) @@ -1845,7 +1845,7 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, struct ulist *roots = NULL; struct ulist *qgroups, *tmp; struct btrfs_qgroup *qgroup; - struct seq_list elem = {}; + struct seq_list elem = SEQ_LIST_INIT(elem); u64 seq; int old_roots = 0; int new_roots = 0; @@ -1967,7 +1967,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans, int err; struct btrfs_qgroup *qg; u64 root_obj = 0; - struct seq_list elem = {}; + struct seq_list elem = SEQ_LIST_INIT(elem); parents = ulist_alloc(GFP_NOFS); if (!parents) @@ -2522,7 +2522,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, { struct btrfs_key found; struct ulist *roots = NULL; - struct seq_list tree_mod_seq_elem = {}; + struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); u64 num_bytes; u64 seq; int new_roots; diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 1ccc40bb61f5..fa72068bd256 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1800,8 +1800,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) int err; int i; - pointers = kzalloc(rbio->real_stripes * sizeof(void *), - GFP_NOFS); + pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); if (!pointers) { err = -ENOMEM; goto cleanup_io; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4af5f49a3105..ab5811545a98 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -964,9 +964,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) * the statistics. */ - sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS * - sizeof(*sblocks_for_recheck), - GFP_NOFS); + sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS, + sizeof(*sblocks_for_recheck), GFP_NOFS); if (!sblocks_for_recheck) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -2319,7 +2318,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, unsigned long *bitmap, u64 start, u64 len) { - int offset; + u32 offset; int nsectors; int sectorsize = sparity->sctx->dev_root->sectorsize; @@ -2329,7 +2328,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity, } start -= sparity->logic_start; - offset = (int)do_div(start, sparity->stripe_len); + start = div_u64_rem(start, sparity->stripe_len, &offset); offset /= sectorsize; nsectors = (int)len / sectorsize; @@ -2612,8 +2611,8 @@ static int get_raid56_logic_offset(u64 physical, int num, int j = 0; u64 stripe_nr; u64 last_offset; - int stripe_index; - int rot; + u32 stripe_index; + u32 rot; last_offset = (physical - map->stripes[num].physical) * nr_data_stripes(map); @@ -2624,12 +2623,11 @@ static int get_raid56_logic_offset(u64 physical, int num, for (i = 0; i < nr_data_stripes(map); i++) { *offset = last_offset + i * map->stripe_len; - stripe_nr = *offset; - do_div(stripe_nr, map->stripe_len); - do_div(stripe_nr, nr_data_stripes(map)); + stripe_nr = div_u64(*offset, map->stripe_len); + stripe_nr = div_u64(stripe_nr, nr_data_stripes(map)); /* Work out the disk rotation on this stripe-set */ - rot = do_div(stripe_nr, map->num_stripes); + stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot); /* calculate which stripe this data locates */ rot += i; stripe_index = rot % map->num_stripes; @@ -2995,10 +2993,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, int extent_mirror_num; int stop_loop = 0; - nstripes = length; physical = map->stripes[num].physical; offset = 0; - do_div(nstripes, map->stripe_len); + nstripes = div_u64(length, map->stripe_len); if (map->type & BTRFS_BLOCK_GROUP_RAID0) { offset = map->stripe_len * num; increment = map->stripe_len * map->num_stripes; @@ -3029,7 +3026,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, ppath = btrfs_alloc_path(); if (!ppath) { - btrfs_free_path(ppath); + btrfs_free_path(path); return -ENOMEM; } @@ -3041,6 +3038,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, path->search_commit_root = 1; path->skip_locking = 1; + ppath->search_commit_root = 1; + ppath->skip_locking = 1; /* * trigger the readahead for extent tree csum tree and wait for * completion. During readahead, the scrub is officially paused diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fe5857223515..d6033f540cc7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -230,6 +230,7 @@ struct pending_dir_move { u64 parent_ino; u64 ino; u64 gen; + bool is_orphan; struct list_head update_refs; }; @@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, u64 ino_gen, u64 parent_ino, struct list_head *new_refs, - struct list_head *deleted_refs) + struct list_head *deleted_refs, + const bool is_orphan) { struct rb_node **p = &sctx->pending_dir_moves.rb_node; struct rb_node *parent = NULL; @@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx, pm->parent_ino = parent_ino; pm->ino = ino; pm->gen = ino_gen; + pm->is_orphan = is_orphan; INIT_LIST_HEAD(&pm->list); INIT_LIST_HEAD(&pm->update_refs); RB_CLEAR_NODE(&pm->node); @@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) rmdir_ino = dm->rmdir_ino; free_waiting_dir_move(sctx, dm); - ret = get_first_ref(sctx->parent_root, pm->ino, - &parent_ino, &parent_gen, name); - if (ret < 0) - goto out; - - ret = get_cur_path(sctx, parent_ino, parent_gen, - from_path); - if (ret < 0) - goto out; - ret = fs_path_add_path(from_path, name); + if (pm->is_orphan) { + ret = gen_unique_name(sctx, pm->ino, + pm->gen, from_path); + } else { + ret = get_first_ref(sctx->parent_root, pm->ino, + &parent_ino, &parent_gen, name); + if (ret < 0) + goto out; + ret = get_cur_path(sctx, parent_ino, parent_gen, + from_path); + if (ret < 0) + goto out; + ret = fs_path_add_path(from_path, name); + } if (ret < 0) goto out; @@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) LIST_HEAD(deleted_refs); ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, - &pm->update_refs, &deleted_refs); + &pm->update_refs, &deleted_refs, + pm->is_orphan); if (ret < 0) goto out; if (rmdir_ino) { @@ -3283,6 +3291,127 @@ out: return ret; } +/* + * We might need to delay a directory rename even when no ancestor directory + * (in the send root) with a higher inode number than ours (sctx->cur_ino) was + * renamed. This happens when we rename a directory to the old name (the name + * in the parent root) of some other unrelated directory that got its rename + * delayed due to some ancestor with higher number that got renamed. + * + * Example: + * + * Parent snapshot: + * . (ino 256) + * |---- a/ (ino 257) + * | |---- file (ino 260) + * | + * |---- b/ (ino 258) + * |---- c/ (ino 259) + * + * Send snapshot: + * . (ino 256) + * |---- a/ (ino 258) + * |---- x/ (ino 259) + * |---- y/ (ino 257) + * |----- file (ino 260) + * + * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257 + * from 'a' to 'x/y' happening first, which in turn depends on the rename of + * inode 259 from 'c' to 'x'. So the order of rename commands the send stream + * must issue is: + * + * 1 - rename 259 from 'c' to 'x' + * 2 - rename 257 from 'a' to 'x/y' + * 3 - rename 258 from 'b' to 'a' + * + * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can + * be done right away and < 0 on error. + */ +static int wait_for_dest_dir_move(struct send_ctx *sctx, + struct recorded_ref *parent_ref, + const bool is_orphan) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_key di_key; + struct btrfs_dir_item *di; + u64 left_gen; + u64 right_gen; + int ret = 0; + + if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) + return 0; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = parent_ref->dir; + key.type = BTRFS_DIR_ITEM_KEY; + key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len); + + ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = 0; + goto out; + } + + di = btrfs_match_dir_item_name(sctx->parent_root, path, + parent_ref->name, parent_ref->name_len); + if (!di) { + ret = 0; + goto out; + } + /* + * di_key.objectid has the number of the inode that has a dentry in the + * parent directory with the same name that sctx->cur_ino is being + * renamed to. We need to check if that inode is in the send root as + * well and if it is currently marked as an inode with a pending rename, + * if it is, we need to delay the rename of sctx->cur_ino as well, so + * that it happens after that other inode is renamed. + */ + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key); + if (di_key.type != BTRFS_INODE_ITEM_KEY) { + ret = 0; + goto out; + } + + ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL, + &left_gen, NULL, NULL, NULL, NULL); + if (ret < 0) + goto out; + ret = get_inode_info(sctx->send_root, di_key.objectid, NULL, + &right_gen, NULL, NULL, NULL, NULL); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + goto out; + } + + /* Different inode, no need to delay the rename of sctx->cur_ino */ + if (right_gen != left_gen) { + ret = 0; + goto out; + } + + if (is_waiting_for_move(sctx, di_key.objectid)) { + ret = add_pending_dir_move(sctx, + sctx->cur_ino, + sctx->cur_inode_gen, + di_key.objectid, + &sctx->new_refs, + &sctx->deleted_refs, + is_orphan); + if (!ret) + ret = 1; + } +out: + btrfs_free_path(path); + return ret; +} + static int wait_for_parent_move(struct send_ctx *sctx, struct recorded_ref *parent_ref) { @@ -3349,7 +3478,8 @@ out: sctx->cur_inode_gen, ino, &sctx->new_refs, - &sctx->deleted_refs); + &sctx->deleted_refs, + false); if (!ret) ret = 1; } @@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) int did_overwrite = 0; int is_orphan = 0; u64 last_dir_ino_rm = 0; + bool can_rename = true; verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); @@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); } } + if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { + ret = wait_for_dest_dir_move(sctx, cur, is_orphan); + if (ret < 0) + goto out; + if (ret == 1) { + can_rename = false; + *pending_move = 1; + } + } + /* * link/move the ref to the new place. If we have an orphan * inode, move it and update valid_path. If not, link or move * it depending on the inode mode. */ - if (is_orphan) { + if (is_orphan && can_rename) { ret = send_rename(sctx, valid_path, cur->full_path); if (ret < 0) goto out; @@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); ret = fs_path_copy(valid_path, cur->full_path); if (ret < 0) goto out; - } else { + } else if (can_rename) { if (S_ISDIR(sctx->cur_inode_mode)) { /* * Dirs can't be linked, so move it. For moved diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 100a0442c413..db7d7b803a61 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1000,10 +1000,20 @@ int btrfs_sync_fs(struct super_block *sb, int wait) */ if (fs_info->pending_changes == 0) return 0; + /* + * A non-blocking test if the fs is frozen. We must not + * start a new transaction here otherwise a deadlock + * happens. The pending operations are delayed to the + * next commit after thawing. + */ + if (__sb_start_write(sb, SB_FREEZE_WRITE, false)) + __sb_end_write(sb, SB_FREEZE_WRITE); + else + return 0; trans = btrfs_start_transaction(root, 0); - } else { - return PTR_ERR(trans); } + if (IS_ERR(trans)) + return PTR_ERR(trans); } return btrfs_commit_transaction(trans, root); } @@ -1704,7 +1714,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) avail_space = device->total_bytes - device->bytes_used; /* align with stripe_len */ - do_div(avail_space, BTRFS_STRIPE_LEN); + avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN); avail_space *= BTRFS_STRIPE_LEN; /* diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index f7dd298b3cf6..3a4bbed723fd 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h @@ -61,11 +61,23 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \ BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature) /* convert from attribute */ -#define to_btrfs_feature_attr(a) \ - container_of(a, struct btrfs_feature_attr, kobj_attr) -#define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr) -#define attr_to_btrfs_feature_attr(a) \ - to_btrfs_feature_attr(attr_to_btrfs_attr(a)) +static inline struct btrfs_feature_attr * +to_btrfs_feature_attr(struct kobj_attribute *a) +{ + return container_of(a, struct btrfs_feature_attr, kobj_attr); +} + +static inline struct kobj_attribute *attr_to_btrfs_attr(struct attribute *attr) +{ + return container_of(attr, struct kobj_attribute, attr); +} + +static inline struct btrfs_feature_attr * +attr_to_btrfs_feature_attr(struct attribute *attr) +{ + return to_btrfs_feature_attr(attr_to_btrfs_attr(attr)); +} + char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags); extern const char * const btrfs_feature_set_names[3]; extern struct kobj_type space_info_ktype; diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index a116b55ce788..054fc0d97131 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -911,6 +911,197 @@ out: return ret; } +static int test_extent_accounting(void) +{ + struct inode *inode = NULL; + struct btrfs_root *root = NULL; + int ret = -ENOMEM; + + inode = btrfs_new_test_inode(); + if (!inode) { + test_msg("Couldn't allocate inode\n"); + return ret; + } + + root = btrfs_alloc_dummy_root(); + if (IS_ERR(root)) { + test_msg("Couldn't allocate root\n"); + goto out; + } + + root->fs_info = btrfs_alloc_dummy_fs_info(); + if (!root->fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); + goto out; + } + + BTRFS_I(inode)->root = root; + btrfs_test_inode_set_ops(inode); + + /* [BTRFS_MAX_EXTENT_SIZE] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, + NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 1) { + ret = -EINVAL; + test_msg("Miscount, wanted 1, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE][4k] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, + BTRFS_MAX_EXTENT_SIZE + 4095, NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 2) { + ret = -EINVAL; + test_msg("Miscount, wanted 2, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */ + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, + BTRFS_MAX_EXTENT_SIZE >> 1, + (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + EXTENT_DELALLOC | EXTENT_DIRTY | + EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, + NULL, GFP_NOFS); + if (ret) { + test_msg("clear_extent_bit returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 2) { + ret = -EINVAL; + test_msg("Miscount, wanted 2, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE][4K] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, + (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 2) { + ret = -EINVAL; + test_msg("Miscount, wanted 2, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* + * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K] + * + * I'm artificially adding 2 to outstanding_extents because in the + * buffered IO case we'd add things up as we go, but I don't feel like + * doing that here, this isn't the interesting case we want to test. + */ + BTRFS_I(inode)->outstanding_extents += 2; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192, + (BTRFS_MAX_EXTENT_SIZE << 1) + 12287, + NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 4) { + ret = -EINVAL; + test_msg("Miscount, wanted 4, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, + BTRFS_MAX_EXTENT_SIZE+8191, NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 3) { + ret = -EINVAL; + test_msg("Miscount, wanted 3, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */ + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, + BTRFS_MAX_EXTENT_SIZE+4096, + BTRFS_MAX_EXTENT_SIZE+8191, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, + NULL, GFP_NOFS); + if (ret) { + test_msg("clear_extent_bit returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 4) { + ret = -EINVAL; + test_msg("Miscount, wanted 4, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* + * Refill the hole again just for good measure, because I thought it + * might fail and I'd rather satisfy my paranoia at this point. + */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, + BTRFS_MAX_EXTENT_SIZE+8191, NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 3) { + ret = -EINVAL; + test_msg("Miscount, wanted 3, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* Empty */ + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, + NULL, GFP_NOFS); + if (ret) { + test_msg("clear_extent_bit returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents) { + ret = -EINVAL; + test_msg("Miscount, wanted 0, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + ret = 0; +out: + if (ret) + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, + NULL, GFP_NOFS); + iput(inode); + btrfs_free_dummy_root(root); + return ret; +} + int btrfs_test_inodes(void) { int ret; @@ -924,5 +1115,9 @@ int btrfs_test_inodes(void) if (ret) return ret; test_msg("Running hole first btrfs_get_extent test\n"); - return test_hole_first(); + ret = test_hole_first(); + if (ret) + return ret; + test_msg("Running outstanding_extents tests\n"); + return test_extent_accounting(); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dde9d285308e..91c303ac40b6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -93,11 +93,8 @@ static void clear_btree_io_tree(struct extent_io_tree *tree) */ ASSERT(!waitqueue_active(&state->wq)); free_extent_state(state); - if (need_resched()) { - spin_unlock(&tree->lock); - cond_resched(); - spin_lock(&tree->lock); - } + + cond_resched_lock(&tree->lock); } spin_unlock(&tree->lock); } @@ -1023,17 +1020,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, u64 old_root_bytenr; u64 old_root_used; struct btrfs_root *tree_root = root->fs_info->tree_root; - bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID); old_root_used = btrfs_root_used(&root->root_item); - btrfs_write_dirty_block_groups(trans, root); while (1) { old_root_bytenr = btrfs_root_bytenr(&root->root_item); if (old_root_bytenr == root->node->start && - old_root_used == btrfs_root_used(&root->root_item) && - (!extent_root || - list_empty(&trans->transaction->dirty_bgs))) + old_root_used == btrfs_root_used(&root->root_item)) break; btrfs_set_root_node(&root->root_item, root->node); @@ -1044,17 +1037,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, return ret; old_root_used = btrfs_root_used(&root->root_item); - if (extent_root) { - ret = btrfs_write_dirty_block_groups(trans, root); - if (ret) - return ret; - } - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) - return ret; - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) - return ret; } return 0; @@ -1071,6 +1053,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; + struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; struct list_head *next; struct extent_buffer *eb; int ret; @@ -1098,11 +1081,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, if (ret) return ret; + ret = btrfs_setup_space_cache(trans, root); + if (ret) + return ret; + /* run_qgroups might have added some more refs */ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); if (ret) return ret; - +again: while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; list_del_init(next); @@ -1115,8 +1102,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, ret = update_cowonly_root(trans, root); if (ret) return ret; + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + if (ret) + return ret; + } + + while (!list_empty(dirty_bgs)) { + ret = btrfs_write_dirty_block_groups(trans, root); + if (ret) + return ret; + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + if (ret) + return ret; } + if (!list_empty(&fs_info->dirty_cowonly_roots)) + goto again; + list_add_tail(&fs_info->extent_root->dirty_list, &trans->transaction->switch_commits); btrfs_after_dev_replace_commit(fs_info); @@ -1814,6 +1816,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wait_for_commit(root, cur_trans); + if (unlikely(cur_trans->aborted)) + ret = cur_trans->aborted; + btrfs_put_transaction(cur_trans); return ret; @@ -2133,7 +2138,7 @@ void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info) unsigned long prev; unsigned long bit; - prev = cmpxchg(&fs_info->pending_changes, 0, 0); + prev = xchg(&fs_info->pending_changes, 0); if (!prev) return; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4f59140c8c00..066e754b1294 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1012,7 +1012,7 @@ again: base = btrfs_item_ptr_offset(leaf, path->slots[0]); while (cur_offset < item_size) { - extref = (struct btrfs_inode_extref *)base + cur_offset; + extref = (struct btrfs_inode_extref *)(base + cur_offset); victim_name_len = btrfs_inode_extref_name_len(leaf, extref); @@ -2637,6 +2637,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { + blk_finish_plug(&plug); mutex_unlock(&log_root_tree->log_mutex); ret = root_log_ctx.log_ret; goto out; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6d4fb246eb7e..a73acf496e10 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -366,8 +366,8 @@ loop_lock: btrfsic_submit_bio(cur->bi_rw, cur); num_run++; batch_run++; - if (need_resched()) - cond_resched(); + + cond_resched(); /* * we made progress, there is more work to do and the bdi @@ -400,8 +400,7 @@ loop_lock: * against it before looping */ last_waited = ioc->last_waited; - if (need_resched()) - cond_resched(); + cond_resched(); continue; } spin_lock(&device->io_lock); @@ -3017,7 +3016,7 @@ static int chunk_drange_filter(struct extent_buffer *leaf, stripe_offset = btrfs_stripe_offset(leaf, stripe); stripe_length = btrfs_chunk_length(leaf, chunk); - do_div(stripe_length, factor); + stripe_length = div_u64(stripe_length, factor); if (stripe_offset < bargs->pend && stripe_offset + stripe_length > bargs->pstart) @@ -4280,7 +4279,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), max_chunk_size); - devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices, + devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info), GFP_NOFS); if (!devices_info) return -ENOMEM; @@ -4391,8 +4390,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, */ if (stripe_size * data_stripes > max_chunk_size) { u64 mask = (1ULL << 24) - 1; - stripe_size = max_chunk_size; - do_div(stripe_size, data_stripes); + + stripe_size = div_u64(max_chunk_size, data_stripes); /* bump the answer up to a 16MB boundary */ stripe_size = (stripe_size + mask) & ~mask; @@ -4404,10 +4403,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, stripe_size = devices_info[ndevs-1].max_avail; } - do_div(stripe_size, dev_stripes); + stripe_size = div_u64(stripe_size, dev_stripes); /* align to BTRFS_STRIPE_LEN */ - do_div(stripe_size, raid_stripe_len); + stripe_size = div_u64(stripe_size, raid_stripe_len); stripe_size *= raid_stripe_len; map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); @@ -4894,10 +4893,17 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) { struct btrfs_bio *bbio = kzalloc( + /* the size of the btrfs_bio */ sizeof(struct btrfs_bio) + + /* plus the variable array for the stripes */ sizeof(struct btrfs_bio_stripe) * (total_stripes) + + /* plus the variable array for the tgt dev */ sizeof(int) * (real_stripes) + - sizeof(u64) * (real_stripes), + /* + * plus the raid_map, which includes both the tgt dev + * and the stripes + */ + sizeof(u64) * (total_stripes), GFP_NOFS); if (!bbio) return NULL; @@ -4938,7 +4944,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, u64 stripe_nr_orig; u64 stripe_nr_end; u64 stripe_len; - int stripe_index; + u32 stripe_index; int i; int ret = 0; int num_stripes; @@ -4979,7 +4985,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, * stripe_nr counts the total number of stripes we have to stride * to get to this block */ - do_div(stripe_nr, stripe_len); + stripe_nr = div64_u64(stripe_nr, stripe_len); stripe_offset = stripe_nr * stripe_len; BUG_ON(offset < stripe_offset); @@ -4995,7 +5001,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, /* allow a write of a full stripe, but make sure we don't * allow straddling of stripes */ - do_div(raid56_full_stripe_start, full_stripe_len); + raid56_full_stripe_start = div64_u64(raid56_full_stripe_start, + full_stripe_len); raid56_full_stripe_start *= full_stripe_len; } @@ -5120,7 +5127,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, stripe_index = 0; stripe_nr_orig = stripe_nr; stripe_nr_end = ALIGN(offset + *length, map->stripe_len); - do_div(stripe_nr_end, map->stripe_len); + stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len); stripe_end_offset = stripe_nr_end * map->stripe_len - (offset + *length); @@ -5128,7 +5135,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (rw & REQ_DISCARD) num_stripes = min_t(u64, map->num_stripes, stripe_nr_end - stripe_nr_orig); - stripe_index = do_div(stripe_nr, map->num_stripes); + stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, + &stripe_index); if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))) mirror_num = 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { @@ -5154,9 +5162,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { - int factor = map->num_stripes / map->sub_stripes; + u32 factor = map->num_stripes / map->sub_stripes; - stripe_index = do_div(stripe_nr, factor); + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); stripe_index *= map->sub_stripes; if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) @@ -5182,8 +5190,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || mirror_num > 1)) { /* push stripe_nr back to the start of the full stripe */ - stripe_nr = raid56_full_stripe_start; - do_div(stripe_nr, stripe_len * nr_data_stripes(map)); + stripe_nr = div_u64(raid56_full_stripe_start, + stripe_len * nr_data_stripes(map)); /* RAID[56] write or recovery. Return all stripes */ num_stripes = map->num_stripes; @@ -5193,32 +5201,32 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, stripe_index = 0; stripe_offset = 0; } else { - u64 tmp; - /* * Mirror #0 or #1 means the original data block. * Mirror #2 is RAID5 parity block. * Mirror #3 is RAID6 Q block. */ - stripe_index = do_div(stripe_nr, nr_data_stripes(map)); + stripe_nr = div_u64_rem(stripe_nr, + nr_data_stripes(map), &stripe_index); if (mirror_num > 1) stripe_index = nr_data_stripes(map) + mirror_num - 2; /* We distribute the parity blocks across stripes */ - tmp = stripe_nr + stripe_index; - stripe_index = do_div(tmp, map->num_stripes); + div_u64_rem(stripe_nr + stripe_index, map->num_stripes, + &stripe_index); if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) && mirror_num <= 1) mirror_num = 1; } } else { /* - * after this do_div call, stripe_nr is the number of stripes - * on this device we have to walk to find the data, and - * stripe_index is the number of our device in the stripe array + * after this, stripe_nr is the number of stripes on this + * device we have to walk to find the data, and stripe_index is + * the number of our device in the stripe array */ - stripe_index = do_div(stripe_nr, map->num_stripes); + stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, + &stripe_index); mirror_num = stripe_index + 1; } BUG_ON(stripe_index >= map->num_stripes); @@ -5245,7 +5253,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || mirror_num > 1)) { u64 tmp; - int i, rot; + unsigned rot; bbio->raid_map = (u64 *)((void *)bbio->stripes + sizeof(struct btrfs_bio_stripe) * @@ -5253,8 +5261,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, sizeof(int) * tgtdev_indexes); /* Work out the disk rotation on this stripe-set */ - tmp = stripe_nr; - rot = do_div(tmp, num_stripes); + div_u64_rem(stripe_nr, num_stripes, &rot); /* Fill in the logical address of each stripe */ tmp = stripe_nr * nr_data_stripes(map); @@ -5269,8 +5276,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } if (rw & REQ_DISCARD) { - int factor = 0; - int sub_stripes = 0; + u32 factor = 0; + u32 sub_stripes = 0; u64 stripes_per_dev = 0; u32 remaining_stripes = 0; u32 last_stripe = 0; @@ -5421,9 +5428,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } } if (found) { - u64 length = map->stripe_len; - - if (physical_of_found + length <= + if (physical_of_found + map->stripe_len <= dev_replace->cursor_left) { struct btrfs_bio_stripe *tgtdev_stripe = bbio->stripes + num_stripes; @@ -5519,15 +5524,15 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, rmap_len = map->stripe_len; if (map->type & BTRFS_BLOCK_GROUP_RAID10) - do_div(length, map->num_stripes / map->sub_stripes); + length = div_u64(length, map->num_stripes / map->sub_stripes); else if (map->type & BTRFS_BLOCK_GROUP_RAID0) - do_div(length, map->num_stripes); + length = div_u64(length, map->num_stripes); else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { - do_div(length, nr_data_stripes(map)); + length = div_u64(length, nr_data_stripes(map)); rmap_len = map->stripe_len * nr_data_stripes(map); } - buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); + buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS); BUG_ON(!buf); /* -ENOMEM */ for (i = 0; i < map->num_stripes; i++) { @@ -5538,11 +5543,11 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, continue; stripe_nr = physical - map->stripes[i].physical; - do_div(stripe_nr, map->stripe_len); + stripe_nr = div_u64(stripe_nr, map->stripe_len); if (map->type & BTRFS_BLOCK_GROUP_RAID10) { stripe_nr = stripe_nr * map->num_stripes + i; - do_div(stripe_nr, map->sub_stripes); + stripe_nr = div_u64(stripe_nr, map->sub_stripes); } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { stripe_nr = stripe_nr * map->num_stripes + i; } /* else if RAID[56], multiply by nr_data_stripes(). diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 47b19465f0dc..883b93623bc5 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans, name, name_len, -1); if (!di && (flags & XATTR_REPLACE)) ret = -ENODATA; + else if (IS_ERR(di)) + ret = PTR_ERR(di); else if (di) ret = btrfs_delete_one_dir_name(trans, root, path, di); goto out; @@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans, ASSERT(mutex_is_locked(&inode->i_mutex)); di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name, name_len, 0); - if (!di) { + if (!di) ret = -ENODATA; + else if (IS_ERR(di)) + ret = PTR_ERR(di); + if (ret) goto out; - } btrfs_release_path(path); di = NULL; } |