diff options
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 223 |
1 files changed, 170 insertions, 53 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7516fb9c0bd5..4890d6f3ad15 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -44,6 +44,8 @@ #include "ext4_jbd2.h" #include "ext4_extents.h" +#include <trace/events/ext4.h> + static int ext4_ext_truncate_extend_restart(handle_t *handle, struct inode *inode, int needed) @@ -664,6 +666,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, if (unlikely(!bh)) goto err; if (!bh_uptodate_or_lock(bh)) { + trace_ext4_ext_load_extent(inode, block, + path[ppos].p_block); if (bh_submit_read(bh) < 0) { put_bh(bh); goto err; @@ -1034,7 +1038,7 @@ cleanup: for (i = 0; i < depth; i++) { if (!ablocks[i]) continue; - ext4_free_blocks(handle, inode, 0, ablocks[i], 1, + ext4_free_blocks(handle, inode, NULL, ablocks[i], 1, EXT4_FREE_BLOCKS_METADATA); } } @@ -1725,7 +1729,7 @@ repeat: BUG_ON(npath->p_depth != path->p_depth); eh = npath[depth].p_hdr; if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { - ext_debug("next leaf isnt full(%d)\n", + ext_debug("next leaf isn't full(%d)\n", le16_to_cpu(eh->eh_entries)); path = npath; goto repeat; @@ -2059,7 +2063,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, if (err) return err; ext_debug("index is empty, remove it, free block %llu\n", leaf); - ext4_free_blocks(handle, inode, 0, leaf, 1, + ext4_free_blocks(handle, inode, NULL, leaf, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); return err; } @@ -2156,7 +2160,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, num = le32_to_cpu(ex->ee_block) + ee_len - from; start = ext4_ext_pblock(ex) + ee_len - num; ext_debug("free last %u blocks starting %llu\n", num, start); - ext4_free_blocks(handle, inode, 0, start, num, flags); + ext4_free_blocks(handle, inode, NULL, start, num, flags); } else if (from == le32_to_cpu(ex->ee_block) && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", @@ -2529,7 +2533,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) /* * This function is called by ext4_ext_map_blocks() if someone tries to write * to an uninitialized extent. It may result in splitting the uninitialized - * extent into multiple extents (upto three - one initialized and two + * extent into multiple extents (up to three - one initialized and two * uninitialized). * There are three possibilities: * a> There is no split required: Entire extent should be initialized @@ -3108,14 +3112,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, { int i, depth; struct ext4_extent_header *eh; - struct ext4_extent *ex, *last_ex; + struct ext4_extent *last_ex; if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)) return 0; depth = ext_depth(inode); eh = path[depth].p_hdr; - ex = path[depth].p_ext; if (unlikely(!eh->eh_entries)) { EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and " @@ -3171,7 +3174,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, path, flags); /* * Flag the inode(non aio case) or end_io struct (aio case) - * that this IO needs to convertion to written when IO is + * that this IO needs to conversion to written when IO is * completed */ if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { @@ -3295,9 +3298,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags) { struct ext4_ext_path *path = NULL; - struct ext4_extent_header *eh; struct ext4_extent newex, *ex; - ext4_fsblk_t newblock; + ext4_fsblk_t newblock = 0; int err = 0, depth, ret; unsigned int allocated = 0; struct ext4_allocation_request ar; @@ -3305,6 +3307,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext_debug("blocks %u/%u requested for inode %lu\n", map->m_lblk, map->m_len, inode->i_ino); + trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); /* check in cache */ if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { @@ -3352,7 +3355,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, err = -EIO; goto out2; } - eh = path[depth].p_hdr; ex = path[depth].p_ext; if (ex) { @@ -3458,10 +3460,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, ext4_ext_mark_uninitialized(&newex); /* * io_end structure was created for every IO write to an - * uninitialized extent. To avoid unecessary conversion, + * uninitialized extent. To avoid unnecessary conversion, * here we flag the IO that really needs the conversion. * For non asycn direct IO case, flag the inode state - * that we need to perform convertion when IO is done. + * that we need to perform conversion when IO is done. */ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { @@ -3485,7 +3487,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex), + ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), ext4_ext_get_actual_len(&newex), 0); goto out2; } @@ -3525,6 +3527,8 @@ out2: ext4_ext_drop_refs(path); kfree(path); } + trace_ext4_ext_map_blocks_exit(inode, map->m_lblk, + newblock, map->m_len, err ? err : allocated); return err ? err : allocated; } @@ -3658,6 +3662,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EOPNOTSUPP; + trace_ext4_fallocate_enter(inode, offset, len, mode); map.m_lblk = offset >> blkbits; /* * We can't just convert len to max_blocks because @@ -3673,6 +3678,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ret = inode_newsize_ok(inode, (len + offset)); if (ret) { mutex_unlock(&inode->i_mutex); + trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); return ret; } retry: @@ -3717,6 +3723,8 @@ retry: goto retry; } mutex_unlock(&inode->i_mutex); + trace_ext4_fallocate_exit(inode, offset, max_blocks, + ret > 0 ? ret2 : ret); return ret > 0 ? ret2 : ret; } @@ -3775,6 +3783,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, } return ret > 0 ? ret2 : ret; } + /* * Callback function called for each extent to gather FIEMAP information. */ @@ -3782,38 +3791,162 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, struct ext4_ext_cache *newex, struct ext4_extent *ex, void *data) { - struct fiemap_extent_info *fieinfo = data; - unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; __u64 logical; __u64 physical; __u64 length; + loff_t size; __u32 flags = 0; - int error; + int ret = 0; + struct fiemap_extent_info *fieinfo = data; + unsigned char blksize_bits; - logical = (__u64)newex->ec_block << blksize_bits; + blksize_bits = inode->i_sb->s_blocksize_bits; + logical = (__u64)newex->ec_block << blksize_bits; if (newex->ec_start == 0) { - pgoff_t offset; - struct page *page; + /* + * No extent in extent-tree contains block @newex->ec_start, + * then the block may stay in 1)a hole or 2)delayed-extent. + * + * Holes or delayed-extents are processed as follows. + * 1. lookup dirty pages with specified range in pagecache. + * If no page is got, then there is no delayed-extent and + * return with EXT_CONTINUE. + * 2. find the 1st mapped buffer, + * 3. check if the mapped buffer is both in the request range + * and a delayed buffer. If not, there is no delayed-extent, + * then return. + * 4. a delayed-extent is found, the extent will be collected. + */ + ext4_lblk_t end = 0; + pgoff_t last_offset; + pgoff_t offset; + pgoff_t index; + struct page **pages = NULL; struct buffer_head *bh = NULL; + struct buffer_head *head = NULL; + unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *); + + pages = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (pages == NULL) + return -ENOMEM; offset = logical >> PAGE_SHIFT; - page = find_get_page(inode->i_mapping, offset); - if (!page || !page_has_buffers(page)) - return EXT_CONTINUE; +repeat: + last_offset = offset; + head = NULL; + ret = find_get_pages_tag(inode->i_mapping, &offset, + PAGECACHE_TAG_DIRTY, nr_pages, pages); + + if (!(flags & FIEMAP_EXTENT_DELALLOC)) { + /* First time, try to find a mapped buffer. */ + if (ret == 0) { +out: + for (index = 0; index < ret; index++) + page_cache_release(pages[index]); + /* just a hole. */ + kfree(pages); + return EXT_CONTINUE; + } - bh = page_buffers(page); + /* Try to find the 1st mapped buffer. */ + end = ((__u64)pages[0]->index << PAGE_SHIFT) >> + blksize_bits; + if (!page_has_buffers(pages[0])) + goto out; + head = page_buffers(pages[0]); + if (!head) + goto out; - if (!bh) - return EXT_CONTINUE; + bh = head; + do { + if (buffer_mapped(bh)) { + /* get the 1st mapped buffer. */ + if (end > newex->ec_block + + newex->ec_len) + /* The buffer is out of + * the request range. + */ + goto out; + goto found_mapped_buffer; + } + bh = bh->b_this_page; + end++; + } while (bh != head); - if (buffer_delay(bh)) { - flags |= FIEMAP_EXTENT_DELALLOC; - page_cache_release(page); + /* No mapped buffer found. */ + goto out; } else { - page_cache_release(page); - return EXT_CONTINUE; + /*Find contiguous delayed buffers. */ + if (ret > 0 && pages[0]->index == last_offset) + head = page_buffers(pages[0]); + bh = head; } + +found_mapped_buffer: + if (bh != NULL && buffer_delay(bh)) { + /* 1st or contiguous delayed buffer found. */ + if (!(flags & FIEMAP_EXTENT_DELALLOC)) { + /* + * 1st delayed buffer found, record + * the start of extent. + */ + flags |= FIEMAP_EXTENT_DELALLOC; + newex->ec_block = end; + logical = (__u64)end << blksize_bits; + } + /* Find contiguous delayed buffers. */ + do { + if (!buffer_delay(bh)) + goto found_delayed_extent; + bh = bh->b_this_page; + end++; + } while (bh != head); + + for (index = 1; index < ret; index++) { + if (!page_has_buffers(pages[index])) { + bh = NULL; + break; + } + head = page_buffers(pages[index]); + if (!head) { + bh = NULL; + break; + } + if (pages[index]->index != + pages[0]->index + index) { + /* Blocks are not contiguous. */ + bh = NULL; + break; + } + bh = head; + do { + if (!buffer_delay(bh)) + /* Delayed-extent ends. */ + goto found_delayed_extent; + bh = bh->b_this_page; + end++; + } while (bh != head); + } + } else if (!(flags & FIEMAP_EXTENT_DELALLOC)) + /* a hole found. */ + goto out; + +found_delayed_extent: + newex->ec_len = min(end - newex->ec_block, + (ext4_lblk_t)EXT_INIT_MAX_LEN); + if (ret == nr_pages && bh != NULL && + newex->ec_len < EXT_INIT_MAX_LEN && + buffer_delay(bh)) { + /* Have not collected an extent and continue. */ + for (index = 0; index < ret; index++) + page_cache_release(pages[index]); + goto repeat; + } + + for (index = 0; index < ret; index++) + page_cache_release(pages[index]); + kfree(pages); } physical = (__u64)newex->ec_start << blksize_bits; @@ -3822,32 +3955,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, if (ex && ext4_ext_is_uninitialized(ex)) flags |= FIEMAP_EXTENT_UNWRITTEN; - /* - * If this extent reaches EXT_MAX_BLOCK, it must be last. - * - * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, - * this also indicates no more allocated blocks. - * - * XXX this might miss a single-block extent at EXT_MAX_BLOCK - */ - if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK || - newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) { - loff_t size = i_size_read(inode); - loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb); - + size = i_size_read(inode); + if (logical + length >= size) flags |= FIEMAP_EXTENT_LAST; - if ((flags & FIEMAP_EXTENT_DELALLOC) && - logical+length > size) - length = (size - logical + bs - 1) & ~(bs-1); - } - error = fiemap_fill_next_extent(fieinfo, logical, physical, + ret = fiemap_fill_next_extent(fieinfo, logical, physical, length, flags); - if (error < 0) - return error; - if (error == 1) + if (ret < 0) + return ret; + if (ret == 1) return EXT_BREAK; - return EXT_CONTINUE; } |