summaryrefslogtreecommitdiff
path: root/fs/btrfs/free-space-cache.c
diff options
context:
space:
mode:
authorLi Zefan <lizf@cn.fujitsu.com>2011-04-20 10:06:11 +0800
committerLi Zefan <lizf@cn.fujitsu.com>2011-04-25 16:46:04 +0800
commit581bb050941b4f220f84d3e5ed6dace3d42dd382 (patch)
tree5ebd56af5eb3612f508419b188dfc18e959e7c94 /fs/btrfs/free-space-cache.c
parent34d52cb6c50b5a43901709998f59fb1c5a43dc4a (diff)
downloadlwn-581bb050941b4f220f84d3e5ed6dace3d42dd382.tar.gz
lwn-581bb050941b4f220f84d3e5ed6dace3d42dd382.zip
Btrfs: Cache free inode numbers in memory
Currently btrfs stores the highest objectid of the fs tree, and it always returns (highest+1) inode number when we create a file, so inode numbers won't be reclaimed when we delete files, so we'll run out of inode numbers as we keep create/delete files in 32bits machines. This fixes it, and it works similarly to how we cache free space in block cgroups. We start a kernel thread to read the file tree. By scanning inode items, we know which chunks of inode numbers are free, and we cache them in an rb-tree. Because we are searching the commit root, we have to carefully handle the cross-transaction case. The rb-tree is a hybrid extent+bitmap tree, so if we have too many small chunks of inode numbers, we'll use bitmaps. Initially we allow 16K ram of extents, and a bitmap will be used if we exceed this threshold. The extents threshold is adjusted in runtime. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r--fs/btrfs/free-space-cache.c96
1 files changed, 76 insertions, 20 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d4fb4f077a79..2ce89bfc8815 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -25,6 +25,7 @@
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
+#include "inode-map.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
@@ -105,7 +106,7 @@ int create_free_space_inode(struct btrfs_root *root,
u64 objectid;
int ret;
- ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
+ ret = btrfs_find_free_objectid(root, &objectid);
if (ret < 0)
return ret;
@@ -1496,10 +1497,9 @@ bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
return merged;
}
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes)
+int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
+ u64 offset, u64 bytes)
{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info;
int ret = 0;
@@ -1751,11 +1751,29 @@ out:
return 0;
}
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
+void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *info;
struct rb_node *node;
+
+ spin_lock(&ctl->tree_lock);
+ while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
+ info = rb_entry(node, struct btrfs_free_space, offset_index);
+ unlink_free_space(ctl, info);
+ kfree(info->bitmap);
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ if (need_resched()) {
+ spin_unlock(&ctl->tree_lock);
+ cond_resched();
+ spin_lock(&ctl->tree_lock);
+ }
+ }
+ spin_unlock(&ctl->tree_lock);
+}
+
+void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
+{
+ struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_cluster *cluster;
struct list_head *head;
@@ -1773,21 +1791,9 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
spin_lock(&ctl->tree_lock);
}
}
-
- while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
- info = rb_entry(node, struct btrfs_free_space, offset_index);
- unlink_free_space(ctl, info);
- if (info->bitmap)
- kfree(info->bitmap);
- kmem_cache_free(btrfs_free_space_cachep, info);
- if (need_resched()) {
- spin_unlock(&ctl->tree_lock);
- cond_resched();
- spin_lock(&ctl->tree_lock);
- }
- }
-
spin_unlock(&ctl->tree_lock);
+
+ __btrfs_remove_free_space_cache(ctl);
}
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
@@ -2352,3 +2358,53 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
return ret;
}
+
+/*
+ * Find the left-most item in the cache tree, and then return the
+ * smallest inode number in the item.
+ *
+ * Note: the returned inode number may not be the smallest one in
+ * the tree, if the left-most item is a bitmap.
+ */
+u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
+{
+ struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
+ struct btrfs_free_space *entry = NULL;
+ u64 ino = 0;
+
+ spin_lock(&ctl->tree_lock);
+
+ if (RB_EMPTY_ROOT(&ctl->free_space_offset))
+ goto out;
+
+ entry = rb_entry(rb_first(&ctl->free_space_offset),
+ struct btrfs_free_space, offset_index);
+
+ if (!entry->bitmap) {
+ ino = entry->offset;
+
+ unlink_free_space(ctl, entry);
+ entry->offset++;
+ entry->bytes--;
+ if (!entry->bytes)
+ kmem_cache_free(btrfs_free_space_cachep, entry);
+ else
+ link_free_space(ctl, entry);
+ } else {
+ u64 offset = 0;
+ u64 count = 1;
+ int ret;
+
+ ret = search_bitmap(ctl, entry, &offset, &count);
+ BUG_ON(ret);
+
+ ino = offset;
+ bitmap_clear_bits(ctl, entry, offset, 1);
+ if (entry->bytes == 0)
+ free_bitmap(ctl, entry);
+ }
+out:
+ spin_unlock(&ctl->tree_lock);
+
+ return ino;
+}