summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/block-group.c29
-rw-r--r--fs/btrfs/free-space-cache.c155
-rw-r--r--fs/btrfs/free-space-cache.h3
-rw-r--r--fs/btrfs/tests/btrfs-tests.c2
4 files changed, 70 insertions, 119 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index bb6685711824..adbd18dc08a1 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -695,33 +695,6 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
spin_lock(&cache->lock);
- /*
- * This should be a rare occasion, but this could happen I think in the
- * case where one thread starts to load the space cache info, and then
- * some other thread starts a transaction commit which tries to do an
- * allocation while the other thread is still loading the space cache
- * info. The previous loop should have kept us from choosing this block
- * group, but if we've moved to the state where we will wait on caching
- * block groups we need to first check if we're doing a fast load here,
- * so we can wait for it to finish, otherwise we could end up allocating
- * from a block group who's cache gets evicted for one reason or
- * another.
- */
- while (cache->cached == BTRFS_CACHE_FAST) {
- struct btrfs_caching_control *ctl;
-
- ctl = cache->caching_ctl;
- refcount_inc(&ctl->count);
- prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock(&cache->lock);
-
- schedule();
-
- finish_wait(&ctl->wait, &wait);
- btrfs_put_caching_control(ctl);
- spin_lock(&cache->lock);
- }
-
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
kfree(caching_ctl);
@@ -1805,7 +1778,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
INIT_LIST_HEAD(&cache->discard_list);
INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->io_list);
- btrfs_init_free_space_ctl(cache);
+ btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock);
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0787339c7b93..58bd2d3e54db 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -33,8 +33,6 @@ struct btrfs_trim_range {
struct list_head list;
};
-static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *bitmap_info);
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
@@ -43,6 +41,14 @@ static int btrfs_wait_cache_io_root(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_io_ctl *io_ctl,
struct btrfs_path *path);
+static int search_bitmap(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *bitmap_info, u64 *offset,
+ u64 *bytes, bool for_alloc);
+static void free_bitmap(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *bitmap_info);
+static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
+ struct btrfs_free_space *info, u64 offset,
+ u64 bytes);
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_path *path,
@@ -625,44 +631,6 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
return 0;
}
-/*
- * Since we attach pinned extents after the fact we can have contiguous sections
- * of free space that are split up in entries. This poses a problem with the
- * tree logging stuff since it could have allocated across what appears to be 2
- * entries since we would have merged the entries when adding the pinned extents
- * back to the free space cache. So run through the space cache that we just
- * loaded and merge contiguous entries. This will make the log replay stuff not
- * blow up and it will make for nicer allocator behavior.
- */
-static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
-{
- struct btrfs_free_space *e, *prev = NULL;
- struct rb_node *n;
-
-again:
- spin_lock(&ctl->tree_lock);
- for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
- e = rb_entry(n, struct btrfs_free_space, offset_index);
- if (!prev)
- goto next;
- if (e->bitmap || prev->bitmap)
- goto next;
- if (prev->offset + prev->bytes == e->offset) {
- unlink_free_space(ctl, prev);
- unlink_free_space(ctl, e);
- prev->bytes += e->bytes;
- kmem_cache_free(btrfs_free_space_cachep, e);
- link_free_space(ctl, prev);
- prev = NULL;
- spin_unlock(&ctl->tree_lock);
- goto again;
- }
-next:
- prev = e;
- }
- spin_unlock(&ctl->tree_lock);
-}
-
static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
struct btrfs_path *path, u64 offset)
@@ -753,16 +721,6 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
goto free_cache;
}
- /*
- * Sync discard ensures that the free space cache is always
- * trimmed. So when reading this in, the state should reflect
- * that. We also do this for async as a stop gap for lack of
- * persistence.
- */
- if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
- btrfs_test_opt(fs_info, DISCARD_ASYNC))
- e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
-
if (!e->bytes) {
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
@@ -816,16 +774,9 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
ret = io_ctl_read_bitmap(&io_ctl, e);
if (ret)
goto free_cache;
- e->bitmap_extents = count_bitmap_extents(ctl, e);
- if (!btrfs_free_space_trimmed(e)) {
- ctl->discardable_extents[BTRFS_STAT_CURR] +=
- e->bitmap_extents;
- ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
- }
}
io_ctl_drop_pages(&io_ctl);
- merge_space_tree(ctl);
ret = 1;
out:
io_ctl_free(&io_ctl);
@@ -836,10 +787,46 @@ free_cache:
goto out;
}
+static int copy_free_space_cache(struct btrfs_block_group *block_group,
+ struct btrfs_free_space_ctl *ctl)
+{
+ struct btrfs_free_space *info;
+ struct rb_node *n;
+ int ret = 0;
+
+ while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
+ info = rb_entry(n, struct btrfs_free_space, offset_index);
+ if (!info->bitmap) {
+ unlink_free_space(ctl, info);
+ ret = btrfs_add_free_space(block_group, info->offset,
+ info->bytes);
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ } else {
+ u64 offset = info->offset;
+ u64 bytes = ctl->unit;
+
+ while (search_bitmap(ctl, info, &offset, &bytes,
+ false) == 0) {
+ ret = btrfs_add_free_space(block_group, offset,
+ bytes);
+ if (ret)
+ break;
+ bitmap_clear_bits(ctl, info, offset, bytes);
+ offset = info->offset;
+ bytes = ctl->unit;
+ }
+ free_bitmap(ctl, info);
+ }
+ cond_resched();
+ }
+ return ret;
+}
+
int load_free_space_cache(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+ struct btrfs_free_space_ctl tmp_ctl = {};
struct inode *inode;
struct btrfs_path *path;
int ret = 0;
@@ -847,6 +834,13 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
u64 used = block_group->used;
/*
+ * Because we could potentially discard our loaded free space, we want
+ * to load everything into a temporary structure first, and then if it's
+ * valid copy it all into the actual free space ctl.
+ */
+ btrfs_init_free_space_ctl(block_group, &tmp_ctl);
+
+ /*
* If this block group has been marked to be cleared for one reason or
* another then we can't trust the on disk cache, so just return.
*/
@@ -897,19 +891,25 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
}
spin_unlock(&block_group->lock);
- ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
+ ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
path, block_group->start);
btrfs_free_path(path);
if (ret <= 0)
goto out;
- spin_lock(&ctl->tree_lock);
- matched = (ctl->free_space == (block_group->length - used -
- block_group->bytes_super));
- spin_unlock(&ctl->tree_lock);
+ matched = (tmp_ctl.free_space == (block_group->length - used -
+ block_group->bytes_super));
- if (!matched) {
- __btrfs_remove_free_space_cache(ctl);
+ if (matched) {
+ ret = copy_free_space_cache(block_group, &tmp_ctl);
+ /*
+ * ret == 1 means we successfully loaded the free space cache,
+ * so we need to re-set it here.
+ */
+ if (ret == 0)
+ ret = 1;
+ } else {
+ __btrfs_remove_free_space_cache(&tmp_ctl);
btrfs_warn(fs_info,
"block group %llu has wrong amount of free space",
block_group->start);
@@ -1914,29 +1914,6 @@ out:
return NULL;
}
-static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
- struct btrfs_free_space *bitmap_info)
-{
- struct btrfs_block_group *block_group = ctl->private;
- u64 bytes = bitmap_info->bytes;
- unsigned int rs, re;
- int count = 0;
-
- if (!block_group || !bytes)
- return count;
-
- bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
- BITS_PER_BITMAP) {
- bytes -= (rs - re) * ctl->unit;
- count++;
-
- if (!bytes)
- break;
- }
-
- return count;
-}
-
static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, u64 offset)
{
@@ -2676,10 +2653,10 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
"%d blocks of free space at or bigger than bytes is", count);
}
-void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
+void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
+ struct btrfs_free_space_ctl *ctl)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
- struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
spin_lock_init(&ctl->tree_lock);
ctl->unit = fs_info->sectorsize;
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index e3d5e0ad8f8e..bf8d127d2407 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -109,7 +109,8 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_path *path,
struct inode *inode);
-void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group);
+void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
+ struct btrfs_free_space_ctl *ctl);
int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_free_space_ctl *ctl,
u64 bytenr, u64 size,
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index b36e88753f1f..8ca334d554af 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -225,7 +225,7 @@ btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
INIT_LIST_HEAD(&cache->bg_list);
- btrfs_init_free_space_ctl(cache);
+ btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
mutex_init(&cache->free_space_lock);
return cache;