Diffstat (limited to 'fs/btrfs/extent-tree.c')
 fs/btrfs/extent-tree.c | 700 ++++++++++++++++++++++++++++-----------------
 1 file changed, 398 insertions(+), 302 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 33d979e9ea2a..e2d7e86b51d1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -97,10 +97,11 @@ static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
-static int __reserve_metadata_bytes(struct btrfs_root *root,
+static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush);
+ enum btrfs_reserve_flush_enum flush,
+ bool system_chunk);
static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 num_bytes);
@@ -766,6 +767,26 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
return NULL;
}
+static void add_pinned_bytes(struct btrfs_fs_info *fs_info, s64 num_bytes,
+ u64 owner, u64 root_objectid)
+{
+ struct btrfs_space_info *space_info;
+ u64 flags;
+
+ if (owner < BTRFS_FIRST_FREE_OBJECTID) {
+ if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
+ flags = BTRFS_BLOCK_GROUP_SYSTEM;
+ else
+ flags = BTRFS_BLOCK_GROUP_METADATA;
+ } else {
+ flags = BTRFS_BLOCK_GROUP_DATA;
+ }
+
+ space_info = __find_space_info(fs_info, flags);
+ ASSERT(space_info);
+ percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
+}
+
/*
* after adding space to the filesystem, we need to clear the full flags
* on all the space infos.
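The add_pinned_bytes() helper moves up here (its old copy near __btrfs_free_extent() is deleted later in this patch) so the ref add/drop paths can call it. Its flag selection is terse, so here is a hedged standalone model of just that mapping; the constants are repeated from the btrfs headers only so the sketch compiles on its own:

/* Userspace model of add_pinned_bytes()'s flag selection; the BTRFS_*
 * values are stand-ins copied here so this builds without the headers. */
#include <stdio.h>
#include <stdint.h>

#define BTRFS_FIRST_FREE_OBJECTID  256ULL  /* first normal inode number */
#define BTRFS_CHUNK_TREE_OBJECTID  3ULL
#define BTRFS_BLOCK_GROUP_DATA     (1ULL << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM   (1ULL << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)

static uint64_t pinned_flags(uint64_t owner, uint64_t root_objectid)
{
        /* Tree blocks store the level (< 256) as owner; data extents
         * store the inode number (>= 256). */
        if (owner < BTRFS_FIRST_FREE_OBJECTID)
                return root_objectid == BTRFS_CHUNK_TREE_OBJECTID ?
                       BTRFS_BLOCK_GROUP_SYSTEM : BTRFS_BLOCK_GROUP_METADATA;
        return BTRFS_BLOCK_GROUP_DATA;
}

int main(void)
{
        printf("owner=0 root=3   -> %#llx (system)\n",
               (unsigned long long)pinned_flags(0, 3));
        printf("owner=0 root=5   -> %#llx (metadata)\n",
               (unsigned long long)pinned_flags(0, 5));
        printf("owner=257 root=5 -> %#llx (data)\n",
               (unsigned long long)pinned_flags(257, 5));
        return 0;
}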
@@ -1127,6 +1148,64 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
}
#endif
+/*
+ * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
+ * is_data == BTRFS_REF_TYPE_DATA, data type is required,
+ * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
+ */
+int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
+ struct btrfs_extent_inline_ref *iref,
+ enum btrfs_inline_ref_type is_data)
+{
+ int type = btrfs_extent_inline_ref_type(eb, iref);
+ u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
+
+ if (type == BTRFS_TREE_BLOCK_REF_KEY ||
+ type == BTRFS_SHARED_BLOCK_REF_KEY ||
+ type == BTRFS_SHARED_DATA_REF_KEY ||
+ type == BTRFS_EXTENT_DATA_REF_KEY) {
+ if (is_data == BTRFS_REF_TYPE_BLOCK) {
+ if (type == BTRFS_TREE_BLOCK_REF_KEY)
+ return type;
+ if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
+ ASSERT(eb->fs_info);
+ /*
+ * Every shared ref has a parent tree
+ * block, which must be aligned to
+ * nodesize.
+ */
+ if (offset &&
+ IS_ALIGNED(offset, eb->fs_info->nodesize))
+ return type;
+ }
+ } else if (is_data == BTRFS_REF_TYPE_DATA) {
+ if (type == BTRFS_EXTENT_DATA_REF_KEY)
+ return type;
+ if (type == BTRFS_SHARED_DATA_REF_KEY) {
+ ASSERT(eb->fs_info);
+ /*
+ * Every shared ref has a parent tree
+ * block, which must be aligned to
+ * nodesize.
+ */
+ if (offset &&
+ IS_ALIGNED(offset, eb->fs_info->nodesize))
+ return type;
+ }
+ } else {
+ ASSERT(is_data == BTRFS_REF_TYPE_ANY);
+ return type;
+ }
+ }
+
+ btrfs_print_leaf((struct extent_buffer *)eb);
+ btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
+ eb->start, type);
+ WARN_ON(1);
+
+ return BTRFS_REF_TYPE_INVALID;
+}
+
static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
u32 high_crc = ~(u32)0;
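btrfs_get_extent_inline_ref_type() becomes the single place that sanity-checks an inline ref's type. The subtle part is the shared-ref branch: the ref's offset field doubles as the parent tree block's bytenr, so a valid value must be non-zero and nodesize-aligned. A minimal userspace model of that check, assuming 16 KiB nodes for the example:

/* Userspace model of the shared-ref check above: SHARED_BLOCK/SHARED_DATA
 * refs carry the parent tree block's bytenr in the offset field. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

static bool shared_ref_offset_valid(uint64_t offset, uint32_t nodesize)
{
        return offset && IS_ALIGNED(offset, (uint64_t)nodesize);
}

int main(void)
{
        assert(shared_ref_offset_valid(5 * 16384, 16384));  /* aligned parent */
        assert(!shared_ref_offset_valid(0, 16384));         /* no parent */
        assert(!shared_ref_offset_valid(16384 + 1, 16384)); /* misaligned */
        return 0;
}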
@@ -1396,12 +1475,18 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
struct btrfs_extent_data_ref *ref1;
struct btrfs_shared_data_ref *ref2;
u32 num_refs = 0;
+ int type;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (iref) {
- if (btrfs_extent_inline_ref_type(leaf, iref) ==
- BTRFS_EXTENT_DATA_REF_KEY) {
+ /*
+ * If type is invalid, we should have bailed out earlier than
+ * this call.
+ */
+ type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
+ ASSERT(type != BTRFS_REF_TYPE_INVALID);
+ if (type == BTRFS_EXTENT_DATA_REF_KEY) {
ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
num_refs = btrfs_extent_data_ref_count(leaf, ref1);
} else {
@@ -1562,6 +1647,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
int ret;
int err = 0;
bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
+ int needed;
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -1653,6 +1739,11 @@ again:
BUG_ON(ptr > end);
}
+ if (owner >= BTRFS_FIRST_FREE_OBJECTID)
+ needed = BTRFS_REF_TYPE_DATA;
+ else
+ needed = BTRFS_REF_TYPE_BLOCK;
+
err = -ENOENT;
while (1) {
if (ptr >= end) {
@@ -1660,7 +1751,12 @@ again:
break;
}
iref = (struct btrfs_extent_inline_ref *)ptr;
- type = btrfs_extent_inline_ref_type(leaf, iref);
+ type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
+ if (type == BTRFS_REF_TYPE_INVALID) {
+ err = -EINVAL;
+ goto out;
+ }
+
if (want < type)
break;
if (want > type) {
@@ -1852,7 +1948,12 @@ void update_inline_extent_backref(struct btrfs_fs_info *fs_info,
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, ei);
- type = btrfs_extent_inline_ref_type(leaf, iref);
+ /*
+ * If type is invalid, we should have bailed out after
+ * lookup_inline_extent_backref().
+ */
+ type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
+ ASSERT(type != BTRFS_REF_TYPE_INVALID);
if (type == BTRFS_EXTENT_DATA_REF_KEY) {
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -2092,6 +2193,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset)
{
+ int old_ref_mod, new_ref_mod;
int ret;
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
@@ -2099,15 +2201,21 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, (int)owner,
- BTRFS_ADD_DELAYED_REF, NULL);
+ num_bytes, parent,
+ root_objectid, (int)owner,
+ BTRFS_ADD_DELAYED_REF, NULL,
+ &old_ref_mod, &new_ref_mod);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
- num_bytes, parent, root_objectid,
- owner, offset, 0,
- BTRFS_ADD_DELAYED_REF);
+ num_bytes, parent,
+ root_objectid, owner, offset,
+ 0, BTRFS_ADD_DELAYED_REF,
+ &old_ref_mod, &new_ref_mod);
}
+
+ if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
+ add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);
+
return ret;
}
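The delayed-ref helpers now return the head's total ref_mod from before and after queueing, and the sign of that counter drives the total_bytes_pinned accounting: bytes count as pinned only while ref_mod is negative. A small sketch of the two transitions this patch tests for (the symmetric drop case appears in btrfs_free_extent() below):

/* Sketch of the ref_mod sign-transition rule: bytes are counted as
 * pinned only while the delayed ref head's total ref_mod is negative. */
#include <assert.h>
#include <stdbool.h>

/* An add that takes total ref_mod from <0 to >=0 un-pins the bytes. */
static bool add_unpins(int old_ref_mod, int new_ref_mod)
{
        return old_ref_mod < 0 && new_ref_mod >= 0;
}

/* A drop that takes total ref_mod from >=0 to <0 pins the bytes. */
static bool drop_pins(int old_ref_mod, int new_ref_mod)
{
        return old_ref_mod >= 0 && new_ref_mod < 0;
}

int main(void)
{
        assert(add_unpins(-1, 0));   /* queued drop cancelled by an add */
        assert(!add_unpins(0, 1));   /* plain add, nothing was pinned */
        assert(drop_pins(0, -1));    /* first drop pins the extent */
        assert(!drop_pins(-1, -2));  /* already pinned, no double count */
        return 0;
}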
@@ -2411,6 +2519,16 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
head = btrfs_delayed_node_to_head(node);
trace_run_delayed_ref_head(fs_info, node, head, node->action);
+ if (head->total_ref_mod < 0) {
+ struct btrfs_block_group_cache *cache;
+
+ cache = btrfs_lookup_block_group(fs_info, node->bytenr);
+ ASSERT(cache);
+ percpu_counter_add(&cache->space_info->total_bytes_pinned,
+ -node->num_bytes);
+ btrfs_put_block_group(cache);
+ }
+
if (insert_reserved) {
btrfs_pin_extent(fs_info, node->bytenr,
node->num_bytes, 1);
@@ -3120,6 +3238,7 @@ static noinline int check_committed_ref(struct btrfs_root *root,
struct btrfs_extent_item *ei;
struct btrfs_key key;
u32 item_size;
+ int type;
int ret;
key.objectid = bytenr;
@@ -3161,8 +3280,9 @@ static noinline int check_committed_ref(struct btrfs_root *root,
goto out;
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
- if (btrfs_extent_inline_ref_type(leaf, iref) !=
- BTRFS_EXTENT_DATA_REF_KEY)
+
+ type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
+ if (type != BTRFS_EXTENT_DATA_REF_KEY)
goto out;
ref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -3364,6 +3484,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_root *root = fs_info->tree_root;
struct inode *inode = NULL;
+ struct extent_changeset *data_reserved = NULL;
u64 alloc_hint = 0;
int dcs = BTRFS_DC_ERROR;
u64 num_pages = 0;
@@ -3483,7 +3604,7 @@ again:
num_pages *= 16;
num_pages *= PAGE_SIZE;
- ret = btrfs_check_data_free_space(inode, 0, num_pages);
+ ret = btrfs_check_data_free_space(inode, &data_reserved, 0, num_pages);
if (ret)
goto out_put;
@@ -3514,6 +3635,7 @@ out:
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
+ extent_changeset_free(data_reserved);
return ret;
}
@@ -3924,88 +4046,83 @@ static const char *alloc_name(u64 flags)
};
}
-static int update_space_info(struct btrfs_fs_info *info, u64 flags,
- u64 total_bytes, u64 bytes_used,
- u64 bytes_readonly,
- struct btrfs_space_info **space_info)
+static int create_space_info(struct btrfs_fs_info *info, u64 flags,
+ struct btrfs_space_info **new)
{
- struct btrfs_space_info *found;
+
+ struct btrfs_space_info *space_info;
int i;
- int factor;
int ret;
- if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10))
- factor = 2;
- else
- factor = 1;
-
- found = __find_space_info(info, flags);
- if (found) {
- spin_lock(&found->lock);
- found->total_bytes += total_bytes;
- found->disk_total += total_bytes * factor;
- found->bytes_used += bytes_used;
- found->disk_used += bytes_used * factor;
- found->bytes_readonly += bytes_readonly;
- if (total_bytes > 0)
- found->full = 0;
- space_info_add_new_bytes(info, found, total_bytes -
- bytes_used - bytes_readonly);
- spin_unlock(&found->lock);
- *space_info = found;
- return 0;
- }
- found = kzalloc(sizeof(*found), GFP_NOFS);
- if (!found)
+ space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
+ if (!space_info)
return -ENOMEM;
- ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL);
+ ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
+ GFP_KERNEL);
if (ret) {
- kfree(found);
+ kfree(space_info);
return ret;
}
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
- INIT_LIST_HEAD(&found->block_groups[i]);
- init_rwsem(&found->groups_sem);
- spin_lock_init(&found->lock);
- found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
- found->total_bytes = total_bytes;
- found->disk_total = total_bytes * factor;
- found->bytes_used = bytes_used;
- found->disk_used = bytes_used * factor;
- found->bytes_pinned = 0;
- found->bytes_reserved = 0;
- found->bytes_readonly = bytes_readonly;
- found->bytes_may_use = 0;
- found->full = 0;
- found->max_extent_size = 0;
- found->force_alloc = CHUNK_ALLOC_NO_FORCE;
- found->chunk_alloc = 0;
- found->flush = 0;
- init_waitqueue_head(&found->wait);
- INIT_LIST_HEAD(&found->ro_bgs);
- INIT_LIST_HEAD(&found->tickets);
- INIT_LIST_HEAD(&found->priority_tickets);
-
- ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
+ INIT_LIST_HEAD(&space_info->block_groups[i]);
+ init_rwsem(&space_info->groups_sem);
+ spin_lock_init(&space_info->lock);
+ space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
+ space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+ init_waitqueue_head(&space_info->wait);
+ INIT_LIST_HEAD(&space_info->ro_bgs);
+ INIT_LIST_HEAD(&space_info->tickets);
+ INIT_LIST_HEAD(&space_info->priority_tickets);
+
+ ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
info->space_info_kobj, "%s",
- alloc_name(found->flags));
+ alloc_name(space_info->flags));
if (ret) {
- percpu_counter_destroy(&found->total_bytes_pinned);
- kfree(found);
+ percpu_counter_destroy(&space_info->total_bytes_pinned);
+ kfree(space_info);
return ret;
}
- *space_info = found;
- list_add_rcu(&found->list, &info->space_info);
+ *new = space_info;
+ list_add_rcu(&space_info->list, &info->space_info);
if (flags & BTRFS_BLOCK_GROUP_DATA)
- info->data_sinfo = found;
+ info->data_sinfo = space_info;
return ret;
}
+static void update_space_info(struct btrfs_fs_info *info, u64 flags,
+ u64 total_bytes, u64 bytes_used,
+ u64 bytes_readonly,
+ struct btrfs_space_info **space_info)
+{
+ struct btrfs_space_info *found;
+ int factor;
+
+ if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ factor = 2;
+ else
+ factor = 1;
+
+ found = __find_space_info(info, flags);
+ ASSERT(found);
+ spin_lock(&found->lock);
+ found->total_bytes += total_bytes;
+ found->disk_total += total_bytes * factor;
+ found->bytes_used += bytes_used;
+ found->disk_used += bytes_used * factor;
+ found->bytes_readonly += bytes_readonly;
+ if (total_bytes > 0)
+ found->full = 0;
+ space_info_add_new_bytes(info, found, total_bytes -
+ bytes_used - bytes_readonly);
+ spin_unlock(&found->lock);
+ *space_info = found;
+}
+
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
u64 extra_flags = chunk_to_extended(flags) &
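update_space_info() used to create a space_info on demand and update it otherwise; the split above leaves creation to create_space_info() and reduces update_space_info() to counter arithmetic over an object that must already exist. A compressed userspace model of the update half, with the lookup and locking elided:

/* Minimal model of the counter half of the split; field names mirror
 * the kernel's, the flag values are the usual block-group bits. */
#include <stdint.h>
#include <stdio.h>

#define BTRFS_BLOCK_GROUP_RAID1  (1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP    (1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)

struct space_info {
        uint64_t total_bytes, disk_total, bytes_used, disk_used;
        int full;
};

static void update_space_info(struct space_info *si, uint64_t flags,
                              uint64_t total_bytes, uint64_t bytes_used)
{
        /* Mirrored profiles consume twice the raw disk space. */
        int factor = (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
                               BTRFS_BLOCK_GROUP_RAID10)) ? 2 : 1;

        si->total_bytes += total_bytes;
        si->disk_total += total_bytes * factor;
        si->bytes_used += bytes_used;
        si->disk_used += bytes_used * factor;
        if (total_bytes > 0)
                si->full = 0;
}

int main(void)
{
        struct space_info si = { .full = 1 };

        update_space_info(&si, BTRFS_BLOCK_GROUP_RAID1, 1 << 30, 1 << 20);
        printf("total=%llu disk_total=%llu full=%d\n",
               (unsigned long long)si.total_bytes,
               (unsigned long long)si.disk_total, si.full);
        return 0;
}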
@@ -4121,7 +4238,7 @@ static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
return btrfs_reduce_alloc_profile(fs_info, flags);
}
-u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
+static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 flags;
@@ -4138,6 +4255,21 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
return ret;
}
+u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
+{
+ return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA);
+}
+
+u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info)
+{
+ return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+}
+
+u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
+{
+ return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+}
+
static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
bool may_use_included)
{
@@ -4149,9 +4281,9 @@ static u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
{
- struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
u64 used;
int ret = 0;
int need_commit = 2;
@@ -4165,10 +4297,6 @@ int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
ASSERT(current->journal_info);
}
- data_sinfo = fs_info->data_sinfo;
- if (!data_sinfo)
- goto alloc;
-
again:
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
@@ -4186,8 +4314,8 @@ again:
data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
spin_unlock(&data_sinfo->lock);
-alloc:
- alloc_target = btrfs_get_alloc_profile(root, 1);
+
+ alloc_target = btrfs_data_alloc_profile(fs_info);
/*
* It is ugly that we don't call nolock join
* transaction for the free space inode case here.
@@ -4214,9 +4342,6 @@ alloc:
}
}
- if (!data_sinfo)
- data_sinfo = fs_info->data_sinfo;
-
goto again;
}
@@ -4238,7 +4363,7 @@ commit_trans:
if (need_commit > 0) {
btrfs_start_delalloc_roots(fs_info, 0, -1);
- btrfs_wait_ordered_roots(fs_info, -1, 0,
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
(u64)-1);
}
@@ -4278,12 +4403,8 @@ commit_trans:
return ret;
}
-/*
- * New check_data_free_space() with ability for precious data reservation
- * Will replace old btrfs_check_data_free_space(), but for patch split,
- * add a new function first and then replace it.
- */
-int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
+int btrfs_check_data_free_space(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int ret;
@@ -4298,9 +4419,11 @@ int btrfs_check_data_free_space(struct inode *inode, u64 start, u64 len)
return ret;
/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
- ret = btrfs_qgroup_reserve_data(inode, start, len);
- if (ret)
+ ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
+ if (ret < 0)
btrfs_free_reserved_data_space_noquota(inode, start, len);
+ else
+ ret = 0;
return ret;
}
@@ -4341,7 +4464,8 @@ void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
* This one will handle the per-inode data rsv map for accurate reserved
* space framework.
*/
-void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len)
+void btrfs_free_reserved_data_space(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -4351,7 +4475,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 start, u64 len)
start = round_down(start, root->fs_info->sectorsize);
btrfs_free_reserved_data_space_noquota(inode, start, len);
- btrfs_qgroup_free_data(inode, start, len);
+ btrfs_qgroup_free_data(inode, reserved, start, len);
}
static void force_metadata_allocation(struct btrfs_fs_info *info)
@@ -4376,8 +4500,7 @@ static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, int force)
{
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
- u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
- u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
+ u64 bytes_used = btrfs_space_info_used(sinfo, false);
u64 thresh;
if (force == CHUNK_ALLOC_FORCE)
@@ -4389,7 +4512,7 @@ static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
* global_rsv, it doesn't change except when the transaction commits.
*/
if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
- num_allocated += calc_global_rsv_need_space(global_rsv);
+ bytes_used += calc_global_rsv_need_space(global_rsv);
/*
* in limited mode, we want to have some free space up to
@@ -4399,11 +4522,11 @@ static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
thresh = btrfs_super_total_bytes(fs_info->super_copy);
thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
- if (num_bytes - num_allocated < thresh)
+ if (sinfo->total_bytes - bytes_used < thresh)
return 1;
}
- if (num_allocated + SZ_2M < div_factor(num_bytes, 8))
+ if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
return 0;
return 1;
}
@@ -4463,9 +4586,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans,
}
if (left < thresh) {
- u64 flags;
+ u64 flags = btrfs_system_alloc_profile(fs_info);
- flags = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
/*
* Ignore failure to create system chunk. We might end up not
* needing it, as we might not need to COW all nodes/leafs from
@@ -4506,10 +4628,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
space_info = __find_space_info(fs_info, flags);
if (!space_info) {
- ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
- BUG_ON(ret); /* -ENOMEM */
+ ret = create_space_info(fs_info, flags, &space_info);
+ if (ret)
+ return ret;
}
- BUG_ON(!space_info); /* Logic error */
again:
spin_lock(&space_info->lock);
@@ -4614,11 +4736,11 @@ out:
return ret;
}
-static int can_overcommit(struct btrfs_root *root,
+static int can_overcommit(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 bytes,
- enum btrfs_reserve_flush_enum flush)
+ enum btrfs_reserve_flush_enum flush,
+ bool system_chunk)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
u64 profile;
u64 space_size;
@@ -4629,7 +4751,11 @@ static int can_overcommit(struct btrfs_root *root,
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
return 0;
- profile = btrfs_get_alloc_profile(root, 0);
+ if (system_chunk)
+ profile = btrfs_system_alloc_profile(fs_info);
+ else
+ profile = btrfs_metadata_alloc_profile(fs_info);
+
used = btrfs_space_info_used(space_info, false);
/*
@@ -4646,9 +4772,7 @@ static int can_overcommit(struct btrfs_root *root,
used += space_info->bytes_may_use;
- spin_lock(&fs_info->free_chunk_lock);
- avail = fs_info->free_chunk_space;
- spin_unlock(&fs_info->free_chunk_lock);
+ avail = atomic64_read(&fs_info->free_chunk_space);
/*
* If we have dup, raid1 or raid10 then only half of the free
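For context, can_overcommit() scales the free chunk space before comparing: mirrored profiles halve it, and the flush mode scales it again. The sketch below models that math; the exact shift amounts sit partly outside this hunk's context lines, so treat them as recalled approximations rather than the patch's text:

/* Hedged model of can_overcommit()'s availability math. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BTRFS_BLOCK_GROUP_RAID1  (1ULL << 4)
#define BTRFS_BLOCK_GROUP_DUP    (1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)

static bool can_overcommit(uint64_t used, uint64_t total, uint64_t bytes,
                           uint64_t avail, uint64_t profile, bool flush_all)
{
        /* Mirrored profiles: only half the raw free space is usable. */
        if (profile & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
                       BTRFS_BLOCK_GROUP_RAID10))
                avail >>= 1;

        /* Callers that can flush get a conservative 1/8 of the free
         * space; callers that cannot flush get 1/2, since failing them
         * means an immediate ENOSPC. */
        avail >>= flush_all ? 3 : 1;

        return used + bytes < total + avail;
}

int main(void)
{
        /* 1 GiB metadata space fully used, 1 GiB unallocated, RAID1. */
        printf("%d\n", can_overcommit(1ULL << 30, 1ULL << 30, 1ULL << 20,
                                      1ULL << 30, BTRFS_BLOCK_GROUP_RAID1,
                                      true));
        return 0;
}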
@@ -4698,14 +4822,14 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_fs_info *fs_info,
}
}
-static inline int calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
+static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
u64 to_reclaim)
{
u64 bytes;
- int nr;
+ u64 nr;
bytes = btrfs_calc_trans_metadata_size(fs_info, 1);
- nr = (int)div64_u64(to_reclaim, bytes);
+ nr = div64_u64(to_reclaim, bytes);
if (!nr)
nr = 1;
return nr;
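calc_reclaim_items_nr() converts a byte target into a number of metadata items to flush; switching it (and its callers' local) from int to u64 avoids truncating very large reclaim targets. A sketch of the clamped division, with an illustrative bytes-per-item figure:

/* Model of calc_reclaim_items_nr(): items whose reservation covers
 * to_reclaim bytes, clamped to at least one. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t calc_reclaim_items_nr(uint64_t to_reclaim,
                                      uint64_t bytes_per_item)
{
        uint64_t nr = to_reclaim / bytes_per_item;

        return nr ? nr : 1;
}

int main(void)
{
        /* With the old int return type this value would truncate. */
        printf("%" PRIu64 "\n",
               calc_reclaim_items_nr(1ULL << 50, 256 * 1024));
        return 0;
}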
@@ -4716,24 +4840,23 @@ static inline int calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
/*
* shrink metadata reservation for delalloc
*/
-static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
- bool wait_ordered)
+static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim,
+ u64 orig, bool wait_ordered)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
u64 delalloc_bytes;
u64 max_reclaim;
+ u64 items;
long time_left;
unsigned long nr_pages;
int loops;
- int items;
enum btrfs_reserve_flush_enum flush;
/* Calc the number of the pages we need flush for space reservation */
items = calc_reclaim_items_nr(fs_info, to_reclaim);
- to_reclaim = (u64)items * EXTENT_SIZE_PER_ITEM;
+ to_reclaim = items * EXTENT_SIZE_PER_ITEM;
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &fs_info->delalloc_block_rsv;
@@ -4776,10 +4899,6 @@ skip_async:
else
flush = BTRFS_RESERVE_NO_FLUSH;
spin_lock(&space_info->lock);
- if (can_overcommit(root, space_info, orig, flush)) {
- spin_unlock(&space_info->lock);
- break;
- }
if (list_empty(&space_info->tickets) &&
list_empty(&space_info->priority_tickets)) {
spin_unlock(&space_info->lock);
@@ -4838,7 +4957,7 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
spin_lock(&delayed_rsv->lock);
if (percpu_counter_compare(&space_info->total_bytes_pinned,
- bytes - delayed_rsv->size) >= 0) {
+ bytes - delayed_rsv->size) < 0) {
spin_unlock(&delayed_rsv->lock);
return -ENOSPC;
}
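This hunk fixes an inverted test: may_commit_transaction() used to return -ENOSPC precisely when enough bytes were pinned for a commit to help. A model of the corrected decision, assuming the signed comparison that percpu_counter_compare() performs:

/* Model of the corrected test: a commit is only worthwhile if the
 * pinned bytes (plus what the delayed rsv gives back) can satisfy the
 * reservation. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool commit_may_help(int64_t total_bytes_pinned, uint64_t bytes,
                            uint64_t delayed_rsv_size)
{
        /* Previously >= 0 returned -ENOSPC, i.e. exactly backwards. */
        return total_bytes_pinned >= (int64_t)(bytes - delayed_rsv_size);
}

int main(void)
{
        assert(commit_may_help(1 << 20, 1 << 20, 0));  /* enough pinned */
        assert(!commit_may_help(0, 1 << 20, 0));       /* nothing to free */
        assert(commit_may_help(0, 1 << 20, 1 << 20));  /* rsv covers it */
        return 0;
}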
@@ -4859,9 +4978,14 @@ struct reserve_ticket {
wait_queue_head_t wait;
};
-static int flush_space(struct btrfs_fs_info *fs_info,
+/*
+ * Try to flush some data based on policy set by @state. This is only advisory
+ * and may fail for various reasons. The caller is supposed to examine the
+ * state of @space_info to detect the outcome.
+ */
+static void flush_space(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 num_bytes,
- u64 orig_bytes, int state)
+ int state)
{
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_trans_handle *trans;
@@ -4886,7 +5010,7 @@ static int flush_space(struct btrfs_fs_info *fs_info,
break;
case FLUSH_DELALLOC:
case FLUSH_DELALLOC_WAIT:
- shrink_delalloc(root, num_bytes * 2, orig_bytes,
+ shrink_delalloc(fs_info, num_bytes * 2, num_bytes,
state == FLUSH_DELALLOC_WAIT);
break;
case ALLOC_CHUNK:
@@ -4896,7 +5020,7 @@ static int flush_space(struct btrfs_fs_info *fs_info,
break;
}
ret = do_chunk_alloc(trans, fs_info,
- btrfs_get_alloc_profile(root, 0),
+ btrfs_metadata_alloc_profile(fs_info),
CHUNK_ALLOC_NO_FORCE);
btrfs_end_transaction(trans);
if (ret > 0 || ret == -ENOSPC)
@@ -4904,21 +5028,22 @@ static int flush_space(struct btrfs_fs_info *fs_info,
break;
case COMMIT_TRANS:
ret = may_commit_transaction(fs_info, space_info,
- orig_bytes, 0);
+ num_bytes, 0);
break;
default:
ret = -ENOSPC;
break;
}
- trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes,
- orig_bytes, state, ret);
- return ret;
+ trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state,
+ ret);
+ return;
}
static inline u64
-btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
- struct btrfs_space_info *space_info)
+btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ bool system_chunk)
{
struct reserve_ticket *ticket;
u64 used;
@@ -4933,14 +5058,14 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
return to_reclaim;
to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
- if (can_overcommit(root, space_info, to_reclaim,
- BTRFS_RESERVE_FLUSH_ALL))
+ if (can_overcommit(fs_info, space_info, to_reclaim,
+ BTRFS_RESERVE_FLUSH_ALL, system_chunk))
return 0;
- used = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
- if (can_overcommit(root, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL))
+ used = btrfs_space_info_used(space_info, true);
+
+ if (can_overcommit(fs_info, space_info, SZ_1M,
+ BTRFS_RESERVE_FLUSH_ALL, system_chunk))
expected = div_factor_fine(space_info->total_bytes, 95);
else
expected = div_factor_fine(space_info->total_bytes, 90);
@@ -4954,17 +5079,18 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
return to_reclaim;
}
-static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
- struct btrfs_root *root, u64 used)
+static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ u64 used, bool system_chunk)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
u64 thresh = div_factor_fine(space_info->total_bytes, 98);
/* If we're just plain full then async reclaim just slows us down. */
if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
return 0;
- if (!btrfs_calc_reclaim_metadata_size(root, space_info))
+ if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ system_chunk))
return 0;
return (used >= thresh && !btrfs_fs_closing(fs_info) &&
@@ -5001,8 +5127,8 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
- space_info);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ false);
if (!to_reclaim) {
space_info->flush = 0;
spin_unlock(&space_info->lock);
@@ -5013,21 +5139,16 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
flush_state = FLUSH_DELAYED_ITEMS_NR;
do {
- struct reserve_ticket *ticket;
- int ret;
-
- ret = flush_space(fs_info, space_info, to_reclaim, to_reclaim,
- flush_state);
+ flush_space(fs_info, space_info, to_reclaim, flush_state);
spin_lock(&space_info->lock);
if (list_empty(&space_info->tickets)) {
space_info->flush = 0;
spin_unlock(&space_info->lock);
return;
}
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
- space_info);
- ticket = list_first_entry(&space_info->tickets,
- struct reserve_ticket, list);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
+ space_info,
+ false);
if (last_tickets_id == space_info->tickets_id) {
flush_state++;
} else {
@@ -5063,8 +5184,8 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
int flush_state = FLUSH_DELAYED_ITEMS_NR;
spin_lock(&space_info->lock);
- to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->extent_root,
- space_info);
+ to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
+ false);
if (!to_reclaim) {
spin_unlock(&space_info->lock);
return;
@@ -5072,8 +5193,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
spin_unlock(&space_info->lock);
do {
- flush_space(fs_info, space_info, to_reclaim, to_reclaim,
- flush_state);
+ flush_space(fs_info, space_info, to_reclaim, flush_state);
flush_state++;
spin_lock(&space_info->lock);
if (ticket->bytes == 0) {
@@ -5143,12 +5263,12 @@ static int wait_reserve_ticket(struct btrfs_fs_info *fs_info,
* regain reservations will be made and this will fail if there is not enough
* space already.
*/
-static int __reserve_metadata_bytes(struct btrfs_root *root,
+static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info,
u64 orig_bytes,
- enum btrfs_reserve_flush_enum flush)
+ enum btrfs_reserve_flush_enum flush,
+ bool system_chunk)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct reserve_ticket ticket;
u64 used;
int ret = 0;
@@ -5170,7 +5290,8 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, orig_bytes, 1);
ret = 0;
- } else if (can_overcommit(root, space_info, orig_bytes, flush)) {
+ } else if (can_overcommit(fs_info, space_info, orig_bytes, flush,
+ system_chunk)) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, orig_bytes, 1);
@@ -5197,7 +5318,7 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
orig_bytes, flush,
"enospc");
queue_work(system_unbound_wq,
- &root->fs_info->async_reclaim_work);
+ &fs_info->async_reclaim_work);
}
} else {
list_add_tail(&ticket.list,
@@ -5211,7 +5332,8 @@ static int __reserve_metadata_bytes(struct btrfs_root *root,
* the async reclaim as we will panic.
*/
if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
- need_do_async_reclaim(space_info, root, used) &&
+ need_do_async_reclaim(fs_info, space_info,
+ used, system_chunk) &&
!work_busy(&fs_info->async_reclaim_work)) {
trace_btrfs_trigger_flush(fs_info, space_info->flags,
orig_bytes, flush, "preempt");
@@ -5269,9 +5391,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
int ret;
+ bool system_chunk = (root == fs_info->chunk_root);
- ret = __reserve_metadata_bytes(root, block_rsv->space_info, orig_bytes,
- flush);
+ ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
+ orig_bytes, flush, system_chunk);
if (ret == -ENOSPC &&
unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
if (block_rsv != global_rsv &&
@@ -5380,9 +5503,7 @@ static void space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
* overcommit, and if we can't then we just need to free up our space
* and not satisfy any requests.
*/
- used = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
+ used = btrfs_space_info_used(space_info, true);
if (used - num_bytes >= space_info->total_bytes)
check_overcommit = true;
again:
@@ -5394,8 +5515,7 @@ again:
* adding the ticket space would be a double count.
*/
if (check_overcommit &&
- !can_overcommit(fs_info->extent_root, space_info, 0,
- flush))
+ !can_overcommit(fs_info, space_info, 0, flush, false))
break;
if (num_bytes >= ticket->bytes) {
list_del_init(&ticket->list);
@@ -6124,6 +6244,8 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
* @inode: inode we're writing to
* @start: start range we are writing to
* @len: how long the range we are writing to
+ * @reserved: mandatory parameter, records the qgroup ranges actually
+ * reserved by the current reservation.
*
* This will do the following things
*
@@ -6141,16 +6263,17 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes)
* Return 0 for success
* Return <0 for error (-ENOSPC or -EDQUOT)
*/
-int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len)
+int btrfs_delalloc_reserve_space(struct inode *inode,
+ struct extent_changeset **reserved, u64 start, u64 len)
{
int ret;
- ret = btrfs_check_data_free_space(inode, start, len);
+ ret = btrfs_check_data_free_space(inode, reserved, start, len);
if (ret < 0)
return ret;
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
if (ret < 0)
- btrfs_free_reserved_data_space(inode, start, len);
+ btrfs_free_reserved_data_space(inode, *reserved, start, len);
return ret;
}
@@ -6169,10 +6292,11 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 start, u64 len)
* list if there are no delalloc bytes left.
* Also it will handle the qgroup reserved space.
*/
-void btrfs_delalloc_release_space(struct inode *inode, u64 start, u64 len)
+void btrfs_delalloc_release_space(struct inode *inode,
+ struct extent_changeset *reserved, u64 start, u64 len)
{
btrfs_delalloc_release_metadata(BTRFS_I(inode), len);
- btrfs_free_reserved_data_space(inode, start, len);
+ btrfs_free_reserved_data_space(inode, reserved, start, len);
}
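With an extent_changeset threaded through the reserve/release pair, qgroup accounting can free exactly the ranges a reservation actually charged instead of the whole requested range. A toy model of that contract; the real changeset tracks byte ranges in a ulist, a single counter stands in here:

/* Toy model of the extent_changeset contract: reserve records what it
 * actually charged, release frees exactly that. */
#include <stdint.h>
#include <stdio.h>

struct extent_changeset {
        uint64_t bytes_changed;    /* bytes newly reserved by this call */
};

static void reserve_data(struct extent_changeset *cs, uint64_t len,
                         uint64_t already_reserved)
{
        /* Only the not-yet-reserved part of the range is charged. */
        cs->bytes_changed = len > already_reserved ?
                            len - already_reserved : 0;
}

static void release_data(const struct extent_changeset *cs)
{
        printf("freeing %llu qgroup bytes\n",
               (unsigned long long)cs->bytes_changed);
}

int main(void)
{
        struct extent_changeset cs = { 0 };

        reserve_data(&cs, 8192, 4096); /* half the range already held */
        release_data(&cs);             /* frees only the new 4096 */
        return 0;
}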
static int update_block_group(struct btrfs_trans_handle *trans,
@@ -6248,6 +6372,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
trace_btrfs_space_reservation(info, "pinned",
cache->space_info->flags,
num_bytes, 1);
+ percpu_counter_add(&cache->space_info->total_bytes_pinned,
+ num_bytes);
set_extent_dirty(info->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
@@ -6324,6 +6450,7 @@ static int pin_down_extent(struct btrfs_fs_info *fs_info,
trace_btrfs_space_reservation(fs_info, "pinned",
cache->space_info->flags, num_bytes, 1);
+ percpu_counter_add(&cache->space_info->total_bytes_pinned, num_bytes);
set_extent_dirty(fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
return 0;
@@ -6609,19 +6736,20 @@ fetch_cluster_info(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 *empty_cluster)
{
struct btrfs_free_cluster *ret = NULL;
- bool ssd = btrfs_test_opt(fs_info, SSD);
*empty_cluster = 0;
if (btrfs_mixed_space_info(space_info))
return ret;
- if (ssd)
- *empty_cluster = SZ_2M;
if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
ret = &fs_info->meta_alloc_cluster;
- if (!ssd)
+ if (btrfs_test_opt(fs_info, SSD))
+ *empty_cluster = SZ_2M;
+ else
*empty_cluster = SZ_64K;
- } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && ssd) {
+ } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
+ btrfs_test_opt(fs_info, SSD_SPREAD)) {
+ *empty_cluster = SZ_2M;
ret = &fs_info->data_alloc_cluster;
}
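The reworked fetch_cluster_info() unties cluster sizing from a single SSD check: metadata clusters are 2 MiB on SSD and 64 KiB otherwise, and data allocation clusters are used only under ssd_spread. A small decision-table sketch of that policy:

/* Decision model for the reworked fetch_cluster_info(). */
#include <stdint.h>
#include <stdio.h>

#define SZ_64K (64 * 1024ULL)
#define SZ_2M  (2 * 1024 * 1024ULL)

enum space { METADATA, DATA };

static uint64_t empty_cluster(enum space s, int ssd, int ssd_spread)
{
        if (s == METADATA)
                return ssd ? SZ_2M : SZ_64K;
        if (s == DATA && ssd_spread)  /* data clusters: ssd_spread only */
                return SZ_2M;
        return 0;
}

int main(void)
{
        printf("meta/ssd: %llu\n",
               (unsigned long long)empty_cluster(METADATA, 1, 0));
        printf("data/ssd: %llu\n",
               (unsigned long long)empty_cluster(DATA, 1, 0));
        return 0;
}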
@@ -6700,7 +6828,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
if (!readonly && return_free_space &&
global_rsv->space_info == space_info) {
u64 to_add = len;
- WARN_ON(!return_free_space);
+
spin_lock(&global_rsv->lock);
if (!global_rsv->full) {
to_add = min(len, global_rsv->size -
@@ -6786,7 +6914,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
if (ret) {
const char *errstr = btrfs_decode_error(ret);
btrfs_warn(fs_info,
- "Discard failed while removing blockgroup: errno=%d %s\n",
+ "discard failed while removing blockgroup: errno=%d %s",
ret, errstr);
}
}
@@ -6794,27 +6922,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
return 0;
}
-static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
- u64 owner, u64 root_objectid)
-{
- struct btrfs_space_info *space_info;
- u64 flags;
-
- if (owner < BTRFS_FIRST_FREE_OBJECTID) {
- if (root_objectid == BTRFS_CHUNK_TREE_OBJECTID)
- flags = BTRFS_BLOCK_GROUP_SYSTEM;
- else
- flags = BTRFS_BLOCK_GROUP_METADATA;
- } else {
- flags = BTRFS_BLOCK_GROUP_DATA;
- }
-
- space_info = __find_space_info(fs_info, flags);
- BUG_ON(!space_info); /* Logic bug */
- percpu_counter_add(&space_info->total_bytes_pinned, num_bytes);
-}
-
-
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *info,
struct btrfs_delayed_ref_node *node, u64 parent,
@@ -6935,7 +7042,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
"umm, got %d back from search, was looking for %llu",
ret, bytenr);
if (ret > 0)
- btrfs_print_leaf(info, path->nodes[0]);
+ btrfs_print_leaf(path->nodes[0]);
}
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
@@ -6944,7 +7051,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
extent_slot = path->slots[0];
}
} else if (WARN_ON(ret == -ENOENT)) {
- btrfs_print_leaf(info, path->nodes[0]);
+ btrfs_print_leaf(path->nodes[0]);
btrfs_err(info,
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
bytenr, parent, root_objectid, owner_objectid,
@@ -6981,7 +7088,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_err(info,
"umm, got %d back from search, was looking for %llu",
ret, bytenr);
- btrfs_print_leaf(info, path->nodes[0]);
+ btrfs_print_leaf(path->nodes[0]);
}
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
@@ -7037,8 +7144,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
goto out;
}
}
- add_pinned_bytes(info, -num_bytes, owner_objectid,
- root_objectid);
} else {
if (found_extent) {
BUG_ON(is_data && refs_to_drop !=
@@ -7170,19 +7275,19 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
int ret;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
- ret = btrfs_add_delayed_tree_ref(fs_info, trans,
- buf->start, buf->len,
- parent,
+ int old_ref_mod, new_ref_mod;
+
+ ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
+ buf->len, parent,
root->root_key.objectid,
btrfs_header_level(buf),
- BTRFS_DROP_DELAYED_REF, NULL);
+ BTRFS_DROP_DELAYED_REF, NULL,
+ &old_ref_mod, &new_ref_mod);
BUG_ON(ret); /* -ENOMEM */
+ pin = old_ref_mod >= 0 && new_ref_mod < 0;
}
- if (!last_ref)
- return;
-
- if (btrfs_header_generation(buf) == trans->transid) {
+ if (last_ref && btrfs_header_generation(buf) == trans->transid) {
struct btrfs_block_group_cache *cache;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
@@ -7191,6 +7296,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
goto out;
}
+ pin = 0;
cache = btrfs_lookup_block_group(fs_info, buf->start);
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
@@ -7206,18 +7312,19 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_free_reserved_bytes(cache, buf->len, 0);
btrfs_put_block_group(cache);
trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
- pin = 0;
}
out:
if (pin)
add_pinned_bytes(fs_info, buf->len, btrfs_header_level(buf),
root->root_key.objectid);
- /*
- * Deleting the buffer, clear the corrupt flag since it doesn't matter
- * anymore.
- */
- clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
+ if (last_ref) {
+ /*
+ * Deleting the buffer, clear the corrupt flag since it doesn't
+ * matter anymore.
+ */
+ clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
+ }
}
/* Can return -ENOMEM */
@@ -7226,12 +7333,12 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset)
{
+ int old_ref_mod, new_ref_mod;
int ret;
if (btrfs_is_testing(fs_info))
return 0;
- add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
/*
* tree log blocks never actually go into the extent allocation
@@ -7241,19 +7348,25 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
/* unlocks the pinned mutex */
btrfs_pin_extent(fs_info, bytenr, num_bytes, 1);
+ old_ref_mod = new_ref_mod = 0;
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, (int)owner,
- BTRFS_DROP_DELAYED_REF, NULL);
+ num_bytes, parent,
+ root_objectid, (int)owner,
+ BTRFS_DROP_DELAYED_REF, NULL,
+ &old_ref_mod, &new_ref_mod);
} else {
ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
- num_bytes,
- parent, root_objectid, owner,
- offset, 0,
- BTRFS_DROP_DELAYED_REF);
+ num_bytes, parent,
+ root_objectid, owner, offset,
+ 0, BTRFS_DROP_DELAYED_REF,
+ &old_ref_mod, &new_ref_mod);
}
+
+ if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
+ add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
+
return ret;
}
@@ -7545,6 +7658,10 @@ search:
u64 offset;
int cached;
+ /* If the block group is read-only, we can skip it entirely. */
+ if (unlikely(block_group->ro))
+ continue;
+
btrfs_grab_block_group(block_group, delalloc);
search_start = block_group->key.objectid;
@@ -7580,8 +7697,6 @@ have_block_group:
if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
goto loop;
- if (unlikely(block_group->ro))
- goto loop;
/*
* Ok we want to try and use the cluster allocator, so
@@ -7795,6 +7910,7 @@ loop:
failed_alloc = false;
BUG_ON(index != get_block_group_index(block_group));
btrfs_release_block_group(block_group, delalloc);
+ cond_resched();
}
up_read(&space_info->groups_sem);
@@ -7956,7 +8072,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
u64 flags;
int ret;
- flags = btrfs_get_alloc_profile(root, is_data);
+ flags = get_alloc_profile_by_root(root, is_data);
again:
WARN_ON(num_bytes < fs_info->sectorsize);
ret = find_free_extent(fs_info, ram_bytes, num_bytes, empty_size,
@@ -8200,9 +8316,9 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid,
- ins->offset, 0,
- root_objectid, owner, offset,
- ram_bytes, BTRFS_ADD_DELAYED_EXTENT);
+ ins->offset, 0, root_objectid, owner,
+ offset, ram_bytes,
+ BTRFS_ADD_DELAYED_EXTENT, NULL, NULL);
return ret;
}
@@ -8422,11 +8538,11 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
extent_op->is_data = false;
extent_op->level = level;
- ret = btrfs_add_delayed_tree_ref(fs_info, trans,
- ins.objectid, ins.offset,
- parent, root_objectid, level,
+ ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid,
+ ins.offset, parent,
+ root_objectid, level,
BTRFS_ADD_DELAYED_EXTENT,
- extent_op);
+ extent_op, NULL, NULL);
if (ret)
goto out_free_delayed;
}
@@ -9150,7 +9266,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
if (err)
goto out_end_trans;
- ret = btrfs_del_root(trans, tree_root, &root->root_key);
+ ret = btrfs_del_root(trans, fs_info, &root->root_key);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
@@ -9909,11 +10025,8 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info,
cache->key.offset = size;
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- cache->sectorsize = fs_info->sectorsize;
cache->fs_info = fs_info;
- cache->full_stripe_len = btrfs_full_stripe_len(fs_info,
- &fs_info->mapping_tree,
- start);
+ cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
set_free_space_tree_thresholds(cache);
atomic_set(&cache->count, 1);
@@ -10059,19 +10172,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
}
trace_btrfs_add_block_group(info, cache, 0);
- ret = update_space_info(info, cache->flags, found_key.offset,
- btrfs_block_group_used(&cache->item),
- cache->bytes_super, &space_info);
- if (ret) {
- btrfs_remove_free_space_cache(cache);
- spin_lock(&info->block_group_cache_lock);
- rb_erase(&cache->cache_node,
- &info->block_group_cache_tree);
- RB_CLEAR_NODE(&cache->cache_node);
- spin_unlock(&info->block_group_cache_lock);
- btrfs_put_block_group(cache);
- goto error;
- }
+ update_space_info(info, cache->flags, found_key.offset,
+ btrfs_block_group_used(&cache->item),
+ cache->bytes_super, &space_info);
cache->space_info = space_info;
@@ -10159,8 +10262,7 @@ next:
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytes_used,
- u64 type, u64 chunk_objectid, u64 chunk_offset,
- u64 size)
+ u64 type, u64 chunk_offset, u64 size)
{
struct btrfs_block_group_cache *cache;
int ret;
@@ -10172,7 +10274,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
return -ENOMEM;
btrfs_set_block_group_used(&cache->item, bytes_used);
- btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
+ btrfs_set_block_group_chunk_objectid(&cache->item,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
btrfs_set_block_group_flags(&cache->item, type);
cache->flags = type;
@@ -10203,16 +10306,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
}
#endif
/*
- * Call to ensure the corresponding space_info object is created and
- * assigned to our block group, but don't update its counters just yet.
- * We want our bg to be added to the rbtree with its ->space_info set.
+ * Ensure the corresponding space_info object is created and
+ * assigned to our block group. We want our bg to be added to the rbtree
+ * with its ->space_info set.
*/
- ret = update_space_info(fs_info, cache->flags, 0, 0, 0,
- &cache->space_info);
- if (ret) {
- btrfs_remove_free_space_cache(cache);
- btrfs_put_block_group(cache);
- return ret;
+ cache->space_info = __find_space_info(fs_info, cache->flags);
+ if (!cache->space_info) {
+ ret = create_space_info(fs_info, cache->flags,
+ &cache->space_info);
+ if (ret) {
+ btrfs_remove_free_space_cache(cache);
+ btrfs_put_block_group(cache);
+ return ret;
+ }
}
ret = btrfs_add_block_group_cache(fs_info, cache);
@@ -10227,18 +10333,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
* the rbtree, update the space info's counters.
*/
trace_btrfs_add_block_group(fs_info, cache, 1);
- ret = update_space_info(fs_info, cache->flags, size, bytes_used,
+ update_space_info(fs_info, cache->flags, size, bytes_used,
cache->bytes_super, &cache->space_info);
- if (ret) {
- btrfs_remove_free_space_cache(cache);
- spin_lock(&fs_info->block_group_cache_lock);
- rb_erase(&cache->cache_node,
- &fs_info->block_group_cache_tree);
- RB_CLEAR_NODE(&cache->cache_node);
- spin_unlock(&fs_info->block_group_cache_lock);
- btrfs_put_block_group(cache);
- return ret;
- }
update_global_block_rsv(fs_info);
__link_block_group(cache->space_info, cache);
@@ -10786,21 +10882,21 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
mixed = 1;
flags = BTRFS_BLOCK_GROUP_SYSTEM;
- ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
+ ret = create_space_info(fs_info, flags, &space_info);
if (ret)
goto out;
if (mixed) {
flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
- ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
+ ret = create_space_info(fs_info, flags, &space_info);
} else {
flags = BTRFS_BLOCK_GROUP_METADATA;
- ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
+ ret = create_space_info(fs_info, flags, &space_info);
if (ret)
goto out;
flags = BTRFS_BLOCK_GROUP_DATA;
- ret = update_space_info(fs_info, flags, 0, 0, 0, &space_info);
+ ret = create_space_info(fs_info, flags, &space_info);
}
out:
return ret;
@@ -10975,14 +11071,14 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
}
/*
- * btrfs_{start,end}_write_no_snapshoting() are similar to
+ * btrfs_{start,end}_write_no_snapshotting() are similar to
* mnt_{want,drop}_write(), they are used to prevent some tasks from writing
* data into the page cache through nocow before the subvolume is snapshoted,
* but flush the data into disk after the snapshot creation, or to prevent
- * operations while snapshoting is ongoing and that cause the snapshot to be
+ * operations while snapshotting is ongoing and that cause the snapshot to be
* inconsistent (writes followed by expanding truncates for example).
*/
-void btrfs_end_write_no_snapshoting(struct btrfs_root *root)
+void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
{
percpu_counter_dec(&root->subv_writers->counter);
/*
@@ -10993,9 +11089,9 @@ void btrfs_end_write_no_snapshoting(struct btrfs_root *root)
wake_up(&root->subv_writers->wait);
}
-int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
+int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
{
- if (atomic_read(&root->will_be_snapshoted))
+ if (atomic_read(&root->will_be_snapshotted))
return 0;
percpu_counter_inc(&root->subv_writers->counter);
@@ -11003,14 +11099,14 @@ int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
* Make sure counter is updated before we check for snapshot creation.
*/
smp_mb();
- if (atomic_read(&root->will_be_snapshoted)) {
- btrfs_end_write_no_snapshoting(root);
+ if (atomic_read(&root->will_be_snapshotted)) {
+ btrfs_end_write_no_snapshotting(root);
return 0;
}
return 1;
}
-static int wait_snapshoting_atomic_t(atomic_t *a)
+static int wait_snapshotting_atomic_t(atomic_t *a)
{
schedule();
return 0;
@@ -11021,11 +11117,11 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
while (true) {
int ret;
- ret = btrfs_start_write_no_snapshoting(root);
+ ret = btrfs_start_write_no_snapshotting(root);
if (ret)
break;
- wait_on_atomic_t(&root->will_be_snapshoted,
- wait_snapshoting_atomic_t,
+ wait_on_atomic_t(&root->will_be_snapshotted,
+ wait_snapshotting_atomic_t,
TASK_UNINTERRUPTIBLE);
}
}