From 4d5a0565cebf12c2ef854e4ac1961f13a710a950 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Mon, 30 Apr 2012 11:17:25 +0200 Subject: Btrfs: remove call to btrfs_header_nritems with no effect This is a leftover from cleanup patch 559af821. Before the cleanup, btrfs_header_nritems was called inside an if condition. As it has no side effects we need to preserve here, it should simply be dropped. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d7a96cfdc50a..836e4e03edca 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1650,8 +1650,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - btrfs_header_nritems(mid); - left = read_node_slot(root, parent, pslot - 1); if (left) { btrfs_tree_lock(left); @@ -1681,7 +1679,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, wret = push_node_left(trans, root, left, mid, 1); if (wret < 0) ret = wret; - btrfs_header_nritems(mid); } /* -- cgit v1.2.3 From 8ba97a15e7d4f70b9af71fa1db86a28dd17ad1b2 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Mon, 4 Jun 2012 16:54:57 +0200 Subject: Btrfs: use btrfs_read_lock_root_node in get_old_root get_old_root could race with root node updates because we weren't locking the node early enough. Use btrfs_read_lock_root_node to grab the root locked in the very beginning and release the lock as soon as possible (just like btrfs_search_slot does). Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 836e4e03edca..2cde7b0a0106 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1143,6 +1143,13 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, return eb_rewin; } +/* + * get_old_root() rewinds the state of @root's root node to the given @time_seq + * value. If there are no changes, the current root->root_node is returned. If + * anything changed in between, there's a fresh buffer allocated on which the + * rewind operations are done. In any case, the returned buffer is read locked. + * Returns NULL on error (with no locks held). + */ static inline struct extent_buffer * get_old_root(struct btrfs_root *root, u64 time_seq) { @@ -1151,6 +1158,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) struct tree_mod_root *old_root; u64 old_generation; + eb = btrfs_read_lock_root_node(root); tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); if (!tm) return root->node; @@ -1173,15 +1181,21 @@ get_old_root(struct btrfs_root *root, u64 time_seq) /* there's a root replace operation for the current root */ eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, root->nodesize); + } + btrfs_tree_read_unlock(root->node); + free_extent_buffer(root->node); + if (!eb) + return NULL; + btrfs_tree_read_lock(eb); + if (old_root->logical != root->node->start) { btrfs_set_header_bytenr(eb, eb->start); btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(eb, root->root_key.objectid); } - if (!eb) - return NULL; btrfs_set_header_level(eb, old_root->level); btrfs_set_header_generation(eb, old_generation); __tree_mod_log_rewind(eb, time_seq, tm); + extent_buffer_get(eb); return eb; } @@ -2612,9 +2626,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key, again: b = get_old_root(root, time_seq); - extent_buffer_get(b); level = btrfs_header_level(b); - btrfs_tree_read_lock(b); p->locks[level] = BTRFS_READ_LOCK; while (b) { -- cgit v1.2.3 From a95236d99fa56766f11056903439f55fe5038bcf Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Tue, 5 Jun 2012 16:41:24 +0200 Subject: Btrfs: fix return value for __tree_mod_log_oldest_root In __tree_mod_log_oldest_root() we must return the found operation even if it's not a ROOT_REPLACE operation. Otherwise, the caller assumes that there are no operations to be rewinded and returns immediately. The code in the caller is modified to improve readability. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2cde7b0a0106..50d7c99ddce7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1023,6 +1023,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, looped = 1; } + /* if there's no old root to return, return what we found instead */ + if (!found) + found = tm; + return found; } @@ -1155,18 +1159,24 @@ get_old_root(struct btrfs_root *root, u64 time_seq) { struct tree_mod_elem *tm; struct extent_buffer *eb; - struct tree_mod_root *old_root; + struct tree_mod_root *old_root = NULL; u64 old_generation; + u64 logical; eb = btrfs_read_lock_root_node(root); tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq); if (!tm) return root->node; - old_root = &tm->old_root; - old_generation = tm->generation; + if (tm->op == MOD_LOG_ROOT_REPLACE) { + old_root = &tm->old_root; + old_generation = tm->generation; + logical = old_root->logical; + } else { + logical = root->node->start; + } - tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); + tm = tree_mod_log_search(root->fs_info, logical, time_seq); /* * there was an item in the log when __tree_mod_log_oldest_root * returned. this one must not go away, because the time_seq passed to @@ -1174,26 +1184,23 @@ get_old_root(struct btrfs_root *root, u64 time_seq) */ BUG_ON(!tm); - if (old_root->logical == root->node->start) { - /* there are logged operations for the current root */ - eb = btrfs_clone_extent_buffer(root->node); - } else { - /* there's a root replace operation for the current root */ + if (old_root) eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, root->nodesize); - } + else + eb = btrfs_clone_extent_buffer(root->node); btrfs_tree_read_unlock(root->node); free_extent_buffer(root->node); if (!eb) return NULL; btrfs_tree_read_lock(eb); - if (old_root->logical != root->node->start) { + if (old_root) { btrfs_set_header_bytenr(eb, eb->start); btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); btrfs_set_header_owner(eb, root->root_key.objectid); + btrfs_set_header_level(eb, old_root->level); + btrfs_set_header_generation(eb, old_generation); } - btrfs_set_header_level(eb, old_root->level); - btrfs_set_header_generation(eb, old_generation); __tree_mod_log_rewind(eb, time_seq, tm); extent_buffer_get(eb); -- cgit v1.2.3 From 3d7806eca43e73a9721d2e09369200ed93036bd0 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Mon, 11 Jun 2012 08:29:29 +0200 Subject: Btrfs: add btrfs_next_old_leaf To make sense of the tree mod log, the backref walker not only needs btrfs_search_old_slot, but it also called btrfs_next_leaf, which in turn was calling btrfs_search_slot. This obviously didn't give the correct result. This commit adds btrfs_next_old_leaf, a drop-in replacement for btrfs_next_leaf with a time_seq parameter. If it is zero, it behaves exactly like btrfs_next_leaf. If it is non-zero, it will use btrfs_search_old_slot with this time_seq parameter. Signed-off-by: Jan Schmidt --- fs/btrfs/backref.c | 7 ++++--- fs/btrfs/ctree.c | 11 ++++++++++- fs/btrfs/ctree.h | 2 ++ 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 579131de1b3b..8f7d1237b7a0 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -179,7 +179,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, struct ulist *parents, int level, - struct btrfs_key *key, u64 wanted_disk_byte, + struct btrfs_key *key, u64 time_seq, + u64 wanted_disk_byte, const u64 *extent_item_pos) { int ret; @@ -212,7 +213,7 @@ add_parent: */ while (1) { eie = NULL; - ret = btrfs_next_leaf(root, path); + ret = btrfs_next_old_leaf(root, path, time_seq); if (ret < 0) return ret; if (ret) @@ -297,7 +298,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, if (level == 0) btrfs_item_key_to_cpu(eb, &key, path->slots[0]); - ret = add_all_parents(root, path, parents, level, &key, + ret = add_all_parents(root, path, parents, level, &key, time_seq, ref->wanted_disk_byte, extent_item_pos); out: btrfs_free_path(path); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50d7c99ddce7..cb76b2a1b908 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5016,6 +5016,12 @@ next: * returns < 0 on io errors. */ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) +{ + return btrfs_next_old_leaf(root, path, 0); +} + +int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, + u64 time_seq) { int slot; int level; @@ -5041,7 +5047,10 @@ again: path->keep_locks = 1; path->leave_spinning = 1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (time_seq) + ret = btrfs_search_old_slot(root, &key, path, time_seq); + else + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); path->keep_locks = 0; if (ret < 0) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0151ca1ac657..db15e9e7b91c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2753,6 +2753,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, } int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); +int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, + u64 time_seq); static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) { ++p->slots[0]; -- cgit v1.2.3 From 3310c36eef330766fd23d50796287ad764929e24 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Mon, 11 Jun 2012 10:52:38 +0200 Subject: Btrfs: fix race in tree mod log addition When adding to the tree modification log, we grab two locks at different stages. We must not drop the outer lock until we're done with section protected by the inner lock. This moves the unlock call for the outer lock to the appropriate position. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index cb76b2a1b908..04b06bcf2ca9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, return 0; } +/* + * This allocates memory and gets a tree modification sequence number when + * needed. + * + * Returns 0 when no sequence number is needed, < 0 on error. + * Returns 1 when a sequence number was added. In this case, + * fs_info->tree_mod_seq_lock was acquired and must be released by the caller + * after inserting into the rb tree. + */ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, struct tree_mod_elem **tm_ret) { @@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags, */ kfree(tm); seq = 0; + spin_unlock(&fs_info->tree_mod_seq_lock); } else { __get_tree_mod_seq(fs_info, &tm->elem); seq = tm->elem.seq; } - spin_unlock(&fs_info->tree_mod_seq_lock); return seq; } @@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info, tm->slot = slot; tm->generation = btrfs_node_ptr_generation(eb, slot); - return __tree_mod_log_insert(fs_info, tm); + ret = __tree_mod_log_insert(fs_info, tm); + spin_unlock(&fs_info->tree_mod_seq_lock); + return ret; } static noinline int @@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, tm->move.nr_items = nr_items; tm->op = MOD_LOG_MOVE_KEYS; - return __tree_mod_log_insert(fs_info, tm); + ret = __tree_mod_log_insert(fs_info, tm); + spin_unlock(&fs_info->tree_mod_seq_lock); + return ret; } static noinline int @@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, tm->generation = btrfs_header_generation(old_root); tm->op = MOD_LOG_ROOT_REPLACE; - return __tree_mod_log_insert(fs_info, tm); + ret = __tree_mod_log_insert(fs_info, tm); + spin_unlock(&fs_info->tree_mod_seq_lock); + return ret; } static struct tree_mod_elem * -- cgit v1.2.3 From 4325edd0786fdd3909f9550e52a963b2fe54f78b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Jun 2012 20:02:02 -0400 Subject: Btrfs: init old_generation in get_old_root gcc was giving an uninit variable warning here. Strictly speaking we don't need to init it, but this will make things much less error prone. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 04b06bcf2ca9..15cbc2bf4ff0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1175,7 +1175,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) struct tree_mod_elem *tm; struct extent_buffer *eb; struct tree_mod_root *old_root = NULL; - u64 old_generation; + u64 old_generation = 0; u64 logical; eb = btrfs_read_lock_root_node(root); -- cgit v1.2.3 From 28da9fb4467f7a650cd31af6dfad3a4e4a3abf6e Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 21 Jun 2012 10:59:13 +0200 Subject: Btrfs: fix tree mod log for root replacements at leaf level For the tree mod log, we don't log any operations at leaf level. If the root is at the leaf level (i.e. the tree consists only of the root), then __tree_mod_log_oldest_root will find a ROOT_REPLACE operation in the log (because we always log that one no matter which level), but no other operations. With this patch __tree_mod_log_oldest_root exits cleanly instead of BUGging in this situation. get_old_root checks if its really a root at leaf level in case we don't have any operations and WARNs if this assumption breaks. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 15cbc2bf4ff0..7d1e4fc5fb6a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, if (!looped && !tm) return 0; /* - * we must have key remove operations in the log before the - * replace operation. + * if there are no tree operation for the oldest root, we simply + * return it. this should only happen if that (old) root is at + * level 0. */ - BUG_ON(!tm); + if (!tm) + break; + /* + * if there's an operation that's not a root replacement, we + * found the oldest version of our root. normally, we'll find a + * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here. + */ if (tm->op != MOD_LOG_ROOT_REPLACE) break; @@ -1192,16 +1199,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } tm = tree_mod_log_search(root->fs_info, logical, time_seq); - /* - * there was an item in the log when __tree_mod_log_oldest_root - * returned. this one must not go away, because the time_seq passed to - * us must be blocking its removal. - */ - BUG_ON(!tm); - if (old_root) - eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, - root->nodesize); + eb = alloc_dummy_extent_buffer(logical, root->nodesize); else eb = btrfs_clone_extent_buffer(root->node); btrfs_tree_read_unlock(root->node); @@ -1216,7 +1215,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq) btrfs_set_header_level(eb, old_root->level); btrfs_set_header_generation(eb, old_generation); } - __tree_mod_log_rewind(eb, time_seq, tm); + if (tm) + __tree_mod_log_rewind(eb, time_seq, tm); + else + WARN_ON(btrfs_header_level(eb) != 0); extent_buffer_get(eb); return eb; -- cgit v1.2.3 From c3e0696523862c48b4d8c73ffb2867e9db478338 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 21 Jun 2012 11:01:06 +0200 Subject: Btrfs: always put insert_ptr modifications into the tree mod log Several callers of insert_ptr set the tree_mod_log parameter to 0 to avoid addition to the tree mod log. In fact, we need all of those operations. This commit simply removes the additional parameter and makes addition to the tree mod log unconditional. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7d1e4fc5fb6a..e005d9b04616 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2997,7 +2997,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, static void insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 bytenr, - int slot, int level, int tree_mod_log) + int slot, int level) { struct extent_buffer *lower; int nritems; @@ -3010,7 +3010,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, BUG_ON(slot > nritems); BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != nritems) { - if (tree_mod_log && level) + if (level) tree_mod_log_eb_move(root->fs_info, lower, slot + 1, slot, nritems - slot); memmove_extent_buffer(lower, @@ -3018,7 +3018,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - if (tree_mod_log && level) { + if (level) { ret = tree_mod_log_insert_key(root->fs_info, lower, slot, MOD_LOG_KEY_ADD); BUG_ON(ret < 0); @@ -3106,7 +3106,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(split); insert_ptr(trans, root, path, &disk_key, split->start, - path->slots[level + 1] + 1, level + 1, 1); + path->slots[level + 1] + 1, level + 1); if (path->slots[level] >= mid) { path->slots[level] -= mid; @@ -3643,7 +3643,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(l, mid); btrfs_item_key(right, &disk_key, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1, 0); + path->slots[1] + 1, 1); btrfs_mark_buffer_dirty(right); btrfs_mark_buffer_dirty(l); @@ -3850,7 +3850,7 @@ again: if (mid <= slot) { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1, 0); + path->slots[1] + 1, 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -3859,7 +3859,7 @@ again: } else { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1], 1, 0); + path->slots[1], 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; -- cgit v1.2.3 From 19956c7e94a7a58d6df8c4db5ae62f9109a7c663 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Fri, 22 Jun 2012 14:52:13 +0200 Subject: Btrfs: fix tree mod log rewind of ADD operations When a MOD_LOG_KEY_ADD operation is rewinded, we remove the key from the tree block. If its not the last key, removal involves a move operation. This move operation was explicitly done before this commit. However, at insertion time, there's a move operation before the actual addition to make room for the new key, which is recorded in the tree mod log as well. This means, we must drop the move operation when rewinding the add operation, because the next operation we'll be rewinding will be the corresponding MOD_LOG_MOVE_KEYS operation. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e005d9b04616..b98f8604f4f6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1094,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, tm->generation); break; case MOD_LOG_KEY_ADD: - if (tm->slot != n - 1) { - o_dst = btrfs_node_key_ptr_offset(tm->slot); - o_src = btrfs_node_key_ptr_offset(tm->slot + 1); - memmove_extent_buffer(eb, o_dst, o_src, p_size); - } + /* if a move operation is needed it's in the log */ n--; break; case MOD_LOG_MOVE_KEYS: -- cgit v1.2.3 From d42244a0d36ad0939c5f173ebf15841a0678899c Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Fri, 22 Jun 2012 14:51:15 +0200 Subject: Btrfs: resolve tree mod log locking issue in btrfs_next_leaf With the tree mod log, we may end up with two roots (the current root and a rewinded version of it) both pointing to two leaves, l1 and l2, of which l2 had already been cow-ed in the current transaction. If we don't rewind any tree blocks, we cannot have two roots both pointing to an already cowed tree block. Now there is btrfs_next_leaf, which has a leaf locked and wants a lock on the next (right) leaf. And there is push_leaf_left, which has a (cowed!) leaf locked and wants a lock on the previous (left) leaf. In order to solve this dead lock situation, we use try_lock in btrfs_next_leaf (only in case it's called with a tree mod log time_seq paramter) and if we fail to get a lock on the next leaf, we give up our lock on the current leaf and retry from the very beginning. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b98f8604f4f6..8206b3900587 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5119,6 +5119,18 @@ again: if (!path->skip_locking) { ret = btrfs_try_tree_read_lock(next); + if (!ret && time_seq) { + /* + * If we don't get the lock, we may be racing + * with push_leaf_left, holding that lock while + * itself waiting for the leaf we've currently + * locked. To solve this situation, we give up + * on our lock and cycle. + */ + btrfs_release_path(path); + cond_resched(); + goto again; + } if (!ret) { btrfs_set_path_blocking(path); btrfs_tree_read_lock(next); -- cgit v1.2.3