diff options
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r-- | fs/btrfs/transaction.c | 266 |
1 files changed, 130 insertions, 136 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index c6a872a8a468..d89c6d3542ca 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -31,6 +31,7 @@ #include "inode-map.h" #include "volumes.h" #include "dev-replace.h" +#include "qgroup.h" #define BTRFS_ROOT_TRANS_TAG 0 @@ -62,7 +63,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) WARN_ON(atomic_read(&transaction->use_count) == 0); if (atomic_dec_and_test(&transaction->use_count)) { BUG_ON(!list_empty(&transaction->list)); - WARN_ON(transaction->delayed_refs.root.rb_node); + WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root)); while (!list_empty(&transaction->pending_chunks)) { struct extent_map *em; @@ -75,10 +76,21 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) } } -static noinline void switch_commit_root(struct btrfs_root *root) +static noinline void switch_commit_roots(struct btrfs_transaction *trans, + struct btrfs_fs_info *fs_info) { - free_extent_buffer(root->commit_root); - root->commit_root = btrfs_root_node(root); + struct btrfs_root *root, *tmp; + + down_write(&fs_info->commit_root_sem); + list_for_each_entry_safe(root, tmp, &trans->switch_commits, + dirty_list) { + list_del_init(&root->dirty_list); + free_extent_buffer(root->commit_root); + root->commit_root = btrfs_root_node(root); + if (is_fstree(root->objectid)) + btrfs_unpin_free_ino(root); + } + up_write(&fs_info->commit_root_sem); } static inline void extwriter_counter_inc(struct btrfs_transaction *trans, @@ -183,8 +195,8 @@ loop: atomic_set(&cur_trans->use_count, 2); cur_trans->start_time = get_seconds(); - cur_trans->delayed_refs.root = RB_ROOT; - cur_trans->delayed_refs.num_entries = 0; + cur_trans->delayed_refs.href_root = RB_ROOT; + atomic_set(&cur_trans->delayed_refs.num_entries, 0); cur_trans->delayed_refs.num_heads_ready = 0; cur_trans->delayed_refs.num_heads = 0; cur_trans->delayed_refs.flushing = 0; @@ -196,21 +208,18 @@ loop: */ smp_mb(); if 
(!list_empty(&fs_info->tree_mod_seq_list)) - WARN(1, KERN_ERR "btrfs: tree_mod_seq_list not empty when " + WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when " "creating a fresh transaction\n"); if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) - WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when " + WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when " "creating a fresh transaction\n"); atomic64_set(&fs_info->tree_mod_seq, 0); spin_lock_init(&cur_trans->delayed_refs.lock); - atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0); - atomic_set(&cur_trans->delayed_refs.ref_seq, 0); - init_waitqueue_head(&cur_trans->delayed_refs.wait); INIT_LIST_HEAD(&cur_trans->pending_snapshots); - INIT_LIST_HEAD(&cur_trans->ordered_operations); INIT_LIST_HEAD(&cur_trans->pending_chunks); + INIT_LIST_HEAD(&cur_trans->switch_commits); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); @@ -232,18 +241,19 @@ loop: static int record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - if (root->ref_cows && root->last_trans < trans->transid) { + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && + root->last_trans < trans->transid) { WARN_ON(root == root->fs_info->extent_root); WARN_ON(root->commit_root != root->node); /* - * see below for in_trans_setup usage rules + * see below for IN_TRANS_SETUP usage rules * we have the reloc mutex held now, so there * is only one writer in this function */ - root->in_trans_setup = 1; + set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state); - /* make sure readers find in_trans_setup before + /* make sure readers find IN_TRANS_SETUP before * they find our root->last_trans update */ smp_wmb(); @@ -270,7 +280,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, * But, we have to set root->last_trans before we * init the relocation root, otherwise, we trip over warnings * in ctree.c. 
The solution used here is to flag ourselves - * with root->in_trans_setup. When this is 1, we're still + * with root IN_TRANS_SETUP. When this is 1, we're still * fixing up the reloc trees and everyone must wait. * * When this is zero, they can trust root->last_trans and fly @@ -279,8 +289,8 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, * done before we pop in the zero below */ btrfs_init_reloc_root(trans, root); - smp_wmb(); - root->in_trans_setup = 0; + smp_mb__before_atomic(); + clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state); } return 0; } @@ -289,16 +299,16 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - if (!root->ref_cows) + if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) return 0; /* - * see record_root_in_trans for comments about in_trans_setup usage + * see record_root_in_trans for comments about IN_TRANS_SETUP usage * and barriers */ smp_rmb(); if (root->last_trans == trans->transid && - !root->in_trans_setup) + !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state)) return 0; mutex_lock(&root->fs_info->reloc_mutex); @@ -356,7 +366,7 @@ static int may_wait_transaction(struct btrfs_root *root, int type) static inline bool need_reserve_reloc_root(struct btrfs_root *root) { if (!root->fs_info->reloc_ctl || - !root->ref_cows || + !test_bit(BTRFS_ROOT_REF_COWS, &root->state) || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || root->reloc_root) return false; @@ -375,6 +385,9 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type, bool reloc_reserved = false; int ret; + /* Send isn't supposed to start transactions. 
*/ + ASSERT(current->journal_info != (void *)BTRFS_SEND_TRANS_STUB); + if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) return ERR_PTR(-EROFS); @@ -472,12 +485,14 @@ again: h->type = type; h->allocating_chunk = false; h->reloc_reserved = false; + h->sync = false; INIT_LIST_HEAD(&h->qgroup_ref_list); INIT_LIST_HEAD(&h->new_bgs); smp_mb(); if (cur_trans->state >= TRANS_STATE_BLOCKED && may_wait_transaction(root, type)) { + current->journal_info = h; btrfs_commit_transaction(h, root); goto again; } @@ -647,7 +662,7 @@ static int should_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { if (root->fs_info->global_block_rsv.space_info->full && - btrfs_should_throttle_delayed_refs(trans, root)) + btrfs_check_space_for_delayed_refs(trans, root)) return 1; return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5); @@ -684,20 +699,35 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, unsigned long cur = trans->delayed_ref_updates; int lock = (trans->type != TRANS_JOIN_NOLOCK); int err = 0; + int must_run_delayed_refs = 0; - if (--trans->use_count) { + if (trans->use_count > 1) { + trans->use_count--; trans->block_rsv = trans->orig_rsv; return 0; } - /* - * do the qgroup accounting as early as possible - */ - err = btrfs_delayed_refs_qgroup_accounting(trans, info); - btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; + if (!list_empty(&trans->new_bgs)) + btrfs_create_pending_block_groups(trans, root); + + trans->delayed_ref_updates = 0; + if (!trans->sync) { + must_run_delayed_refs = + btrfs_should_throttle_delayed_refs(trans, root); + cur = max_t(unsigned long, cur, 32); + + /* + * don't make the caller wait if they are from a NOLOCK + * or ATTACH transaction, it will deadlock with commit + */ + if (must_run_delayed_refs == 1 && + (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH))) + must_run_delayed_refs = 2; + } + if (trans->qgroup_reserved) { /* * the same root has to be 
passed here between start_transaction @@ -707,16 +737,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, trans->qgroup_reserved = 0; } - if (!list_empty(&trans->new_bgs)) - btrfs_create_pending_block_groups(trans, root); - - trans->delayed_ref_updates = 0; - if (btrfs_should_throttle_delayed_refs(trans, root)) { - cur = max_t(unsigned long, cur, 1); - trans->delayed_ref_updates = 0; - btrfs_run_delayed_refs(trans, root, cur); - } - btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; @@ -733,17 +753,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, } if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { - if (throttle) { - /* - * We may race with somebody else here so end up having - * to call end_transaction on ourselves again, so inc - * our use_count. - */ - trans->use_count++; + if (throttle) return btrfs_commit_transaction(trans, root); - } else { + else wake_up_process(info->transaction_kthread); - } } if (trans->type & __TRANS_FREEZABLE) @@ -773,6 +786,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, assert_qgroups_uptodate(trans); kmem_cache_free(btrfs_trans_handle_cachep, trans); + if (must_run_delayed_refs) { + btrfs_async_run_delayed_refs(root, cur, + must_run_delayed_refs == 1); + } return err; } @@ -788,12 +805,6 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, return __btrfs_end_transaction(trans, root, 1); } -int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - return __btrfs_end_transaction(trans, root, 1); -} - /* * when btree blocks are allocated, they have some corresponding bits set for * them in one of two extent_io trees. 
This is used to make sure all of @@ -933,9 +944,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, return ret; } - if (root != root->fs_info->extent_root) - switch_commit_root(root); - return 0; } @@ -991,15 +999,16 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, list_del_init(next); root = list_entry(next, struct btrfs_root, dirty_list); + if (root != fs_info->extent_root) + list_add_tail(&root->dirty_list, + &trans->transaction->switch_commits); ret = update_cowonly_root(trans, root); if (ret) return ret; } - down_write(&fs_info->extent_commit_sem); - switch_commit_root(fs_info->extent_root); - up_write(&fs_info->extent_commit_sem); - + list_add_tail(&fs_info->extent_root->dirty_list, + &trans->transaction->switch_commits); btrfs_after_dev_replace_commit(fs_info); return 0; @@ -1052,15 +1061,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, btrfs_save_ino_cache(root, trans); /* see comments in should_cow_block() */ - root->force_cow = 0; - smp_wmb(); + clear_bit(BTRFS_ROOT_FORCE_COW, &root->state); + smp_mb__after_atomic(); if (root->commit_root != root->node) { - mutex_lock(&root->fs_commit_mutex); - switch_commit_root(root); - btrfs_unpin_free_ino(root); - mutex_unlock(&root->fs_commit_mutex); - + list_add_tail(&root->dirty_list, + &trans->transaction->switch_commits); btrfs_set_root_node(&root->root_item, root->node); } @@ -1087,7 +1093,7 @@ int btrfs_defrag_root(struct btrfs_root *root) struct btrfs_trans_handle *trans; int ret; - if (xchg(&root->defrag_running, 1)) + if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state)) return 0; while (1) { @@ -1105,12 +1111,12 @@ int btrfs_defrag_root(struct btrfs_root *root) break; if (btrfs_defrag_cancelled(root->fs_info)) { - printk(KERN_DEBUG "btrfs: defrag_root cancelled\n"); + pr_debug("BTRFS: defrag_root cancelled\n"); ret = -EAGAIN; break; } } - root->defrag_running = 0; + clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state); return 
ret; } @@ -1174,12 +1180,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto no_free_objectid; } - pending->error = btrfs_qgroup_inherit(trans, fs_info, - root->root_key.objectid, - objectid, pending->inherit); - if (pending->error) - goto no_free_objectid; - key.objectid = objectid; key.offset = (u64)-1; key.type = BTRFS_ROOT_ITEM_KEY; @@ -1276,8 +1276,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } + /* + * We need to flush delayed refs in order to make sure all of our quota + * operations have been done before we call btrfs_qgroup_inherit. + */ + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + + ret = btrfs_qgroup_inherit(trans, fs_info, + root->root_key.objectid, + objectid, pending->inherit); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } + /* see comments in should_cow_block() */ - root->force_cow = 1; + set_bit(BTRFS_ROOT_FORCE_COW, &root->state); smp_wmb(); btrfs_set_root_node(new_root_item, tmp); @@ -1586,50 +1604,17 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, trace_btrfs_transaction_commit(root); - btrfs_scrub_continue(root); - if (current->journal_info == trans) current->journal_info = NULL; + btrfs_scrub_cancel(root->fs_info); kmem_cache_free(btrfs_trans_handle_cachep, trans); } -static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - int ret; - - ret = btrfs_run_delayed_items(trans, root); - /* - * running the delayed items may have added new refs. account - * them now so that they hinder processing of more delayed refs - * as little as possible. 
- */ - if (ret) { - btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); - return ret; - } - - ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); - if (ret) - return ret; - - /* - * rename don't use btrfs_join_transaction, so, once we - * set the transaction to blocked above, we aren't going - * to get any new ordered operations. We can safely run - * it here and no for sure that nothing new will be added - * to the list - */ - ret = btrfs_run_ordered_operations(trans, root, 1); - - return ret; -} - static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) { if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) - return btrfs_start_delalloc_roots(fs_info, 1); + return btrfs_start_delalloc_roots(fs_info, 1, -1); return 0; } @@ -1646,13 +1631,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *prev_trans = NULL; int ret; - ret = btrfs_run_ordered_operations(trans, root, 0); - if (ret) { - btrfs_abort_transaction(trans, root, ret); - btrfs_end_transaction(trans, root); - return ret; - } - /* Stop the commit early if ->aborted is set */ if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; @@ -1733,7 +1711,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (ret) goto cleanup_transaction; - ret = btrfs_flush_all_pending_stuffs(trans, root); + ret = btrfs_run_delayed_items(trans, root); if (ret) goto cleanup_transaction; @@ -1741,11 +1719,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, extwriter_counter_read(cur_trans) == 0); /* some pending stuffs might be added after the previous flush. */ - ret = btrfs_flush_all_pending_stuffs(trans, root); + ret = btrfs_run_delayed_items(trans, root); if (ret) goto cleanup_transaction; btrfs_wait_delalloc_flush(root->fs_info); + + btrfs_scrub_pause(root); /* * Ok now we need to make sure to block out any other joins while we * commit the transaction. 
We could have started a join before setting @@ -1760,7 +1740,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, /* ->aborted might be set after the previous check, so check it */ if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; - goto cleanup_transaction; + goto scrub_continue; } /* * the reloc mutex makes sure that we stop @@ -1777,7 +1757,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = create_pending_snapshots(trans, root->fs_info); if (ret) { mutex_unlock(&root->fs_info->reloc_mutex); - goto cleanup_transaction; + goto scrub_continue; } /* @@ -1793,13 +1773,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_run_delayed_items(trans, root); if (ret) { mutex_unlock(&root->fs_info->reloc_mutex); - goto cleanup_transaction; + goto scrub_continue; } ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); if (ret) { mutex_unlock(&root->fs_info->reloc_mutex); - goto cleanup_transaction; + goto scrub_continue; } /* @@ -1810,7 +1790,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, WARN_ON(cur_trans != trans->transaction); - btrfs_scrub_pause(root); /* btrfs_commit_tree_roots is responsible for getting the * various roots consistent with each other. Every pointer * in the tree of tree roots has to point to the most up to date @@ -1830,9 +1809,18 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (ret) { mutex_unlock(&root->fs_info->tree_log_mutex); mutex_unlock(&root->fs_info->reloc_mutex); - goto cleanup_transaction; + goto scrub_continue; } + /* + * Since the transaction is done, we should set the inode map cache flag + * before any other coming transaction.
+ */ + if (btrfs_test_opt(root, CHANGE_INODE_CACHE)) + btrfs_set_opt(root->fs_info->mount_opt, INODE_MAP_CACHE); + else + btrfs_clear_opt(root->fs_info->mount_opt, INODE_MAP_CACHE); + /* commit_fs_roots gets rid of all the tree log roots, it is now * safe to free the root of tree log roots */ @@ -1842,7 +1830,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, if (ret) { mutex_unlock(&root->fs_info->tree_log_mutex); mutex_unlock(&root->fs_info->reloc_mutex); - goto cleanup_transaction; + goto scrub_continue; } /* @@ -1853,7 +1841,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = cur_trans->aborted; mutex_unlock(&root->fs_info->tree_log_mutex); mutex_unlock(&root->fs_info->reloc_mutex); - goto cleanup_transaction; + goto scrub_continue; } btrfs_prepare_extent_commit(trans, root); @@ -1862,11 +1850,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_set_root_node(&root->fs_info->tree_root->root_item, root->fs_info->tree_root->node); - switch_commit_root(root->fs_info->tree_root); + list_add_tail(&root->fs_info->tree_root->dirty_list, + &cur_trans->switch_commits); btrfs_set_root_node(&root->fs_info->chunk_root->root_item, root->fs_info->chunk_root->node); - switch_commit_root(root->fs_info->chunk_root); + list_add_tail(&root->fs_info->chunk_root->dirty_list, + &cur_trans->switch_commits); + + switch_commit_roots(cur_trans, root->fs_info); assert_qgroups_uptodate(trans); update_super_roots(root); @@ -1889,13 +1881,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_error(root->fs_info, ret, "Error while writing out transaction"); mutex_unlock(&root->fs_info->tree_log_mutex); - goto cleanup_transaction; + goto scrub_continue; } ret = write_ctree_super(trans, root, 0); if (ret) { mutex_unlock(&root->fs_info->tree_log_mutex); - goto cleanup_transaction; + goto scrub_continue; } /* @@ -1938,6 +1930,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, return ret; 
+scrub_continue: + btrfs_scrub_continue(root); cleanup_transaction: btrfs_trans_release_metadata(trans, root); trans->block_rsv = NULL; @@ -1978,7 +1972,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) list_del_init(&root->root_list); spin_unlock(&fs_info->trans_lock); - pr_debug("btrfs: cleaner removing %llu\n", root->objectid); + pr_debug("BTRFS: cleaner removing %llu\n", root->objectid); btrfs_kill_all_delayed_nodes(root); |