diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-08-18 12:32:10 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2022-08-18 12:49:48 -0400 |
commit | dded444b20dd3f47393937315b8217535ff3c51d (patch) | |
tree | c6f6b3973b5de757b458bb9e0f4c7d81e4fbef4d /libbcachefs | |
parent | 51ffcc699369deaa0fb4333a68bbbdf523afba11 (diff) |
Update bcachefs sources to dfaf9a6ee2 lib/printbuf: Clean up headers
Diffstat (limited to 'libbcachefs')
-rw-r--r-- | libbcachefs/alloc_foreground.c | 190 | ||||
-rw-r--r-- | libbcachefs/alloc_foreground.h | 8 | ||||
-rw-r--r-- | libbcachefs/backpointers.c | 41 | ||||
-rw-r--r-- | libbcachefs/bcachefs.h | 15 | ||||
-rw-r--r-- | libbcachefs/bkey.c | 80 | ||||
-rw-r--r-- | libbcachefs/bkey.h | 11 | ||||
-rw-r--r-- | libbcachefs/btree_io.c | 23 | ||||
-rw-r--r-- | libbcachefs/btree_iter.c | 165 | ||||
-rw-r--r-- | libbcachefs/btree_iter.h | 17 | ||||
-rw-r--r-- | libbcachefs/btree_key_cache.c | 15 | ||||
-rw-r--r-- | libbcachefs/btree_locking.h | 30 | ||||
-rw-r--r-- | libbcachefs/btree_types.h | 2 | ||||
-rw-r--r-- | libbcachefs/btree_update_interior.c | 83 | ||||
-rw-r--r-- | libbcachefs/debug.c | 164 | ||||
-rw-r--r-- | libbcachefs/errcode.h | 1 | ||||
-rw-r--r-- | libbcachefs/fsck.c | 14 | ||||
-rw-r--r-- | libbcachefs/journal.c | 3 | ||||
-rw-r--r-- | libbcachefs/move.c | 2 | ||||
-rw-r--r-- | libbcachefs/rebalance.c | 3 | ||||
-rw-r--r-- | libbcachefs/subvolume.c | 4 | ||||
-rw-r--r-- | libbcachefs/super-io.c | 8 | ||||
-rw-r--r-- | libbcachefs/sysfs.c | 3 | ||||
-rw-r--r-- | libbcachefs/util.c | 9 | ||||
-rw-r--r-- | libbcachefs/util.h | 2 |
24 files changed, 575 insertions, 318 deletions
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 0a9f1313..c57baa1f 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -339,6 +339,8 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc skipped_need_journal_commit, skipped_nouse, cl); + if (!ob) + iter.path->preserve = false; err: set_btree_iter_dontneed(&iter); bch2_trans_iter_exit(trans, &iter); @@ -379,15 +381,15 @@ static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch * journal buckets - journal buckets will be < ca->new_fs_bucket_idx */ static noinline struct open_bucket * -bch2_bucket_alloc_trans_early(struct btree_trans *trans, - struct bch_dev *ca, - enum alloc_reserve reserve, - u64 *cur_bucket, - u64 *buckets_seen, - u64 *skipped_open, - u64 *skipped_need_journal_commit, - u64 *skipped_nouse, - struct closure *cl) +bch2_bucket_alloc_early(struct btree_trans *trans, + struct bch_dev *ca, + enum alloc_reserve reserve, + u64 *cur_bucket, + u64 *buckets_seen, + u64 *skipped_open, + u64 *skipped_need_journal_commit, + u64 *skipped_nouse, + struct closure *cl) { struct btree_iter iter; struct bkey_s_c k; @@ -430,7 +432,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans, return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found); } -static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, +static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, struct bch_dev *ca, enum alloc_reserve reserve, u64 *cur_bucket, @@ -445,15 +447,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, struct open_bucket *ob = NULL; int ret; - if (unlikely(!ca->mi.freespace_initialized)) - return bch2_bucket_alloc_trans_early(trans, ca, reserve, - cur_bucket, - buckets_seen, - skipped_open, - skipped_need_journal_commit, - skipped_nouse, - cl); - BUG_ON(ca->new_fs_bucket_idx); /* @@ -467,7 +460,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, break; for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k)); - *cur_bucket < k.k->p.offset && !ob; + *cur_bucket < k.k->p.offset; (*cur_bucket)++) { ret = btree_trans_too_many_iters(trans); if (ret) @@ -481,6 +474,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, skipped_need_journal_commit, skipped_nouse, k, cl); + if (ob) + break; } if (ob || ret) @@ -496,11 +491,13 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, * * Returns index of bucket on success, 0 on failure * */ -struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, +static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, + struct bch_dev *ca, enum alloc_reserve reserve, bool may_alloc_partial, struct closure *cl) { + struct bch_fs *c = trans->c; struct open_bucket *ob = NULL; struct bch_dev_usage usage; bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized); @@ -512,7 +509,6 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, u64 skipped_need_journal_commit = 0; u64 skipped_nouse = 0; bool waiting = false; - int ret; again: usage = bch2_dev_usage_read(ca); avail = dev_buckets_free(ca, usage, reserve); @@ -549,19 +545,26 @@ again: return ob; } - ret = bch2_trans_do(c, NULL, NULL, 0, - PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve, - &cur_bucket, - &buckets_seen, - &skipped_open, - &skipped_need_journal_commit, - &skipped_nouse, - cl))); + ob = likely(ca->mi.freespace_initialized) + ? bch2_bucket_alloc_freelist(trans, ca, reserve, + &cur_bucket, + &buckets_seen, + &skipped_open, + &skipped_need_journal_commit, + &skipped_nouse, + cl) + : bch2_bucket_alloc_early(trans, ca, reserve, + &cur_bucket, + &buckets_seen, + &skipped_open, + &skipped_need_journal_commit, + &skipped_nouse, + cl); if (skipped_need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); - if (!ob && !ret && !freespace_initialized && start) { + if (!ob && !freespace_initialized && start) { start = cur_bucket = 0; goto again; } @@ -570,7 +573,7 @@ again: ca->bucket_alloc_trans_early_cursor = cur_bucket; err: if (!ob) - ob = ERR_PTR(ret ?: -BCH_ERR_no_buckets_found); + ob = ERR_PTR(-BCH_ERR_no_buckets_found); if (IS_ERR(ob)) { trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve], @@ -590,6 +593,19 @@ err: return ob; } +struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, + enum alloc_reserve reserve, + bool may_alloc_partial, + struct closure *cl) +{ + struct open_bucket *ob; + + bch2_trans_do(c, NULL, NULL, 0, + PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve, + may_alloc_partial, cl))); + return ob; +} + static int __dev_stripe_cmp(struct dev_stripe_state *stripe, unsigned l, unsigned r) { @@ -655,7 +671,7 @@ static void add_new_bucket(struct bch_fs *c, ob_push(c, ptrs, ob); } -int bch2_bucket_alloc_set(struct bch_fs *c, +static int bch2_bucket_alloc_set_trans(struct btree_trans *trans, struct open_buckets *ptrs, struct dev_stripe_state *stripe, struct bch_devs_mask *devs_may_alloc, @@ -666,11 +682,12 @@ int bch2_bucket_alloc_set(struct bch_fs *c, unsigned flags, struct closure *cl) { + struct bch_fs *c = trans->c; struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc); unsigned dev; struct bch_dev *ca; - int ret = -BCH_ERR_insufficient_devices; + int ret = 0; unsigned i; BUG_ON(*nr_effective >= nr_replicas); @@ -694,16 +711,15 @@ int bch2_bucket_alloc_set(struct bch_fs *c, continue; } - ob = bch2_bucket_alloc(c, ca, reserve, + ob = bch2_bucket_alloc_trans(trans, ca, reserve, flags & BUCKET_MAY_ALLOC_PARTIAL, cl); if (!IS_ERR(ob)) bch2_dev_stripe_increment(ca, stripe); percpu_ref_put(&ca->ref); - if (IS_ERR(ob)) { - ret = PTR_ERR(ob); - - if (cl) + ret = PTR_ERR_OR_ZERO(ob); + if (ret) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl) break; continue; } @@ -711,15 +727,36 @@ int bch2_bucket_alloc_set(struct bch_fs *c, add_new_bucket(c, ptrs, devs_may_alloc, nr_effective, have_cache, flags, ob); - if (*nr_effective >= nr_replicas) { - ret = 0; + if (*nr_effective >= nr_replicas) break; - } } + if (*nr_effective >= nr_replicas) + ret = 0; + else if (!ret) + ret = -BCH_ERR_insufficient_devices; + return ret; } +int bch2_bucket_alloc_set(struct bch_fs *c, + struct open_buckets *ptrs, + struct dev_stripe_state *stripe, + struct bch_devs_mask *devs_may_alloc, + unsigned nr_replicas, + unsigned *nr_effective, + bool *have_cache, + enum alloc_reserve reserve, + unsigned flags, + struct closure *cl) +{ + return bch2_trans_do(c, NULL, NULL, 0, + bch2_bucket_alloc_set_trans(&trans, ptrs, stripe, + devs_may_alloc, nr_replicas, + nr_effective, have_cache, reserve, + flags, cl)); +} + /* Allocate from stripes: */ /* @@ -824,7 +861,7 @@ static void get_buckets_from_writepoint(struct bch_fs *c, wp->ptrs = ptrs_skip; } -static int open_bucket_add_buckets(struct bch_fs *c, +static int open_bucket_add_buckets(struct btree_trans *trans, struct open_buckets *ptrs, struct write_point *wp, struct bch_devs_list *devs_have, @@ -837,6 +874,7 @@ static int open_bucket_add_buckets(struct bch_fs *c, unsigned flags, struct closure *_cl) { + struct bch_fs *c = trans->c; struct bch_devs_mask devs; struct open_bucket *ob; struct closure *cl = NULL; @@ -868,7 +906,8 @@ static int open_bucket_add_buckets(struct bch_fs *c, target, erasure_code, nr_replicas, nr_effective, have_cache, flags, _cl); - if (bch2_err_matches(ret, BCH_ERR_freelist_empty) || + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || + bch2_err_matches(ret, BCH_ERR_freelist_empty) || bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) return ret; if (*nr_effective >= nr_replicas) @@ -887,10 +926,11 @@ retry_blocking: * Try nonblocking first, so that if one device is full we'll try from * other devices: */ - ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs, + ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs, nr_replicas, nr_effective, have_cache, reserve, flags, cl); if (ret && + !bch2_err_matches(ret, BCH_ERR_transaction_restart) && !bch2_err_matches(ret, BCH_ERR_insufficient_devices) && !cl && _cl) { cl = _cl; @@ -1010,15 +1050,25 @@ static bool try_decrease_writepoints(struct bch_fs *c, return true; } -static struct write_point *writepoint_find(struct bch_fs *c, +static void bch2_trans_mutex_lock(struct btree_trans *trans, + struct mutex *lock) +{ + if (!mutex_trylock(lock)) { + bch2_trans_unlock(trans); + mutex_lock(lock); + } +} + +static struct write_point *writepoint_find(struct btree_trans *trans, unsigned long write_point) { + struct bch_fs *c = trans->c; struct write_point *wp, *oldest; struct hlist_head *head; if (!(write_point & 1UL)) { wp = (struct write_point *) write_point; - mutex_lock(&wp->lock); + bch2_trans_mutex_lock(trans, &wp->lock); return wp; } @@ -1027,7 +1077,7 @@ restart_find: wp = __writepoint_find(head, write_point); if (wp) { lock_wp: - mutex_lock(&wp->lock); + bch2_trans_mutex_lock(trans, &wp->lock); if (wp->write_point == write_point) goto out; mutex_unlock(&wp->lock); @@ -1040,8 +1090,8 @@ restart_find_oldest: if (!oldest || time_before64(wp->last_used, oldest->last_used)) oldest = wp; - mutex_lock(&oldest->lock); - mutex_lock(&c->write_points_hash_lock); + bch2_trans_mutex_lock(trans, &oldest->lock); + bch2_trans_mutex_lock(trans, &c->write_points_hash_lock); if (oldest >= c->write_points + c->write_points_nr || try_increase_writepoints(c)) { mutex_unlock(&c->write_points_hash_lock); @@ -1069,7 +1119,7 @@ out: /* * Get us an open_bucket we can allocate from, return with it locked: */ -struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, +struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *trans, unsigned target, unsigned erasure_code, struct write_point_specifier write_point, @@ -1080,6 +1130,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, unsigned flags, struct closure *cl) { + struct bch_fs *c = trans->c; struct write_point *wp; struct open_bucket *ob; struct open_buckets ptrs; @@ -1099,7 +1150,7 @@ retry: write_points_nr = c->write_points_nr; have_cache = false; - wp = writepoint_find(c, write_point.v); + wp = writepoint_find(trans, write_point.v); if (wp->data_type == BCH_DATA_user) ob_flags |= BUCKET_MAY_ALLOC_PARTIAL; @@ -1109,21 +1160,22 @@ retry: have_cache = true; if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) { - ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, + ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, target, erasure_code, nr_replicas, &nr_effective, &have_cache, reserve, ob_flags, cl); } else { - ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, + ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, target, erasure_code, nr_replicas, &nr_effective, &have_cache, reserve, ob_flags, NULL); - if (!ret) + if (!ret || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto alloc_done; - ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, + ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have, 0, erasure_code, nr_replicas, &nr_effective, &have_cache, reserve, @@ -1180,6 +1232,32 @@ err: return ERR_PTR(ret); } +struct write_point *bch2_alloc_sectors_start(struct bch_fs *c, + unsigned target, + unsigned erasure_code, + struct write_point_specifier write_point, + struct bch_devs_list *devs_have, + unsigned nr_replicas, + unsigned nr_replicas_required, + enum alloc_reserve reserve, + unsigned flags, + struct closure *cl) +{ + struct write_point *wp; + + bch2_trans_do(c, NULL, NULL, 0, + PTR_ERR_OR_ZERO(wp = bch2_alloc_sectors_start_trans(&trans, target, + erasure_code, + write_point, + devs_have, + nr_replicas, + nr_replicas_required, + reserve, + flags, cl))); + return wp; + +} + struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob) { struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev); diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h index 8bc78877..6de63a35 100644 --- a/libbcachefs/alloc_foreground.h +++ b/libbcachefs/alloc_foreground.h @@ -136,6 +136,14 @@ int bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *, unsigned, unsigned *, bool *, enum alloc_reserve, unsigned, struct closure *); +struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *, + unsigned, unsigned, + struct write_point_specifier, + struct bch_devs_list *, + unsigned, unsigned, + enum alloc_reserve, + unsigned, + struct closure *); struct write_point *bch2_alloc_sectors_start(struct bch_fs *, unsigned, unsigned, struct write_point_specifier, diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 5a46b25b..029b1ec1 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -492,7 +492,7 @@ static void backpointer_not_found(struct btree_trans *trans, prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, k); if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) - bch_err(c, "%s", buf.buf); + bch_err_ratelimited(c, "%s", buf.buf); else bch2_trans_inconsistent(trans, "%s", buf.buf); @@ -526,9 +526,21 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) return k; - backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent"); - bch2_trans_iter_exit(trans, iter); + + if (bp.level) { + /* + * If a backpointer for a btree node wasn't found, it may be + * because it was overwritten by a new btree node that hasn't + * been written out yet - backpointer_get_node() checks for + * this: + */ + bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp); + bch2_trans_iter_exit(trans, iter); + return bkey_s_c_null; + } + + backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent"); return bkey_s_c_null; } @@ -540,7 +552,6 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree *b; - struct bkey_s_c k; BUG_ON(!bp.level); @@ -551,22 +562,24 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans, bp.level - 1, 0); b = bch2_btree_iter_peek_node(iter); - if (IS_ERR(b)) { - bch2_trans_iter_exit(trans, iter); - return b; - } + if (IS_ERR(b)) + goto err; if (extent_matches_bp(c, bp.btree_id, bp.level, bkey_i_to_s_c(&b->key), bucket, bp)) return b; - if (!btree_node_will_make_reachable(b)) - backpointer_not_found(trans, bucket, bp_offset, - bp, k, "btree node"); - + if (btree_node_will_make_reachable(b)) { + b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); + } else { + backpointer_not_found(trans, bucket, bp_offset, bp, + bkey_i_to_s_c(&b->key), "btree node"); + b = NULL; + } +err: bch2_trans_iter_exit(trans, iter); - return NULL; + return b; } static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, @@ -829,6 +842,8 @@ static int check_one_backpointer(struct btree_trans *trans, k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp); ret = bkey_err(k); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + return 0; if (ret) return ret; diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 8ffdb4de..a5bf8087 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -319,8 +319,6 @@ BCH_DEBUG_PARAMS_DEBUG() #undef BCH_DEBUG_PARAM #endif -#define BCH_LOCK_TIME_NR 128 - #define BCH_TIME_STATS() \ x(btree_node_mem_alloc) \ x(btree_node_split) \ @@ -531,9 +529,13 @@ struct btree_debug { unsigned id; }; -struct lock_held_stats { - struct time_stats times[BCH_LOCK_TIME_NR]; - const char *names[BCH_LOCK_TIME_NR]; +#define BCH_TRANSACTIONS_NR 128 + +struct btree_transaction_stats { + struct mutex lock; + struct time_stats lock_hold_times; + unsigned nr_max_paths; + char *max_paths_text; }; struct bch_fs_pcpu { @@ -930,7 +932,8 @@ struct bch_fs { struct time_stats times[BCH_TIME_STAT_NR]; - struct lock_held_stats lock_held_stats; + const char *btree_transaction_fns[BCH_TRANSACTIONS_NR]; + struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR]; }; static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages) diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c index cc068963..d348175e 100644 --- a/libbcachefs/bkey.c +++ b/libbcachefs/bkey.c @@ -19,33 +19,49 @@ const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT; struct bkey __bch2_bkey_unpack_key(const struct bkey_format *, const struct bkey_packed *); -void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits) +void bch2_bkey_packed_to_binary_text(struct printbuf *out, + const struct bkey_format *f, + const struct bkey_packed *k) { - unsigned bit = high_bit_offset, done = 0; + const u64 *p = high_word(f, k); + unsigned word_bits = 64 - high_bit_offset; + unsigned nr_key_bits = bkey_format_key_bits(f) + high_bit_offset; + u64 v = *p & (~0ULL >> high_bit_offset); + + if (!nr_key_bits) { + prt_str(out, "(empty)"); + return; + } while (1) { - while (bit < 64) { - if (done && !(done % 8)) - *out++ = ' '; - *out++ = *p & (1ULL << (63 - bit)) ? '1' : '0'; - bit++; - done++; - if (done == nr_bits) { - *out++ = '\0'; - return; - } + unsigned next_key_bits = nr_key_bits; + + if (nr_key_bits < 64) { + v >>= 64 - nr_key_bits; + next_key_bits = 0; + } else { + next_key_bits -= 64; } + bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits)); + + if (!next_key_bits) + break; + + prt_char(out, ' '); + p = next_word(p); - bit = 0; + v = *p; + word_bits = 64; + nr_key_bits = next_key_bits; } } #ifdef CONFIG_BCACHEFS_DEBUG static void bch2_bkey_pack_verify(const struct bkey_packed *packed, - const struct bkey *unpacked, - const struct bkey_format *format) + const struct bkey *unpacked, + const struct bkey_format *format) { struct bkey tmp; @@ -57,23 +73,35 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed, tmp = __bch2_bkey_unpack_key(format, packed); if (memcmp(&tmp, unpacked, sizeof(struct bkey))) { - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; - char buf3[160], buf4[160]; + struct printbuf buf = PRINTBUF; - bch2_bkey_to_text(&buf1, unpacked); - bch2_bkey_to_text(&buf2, &tmp); - bch2_to_binary(buf3, (void *) unpacked, 80); - bch2_to_binary(buf4, high_word(format, packed), 80); - - panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n", + prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n", format->key_u64s, format->bits_per_field[0], format->bits_per_field[1], format->bits_per_field[2], format->bits_per_field[3], - format->bits_per_field[4], - buf1.buf, buf2.buf, buf3, buf4); + format->bits_per_field[4]); + + prt_printf(&buf, "compiled unpack: "); + bch2_bkey_to_text(&buf, unpacked); + prt_newline(&buf); + + prt_printf(&buf, "c unpack: "); + bch2_bkey_to_text(&buf, &tmp); + prt_newline(&buf); + + prt_printf(&buf, "compiled unpack: "); + bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current, + (struct bkey_packed *) unpacked); + prt_newline(&buf); + + prt_printf(&buf, "c unpack: "); + bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current, + (struct bkey_packed *) &tmp); + prt_newline(&buf); + + panic("%s", buf.buf); } } diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h index 7dee3d8e..df9fb859 100644 --- a/libbcachefs/bkey.h +++ b/libbcachefs/bkey.h @@ -12,7 +12,9 @@ #define HAVE_BCACHEFS_COMPILED_UNPACK 1 #endif -void bch2_to_binary(char *, const u64 *, unsigned); +void bch2_bkey_packed_to_binary_text(struct printbuf *, + const struct bkey_format *, + const struct bkey_packed *); /* bkey with split value, const */ struct bkey_s_c { @@ -42,12 +44,15 @@ static inline size_t bkey_val_bytes(const struct bkey *k) static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s) { - k->u64s = BKEY_U64s + val_u64s; + unsigned u64s = BKEY_U64s + val_u64s; + + BUG_ON(u64s > U8_MAX); + k->u64s = u64s; } static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) { - k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64)); + set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64))); } #define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k))) diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index ae731b3a..8aad87ea 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -616,7 +616,6 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) (u64 *) vstruct_end(i) - (u64 *) k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift); set_btree_bset_end(b, t); - bch2_bset_set_no_aux_tree(b, t); } for (k = i->start; k != vstruct_last(i); k = bkey_next(k)) @@ -626,10 +625,14 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b) if (k != vstruct_last(i)) { i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start); set_btree_bset_end(b, t); - bch2_bset_set_no_aux_tree(b, t); } } + /* + * Always rebuild search trees: eytzinger search tree nodes directly + * depend on the values of min/max key: + */ + bch2_bset_set_no_aux_tree(b, b->set); bch2_btree_build_aux_trees(b); for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { @@ -778,8 +781,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b, } static int validate_bset_keys(struct bch_fs *c, struct btree *b, - struct bset *i, unsigned *whiteout_u64s, - int write, bool have_retry) + struct bset *i, int write, bool have_retry) { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; @@ -915,7 +917,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, } while (b->written < (ptr_written ?: btree_sectors(c))) { - unsigned sectors, whiteout_u64s = 0; + unsigned sectors; struct nonce nonce; struct bch_csum csum; bool first = !b->written; @@ -984,8 +986,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (!b->written) btree_node_set_format(b, b->data->format); - ret = validate_bset_keys(c, b, i, &whiteout_u64s, - READ, have_retry); + ret = validate_bset_keys(c, b, i, READ, have_retry); if (ret) goto fsck_err; @@ -1011,11 +1012,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, if (blacklisted && !first) continue; - sort_iter_add(iter, i->start, - vstruct_idx(i, whiteout_u64s)); - sort_iter_add(iter, - vstruct_idx(i, whiteout_u64s), + vstruct_idx(i, 0), vstruct_last(i)); nonblacklisted_written = b->written; @@ -1745,7 +1743,6 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - unsigned whiteout_u64s = 0; struct printbuf buf = PRINTBUF; int ret; @@ -1758,7 +1755,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, if (ret) return ret; - ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?: + ret = validate_bset_keys(c, b, i, WRITE, false) ?: validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false); if (ret) { bch2_inconsistent_error(c); diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 04a61318..1d4b9fde 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1418,16 +1418,16 @@ static __always_inline int btree_path_down(struct btree_trans *trans, if (unlikely(ret)) goto err; - mark_btree_node_locked(trans, path, level, lock_type); - btree_path_level_init(trans, path, b); - if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) && unlikely(b != btree_node_mem_ptr(tmp.k))) btree_node_mem_ptr_set(trans, path, level + 1, b); if (btree_node_read_locked(path, level + 1)) btree_node_unlock(trans, path, level + 1); + + mark_btree_node_locked(trans, path, level, lock_type); path->level = level; + btree_path_level_init(trans, path, b); bch2_btree_path_verify_locks(path); err: @@ -1872,42 +1872,69 @@ void bch2_dump_trans_updates(struct btree_trans *trans) printbuf_exit(&buf); } -noinline __cold -void bch2_dump_trans_paths_updates(struct btree_trans *trans) +void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path) +{ + prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ", + path->idx, path->ref, path->intent_ref, + path->preserve ? 'P' : ' ', + path->should_be_locked ? 'S' : ' ', + bch2_btree_ids[path->btree_id], + path->level); + bch2_bpos_to_text(out, path->pos); + + prt_printf(out, " locks %u", path->nodes_locked); +#ifdef CONFIG_BCACHEFS_DEBUG + prt_printf(out, " %pS", (void *) path->ip_allocated); +#endif + prt_newline(out); +} + +void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans) { struct btree_path *path; - struct printbuf buf = PRINTBUF; unsigned idx; - trans_for_each_path_inorder(trans, path, idx) { - printbuf_reset(&buf); + trans_for_each_path_inorder(trans, path, idx) + bch2_btree_path_to_text(out, path); +} - bch2_bpos_to_text(&buf, path->pos); +noinline __cold +void bch2_dump_trans_paths_updates(struct btree_trans *trans) +{ + struct printbuf buf = PRINTBUF; - printk(KERN_ERR "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos %s locks %u %pS\n", - path->idx, path->ref, path->intent_ref, - path->preserve ? 'P' : ' ', - path->should_be_locked ? 'S' : ' ', - bch2_btree_ids[path->btree_id], - path->level, - buf.buf, - path->nodes_locked, -#ifdef CONFIG_BCACHEFS_DEBUG - (void *) path->ip_allocated -#else - NULL -#endif - ); - } + bch2_trans_paths_to_text(&buf, trans); + printk(KERN_ERR "%s", buf.buf); printbuf_exit(&buf); bch2_dump_trans_updates(trans); } +noinline +static void bch2_trans_update_max_paths(struct btree_trans *trans) +{ + struct btree_transaction_stats *s = btree_trans_stats(trans); + struct printbuf buf = PRINTBUF; + + bch2_trans_paths_to_text(&buf, trans); + + if (!buf.allocation_failure) { + mutex_lock(&s->lock); + if (s->nr_max_paths < hweight64(trans->paths_allocated)) { + s->nr_max_paths = hweight64(trans->paths_allocated); + swap(s->max_paths_text, buf.buf); + } + mutex_unlock(&s->lock); + } + + printbuf_exit(&buf); +} + static struct btree_path *btree_path_alloc(struct btree_trans *trans, struct btree_path *pos) { + struct btree_transaction_stats *s = btree_trans_stats(trans); struct btree_path *path; unsigned idx; @@ -1920,6 +1947,9 @@ static struct btree_path *btree_path_alloc(struct btree_trans *trans, idx = __ffs64(~trans->paths_allocated); trans->paths_allocated |= 1ULL << idx; + if (s && unlikely(hweight64(trans->paths_allocated) > s->nr_max_paths)) + bch2_trans_update_max_paths(trans); + path = &trans->paths[idx]; path->idx = idx; @@ -2013,12 +2043,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct struct bkey_s_c k; + EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); + EBUG_ON(!btree_node_locked(path, path->level)); + if (!path->cached) { struct btree_path_level *l = path_l(path); struct bkey_packed *_k; - EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); - _k = bch2_btree_node_iter_peek_all(&l->iter, l->b); k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null; @@ -2033,7 +2064,6 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct (path->btree_id != ck->key.btree_id || bkey_cmp(path->pos, ck->key.pos))); EBUG_ON(!ck || !ck->valid); - EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE); *u = ck->k->k; k = bkey_i_to_s_c(ck->k); @@ -2288,7 +2318,7 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans, * bkey_s_c_null: */ static noinline -struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) +struct bkey_s_c __btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) { struct btree_trans *trans = iter->trans; struct bch_fs *c = trans->c; @@ -2317,6 +2347,15 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos return bch2_btree_path_peek_slot(iter->key_cache_path, &u); } +static noinline +struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos) +{ + struct bkey_s_c ret = __btree_trans_peek_key_cache(iter, pos); + int err = bkey_err(ret) ?: bch2_btree_path_relock(iter->trans, iter->path, _THIS_IP_); + + return err ? bkey_s_c_err(err) : ret; +} + static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key) { struct btree_trans *trans = iter->trans; @@ -2347,15 +2386,12 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) && k.k && (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) { - ret = bkey_err(k2); + k = k2; + ret = bkey_err(k); if (ret) { - k = k2; bch2_btree_iter_set_pos(iter, iter->pos); goto out; } - - k = k2; - iter->k = *k.k; } if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL)) @@ -2803,8 +2839,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) btree_iter_ip_allocated(iter)); ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); - if (unlikely(ret)) - return bkey_s_c_err(ret); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } if ((iter->flags & BTREE_ITER_CACHED) || !(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) { @@ -2828,13 +2866,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) } if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) && - (k = btree_trans_peek_key_cache(iter, iter->pos)).k) { - if (bkey_err(k)) { - goto out_no_locked; - } else { + (k = __btree_trans_peek_key_cache(iter, iter->pos)).k) { + if (!bkey_err(k)) iter->k = *k.k; - goto out; - } + /* We're not returning a key from iter->path: */ + goto out_no_locked; } k = bch2_btree_path_peek_slot(iter->path, &iter->k); @@ -2862,11 +2898,14 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) struct bpos pos = iter->pos; k = bch2_btree_iter_peek(iter); - iter->pos = pos; + if (unlikely(bkey_err(k))) + bch2_btree_iter_set_pos(iter, pos); + else + iter->pos = pos; } if (unlikely(bkey_err(k))) - return k; + goto out_no_locked; next = k.k ? bkey_start_pos(k.k) : POS_MAX; @@ -3195,6 +3234,7 @@ u32 bch2_trans_begin(struct btree_trans *trans) bch2_trans_reset_updates(trans); + trans->restart_count++; trans->mem_top = 0; if (trans->fs_usage_deltas) { @@ -3245,10 +3285,10 @@ u32 bch2_trans_begin(struct btree_trans *trans) void bch2_trans_verify_not_restarted(struct btree_trans *trans, u32 restart_count) { - bch2_trans_inconsistent_on(trans_was_restarted(trans, restart_count), trans, - "trans->restart_count %u, should be %u, last restarted by %ps\n", - trans->restart_count, restart_count, - (void *) trans->last_restarted_ip); + if (trans_was_restarted(trans, restart_count)) + panic("trans->restart_count %u, should be %u, last restarted by %pS\n", + trans->restart_count, restart_count, + (void *) trans->last_restarted_ip); } static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) @@ -3269,6 +3309,22 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c) trans->updates = p; p += updates_bytes; } +static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct bch_fs *c, + const char *fn) +{ + unsigned i; + + for (i = 0; i < ARRAY_SIZE(c->btree_transaction_fns); i++) + if (!c->btree_transaction_fns[i] || + c->btree_transaction_fns[i] == fn) { + c->btree_transaction_fns[i] = fn; + return i; + } + + pr_warn_once("BCH_TRANSACTIONS_NR not big enough!"); + return i; +} + void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned expected_nr_iters, size_t expected_mem_bytes, @@ -3284,15 +3340,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, trans->fn = fn; trans->last_begin_time = ktime_get_ns(); trans->task = current; - - while (c->lock_held_stats.names[trans->lock_name_idx] != fn - && c->lock_held_stats.names[trans->lock_name_idx] != 0) - trans->lock_name_idx++; - - if (trans->lock_name_idx >= BCH_LOCK_TIME_NR) - pr_warn_once("lock_times array not big enough!"); - else - c->lock_held_stats.names[trans->lock_name_idx] = fn; + trans->fn_idx = bch2_trans_get_fn_idx(trans, c, fn); bch2_trans_alloc_paths(trans, c); @@ -3463,9 +3511,12 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c) int bch2_fs_btree_iter_init(struct bch_fs *c) { - unsigned nr = BTREE_ITER_MAX; + unsigned i, nr = BTREE_ITER_MAX; int ret; + for (i = 0; i < ARRAY_SIZE(c->btree_transaction_stats); i++) + mutex_init(&c->btree_transaction_stats[i].lock); + INIT_LIST_HEAD(&c->btree_trans_list); mutex_init(&c->btree_trans_lock); diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index f38fd25b..6ad28ff6 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -182,7 +182,6 @@ static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int er BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart)); trans->restarted = err; - trans->restart_count++; return -err; } @@ -368,7 +367,7 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter * static inline int btree_trans_too_many_iters(struct btree_trans *trans) { - if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX) { + if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2) { trace_trans_restart_too_many_iters(trans, _THIS_IP_); return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); } @@ -392,13 +391,17 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, #define lockrestart_do(_trans, _do) \ ({ \ + u32 _restart_count; \ int _ret; \ \ do { \ - bch2_trans_begin(_trans); \ + _restart_count = bch2_trans_begin(_trans); \ _ret = (_do); \ } while (bch2_err_matches(_ret, BCH_ERR_transaction_restart)); \ \ + if (!_ret) \ + bch2_trans_verify_not_restarted(_trans, _restart_count);\ + \ _ret; \ }) @@ -439,7 +442,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, (_start), (_flags)); \ \ while (1) { \ - bch2_trans_begin(_trans); \ + u32 _restart_count = bch2_trans_begin(_trans); \ (_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \ if (!(_k).k) { \ _ret = 0; \ @@ -451,6 +454,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, continue; \ if (_ret) \ break; \ + bch2_trans_verify_not_restarted(_trans, _restart_count);\ if (!bch2_btree_iter_advance(&(_iter))) \ break; \ } \ @@ -468,7 +472,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, (_start), (_flags)); \ \ while (1) { \ - bch2_trans_begin(_trans); \ + u32 _restart_count = bch2_trans_begin(_trans); \ (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\ if (!(_k).k) { \ _ret = 0; \ @@ -480,6 +484,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, continue; \ if (_ret) \ break; \ + bch2_trans_verify_not_restarted(_trans, _restart_count);\ if (!bch2_btree_iter_rewind(&(_iter))) \ break; \ } \ @@ -535,6 +540,8 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, /* new multiple iterator interface: */ void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); +void bch2_btree_path_to_text(struct printbuf *, struct btree_path *); +void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); void bch2_dump_trans_updates(struct btree_trans *); void bch2_dump_trans_paths_updates(struct btree_trans *); void __bch2_trans_init(struct btree_trans *, struct bch_fs *, diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index fa90581f..38b16f95 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -631,11 +631,22 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans, void bch2_btree_key_cache_drop(struct btree_trans *trans, struct btree_path *path) { + struct bch_fs *c = trans->c; struct bkey_cached *ck = (void *) path->l[0].b; - ck->valid = false; + BUG_ON(!ck->valid); - BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags)); + /* + * We just did an update to the btree, bypassing the key cache: the key + * cache key is now stale and must be dropped, even if dirty: + */ + if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) { + clear_bit(BKEY_CACHED_DIRTY, &ck->flags); + atomic_long_dec(&c->btree_key_cache.nr_dirty); + bch2_journal_pin_drop(&c->journal, &ck->journal); + } + + ck->valid = false; } static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h index c3f3cb87..205c6b59 100644 --- a/libbcachefs/btree_locking.h +++ b/libbcachefs/btree_locking.h @@ -115,6 +115,26 @@ btree_lock_want(struct btree_path *path, int level) return BTREE_NODE_UNLOCKED; } +static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans) +{ + return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats) + ? &trans->c->btree_transaction_stats[trans->fn_idx] + : NULL; +} + +static void btree_trans_lock_hold_time_update(struct btree_trans *trans, + struct btree_path *path, unsigned level) +{ +#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS + struct btree_transaction_stats *s = btree_trans_stats(trans); + + if (s) + __bch2_time_stats_update(&s->lock_hold_times, + path->l[level].lock_taken_time, + ktime_get_ns()); +#endif +} + static inline void btree_node_unlock(struct btree_trans *trans, struct btree_path *path, unsigned level) { @@ -124,15 +144,7 @@ static inline void btree_node_unlock(struct btree_trans *trans, if (lock_type != BTREE_NODE_UNLOCKED) { six_unlock_type(&path->l[level].b->c.lock, lock_type); -#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS - if (trans->lock_name_idx < BCH_LOCK_TIME_NR) { - struct bch_fs *c = trans->c; - - __bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx], - path->l[level].lock_taken_time, - ktime_get_ns()); - } -#endif + btree_trans_lock_hold_time_update(trans, path, level); } mark_btree_node_unlocked(path, level); } diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 1ff99917..21d76181 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -392,6 +392,7 @@ struct btree_trans { struct task_struct *task; int srcu_idx; + u8 fn_idx; u8 nr_sorted; u8 nr_updates; u8 traverse_all_idx; @@ -432,7 +433,6 @@ struct btree_trans { unsigned journal_u64s; unsigned journal_preres_u64s; struct replicas_delta_list *fs_usage_deltas; - int lock_name_idx; }; #define BTREE_FLAGS() \ diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index e4138614..0409737f 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -178,12 +178,13 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, six_unlock_intent(&b->c.lock); } -static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, +static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct disk_reservation *res, struct closure *cl, bool interior_node, unsigned flags) { + struct bch_fs *c = trans->c; struct write_point *wp; struct btree *b; __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; @@ -213,7 +214,7 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, mutex_unlock(&c->btree_reserve_cache_lock); retry: - wp = bch2_alloc_sectors_start(c, + wp = bch2_alloc_sectors_start_trans(trans, c->opts.metadata_target ?: c->opts.foreground_target, 0, @@ -412,18 +413,16 @@ static void bch2_btree_reserve_put(struct btree_update *as) } } -static int bch2_btree_reserve_get(struct btree_update *as, +static int bch2_btree_reserve_get(struct btree_trans *trans, + struct btree_update *as, unsigned nr_nodes[2], - unsigned flags) + unsigned flags, + struct closure *cl) { struct bch_fs *c = as->c; - struct closure cl; struct btree *b; unsigned interior; - int ret; - - closure_init_stack(&cl); -retry: + int ret = 0; BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX); @@ -434,18 +433,17 @@ retry: * BTREE_INSERT_NOWAIT only applies to btree node allocation, not * blocking on this lock: */ - ret = bch2_btree_cache_cannibalize_lock(c, &cl); + ret = bch2_btree_cache_cannibalize_lock(c, cl); if (ret) - goto err; + return ret; for (interior = 0; interior < 2; interior++) { struct prealloc_nodes *p = as->prealloc_nodes + interior; while (p->nr < nr_nodes[interior]) { - b = __bch2_btree_node_alloc(c, &as->disk_res, - flags & BTREE_INSERT_NOWAIT - ? NULL : &cl, - interior, flags); + b = __bch2_btree_node_alloc(trans, &as->disk_res, + flags & BTREE_INSERT_NOWAIT ? NULL : cl, + interior, flags); if (IS_ERR(b)) { ret = PTR_ERR(b); goto err; @@ -454,18 +452,8 @@ retry: p->b[p->nr++] = b; } } - - bch2_btree_cache_cannibalize_unlock(c); - closure_sync(&cl); - return 0; err: bch2_btree_cache_cannibalize_unlock(c); - closure_sync(&cl); - - if (ret == -EAGAIN) - goto retry; - - trace_btree_reserve_get_fail(c, nr_nodes[0] + nr_nodes[1], &cl); return ret; } @@ -980,6 +968,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, unsigned update_level = level; int journal_flags = flags & JOURNAL_WATERMARK_MASK; int ret = 0; + u32 restart_count = trans->restart_count; BUG_ON(!path->should_be_locked); @@ -1053,16 +1042,24 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (ret) goto err; - bch2_trans_unlock(trans); - ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, BTREE_UPDATE_JOURNAL_RES, - journal_flags); + journal_flags|JOURNAL_RES_GET_NONBLOCK); if (ret) { - bch2_btree_update_free(as); - trace_trans_restart_journal_preres_get(trans, _RET_IP_); - ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); - return ERR_PTR(ret); + bch2_trans_unlock(trans); + + ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, + BTREE_UPDATE_JOURNAL_RES, + journal_flags); + if (ret) { + trace_trans_restart_journal_preres_get(trans, _RET_IP_); + ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get); + goto err; + } + + ret = bch2_trans_relock(trans); + if (ret) + goto err; } ret = bch2_disk_reservation_get(c, &as->disk_res, @@ -1072,14 +1069,32 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, if (ret) goto err; - ret = bch2_btree_reserve_get(as, nr_nodes, flags); - if (ret) + ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL); + if (ret == -EAGAIN || + ret == -ENOMEM) { + struct closure cl; + + closure_init_stack(&cl); + + bch2_trans_unlock(trans); + + do { + ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); + closure_sync(&cl); + } while (ret == -EAGAIN); + } + + if (ret) { + trace_btree_reserve_get_fail(trans->fn, _RET_IP_, + nr_nodes[0] + nr_nodes[1]); goto err; + } ret = bch2_trans_relock(trans); if (ret) goto err; + bch2_trans_verify_not_restarted(trans, restart_count); return as; err: bch2_btree_update_free(as); diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index cd37a101..f35e714e 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -199,7 +199,7 @@ struct dump_iter { ssize_t ret; /* bytes read so far */ }; -static int flush_buf(struct dump_iter *i) +static ssize_t flush_buf(struct dump_iter *i) { if (i->buf.pos) { size_t bytes = min_t(size_t, i->buf.pos, i->size); @@ -215,7 +215,7 @@ static int flush_buf(struct dump_iter *i) memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos); } - return 0; + return i->size ? 0 : i->ret; } static int bch2_dump_open(struct inode *inode, struct file *file) @@ -253,7 +253,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - int err; + ssize_t ret; i->ubuf = buf; i->size = size; @@ -261,14 +261,11 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, bch2_trans_init(&trans, i->c, 0, 0); - err = for_each_btree_key2(&trans, iter, i->id, i->from, + ret = for_each_btree_key2(&trans, iter, i->id, i->from, BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ({ - err = flush_buf(i); - if (err) - break; - - if (!i->size) + ret = flush_buf(i); + if (ret) break; bch2_bkey_val_to_text(&i->buf, i->c, k); @@ -277,12 +274,12 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, })); i->from = iter.pos; - if (!err) - err = flush_buf(i); + if (!ret) + ret = flush_buf(i); bch2_trans_exit(&trans); - return err ?: i->ret; + return ret ?: i->ret; } static const struct file_operations btree_debug_ops = { @@ -299,43 +296,39 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, struct btree_trans trans; struct btree_iter iter; struct btree *b; - int err; + ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; + ret = flush_buf(i); + if (ret) + return ret; - if (!i->size || !bpos_cmp(SPOS_MAX, i->from)) + if (!bpos_cmp(SPOS_MAX, i->from)) return i->ret; bch2_trans_init(&trans, i->c, 0, 0); - for_each_btree_node(&trans, iter, i->id, i->from, 0, b, err) { - bch2_btree_node_to_text(&i->buf, i->c, b); - err = flush_buf(i); - if (err) + for_each_btree_node(&trans, iter, i->id, i->from, 0, b, ret) { + ret = flush_buf(i); + if (ret) break; - /* - * can't easily correctly restart a btree node traversal across - * all nodes, meh - */ + bch2_btree_node_to_text(&i->buf, i->c, b); i->from = bpos_cmp(SPOS_MAX, b->key.k.p) ? bpos_successor(b->key.k.p) : b->key.k.p; - - if (!i->size) - break; } bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); - return err < 0 ? err : i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations btree_format_debug_ops = { @@ -352,33 +345,27 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, struct btree_trans trans; struct btree_iter iter; struct bkey_s_c k; - int err; + ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - return i->ret; + ret = flush_buf(i); + if (ret) + return ret; bch2_trans_init(&trans, i->c, 0, 0); - err = for_each_btree_key2(&trans, iter, i->id, i->from, + ret = for_each_btree_key2(&trans, iter, i->id, i->from, BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ({ struct btree_path_level *l = &iter.path->l[0]; struct bkey_packed *_k = bch2_btree_node_iter_peek(&l->iter, l->b); - err = flush_buf(i); - if (err) - break; - - if (!i->size) + ret = flush_buf(i); + if (ret) break; if (bpos_cmp(l->b->key.k.p, i->prev_node) > 0) { @@ -391,12 +378,12 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, })); i->from = iter.pos; - if (!err) - err = flush_buf(i); - bch2_trans_exit(&trans); - return err ?: i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations bfloat_failed_debug_ops = { @@ -409,7 +396,8 @@ static const struct file_operations bfloat_failed_debug_ops = { static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c, struct btree *b) { - out->tabstops[0] = 32; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 32); prt_printf(out, "%px btree=%s l=%u ", b, @@ -466,7 +454,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, struct dump_iter *i = file->private_data; struct bch_fs *c = i->c; bool done = false; - int err; + ssize_t ret = 0; i->ubuf = buf; i->size = size; @@ -477,12 +465,9 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, struct rhash_head *pos; struct btree *b; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - break; + ret = flush_buf(i); + if (ret) + return ret; rcu_read_lock(); i->buf.atomic++; @@ -500,9 +485,12 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, } while (!done); if (i->buf.allocation_failure) - return -ENOMEM; + ret = -ENOMEM; - return i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations cached_btree_nodes_ops = { @@ -538,7 +526,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, struct dump_iter *i = file->private_data; struct bch_fs *c = i->c; struct btree_trans *trans; - int err; + ssize_t ret = 0; i->ubuf = buf; i->size = size; @@ -549,12 +537,9 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, if (trans->task->pid <= i->iter) continue; - err = flush_buf(i); - if (err) - return err; - - if (!i->size) - break; + ret = flush_buf(i); + if (ret) + return ret; bch2_btree_trans_to_text(&i->buf, trans); @@ -570,9 +555,12 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, mutex_unlock(&c->btree_trans_lock); if (i->buf.allocation_failure) - return -ENOMEM; + ret = -ENOMEM; - return i->ret; + if (!ret) + ret = flush_buf(i); + + return ret ?: i->ret; } static const struct file_operations btree_transactions_ops = { @@ -651,14 +639,16 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct lock_held_stats *lhs = &i->c->lock_held_stats; + struct bch_fs *c = i->c; int err; i->ubuf = buf; i->size = size; i->ret = 0; - while (lhs->names[i->iter] != 0 && i->iter < BCH_LOCK_TIME_NR) { + while (1) { + struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter]; + err = flush_buf(i); if (err) return err; @@ -666,11 +656,37 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf, if (!i->size) break; - prt_printf(&i->buf, "%s:", lhs->names[i->iter]); + if (i->iter == ARRAY_SIZE(c->btree_transaction_fns) || + !c->btree_transaction_fns[i->iter]) + break; + + prt_printf(&i->buf, "%s: ", c->btree_transaction_fns[i->iter]); prt_newline(&i->buf); - printbuf_indent_add(&i->buf, 8); - bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]); - printbuf_indent_sub(&i->buf, 8); + printbuf_indent_add(&i->buf, 2); + + mutex_lock(&s->lock); + + if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { + prt_printf(&i->buf, "Lock hold times:"); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + bch2_time_stats_to_text(&i->buf, &s->lock_hold_times); + printbuf_indent_sub(&i->buf, 2); + } + + if (s->max_paths_text) { + prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths); + prt_newline(&i->buf); + + printbuf_indent_add(&i->buf, 2); + prt_str_indented(&i->buf, s->max_paths_text); + printbuf_indent_sub(&i->buf, 2); + } + + mutex_unlock(&s->lock); + + printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); i->iter++; } @@ -716,10 +732,8 @@ void bch2_fs_debug_init(struct bch_fs *c) debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, c->btree_debug, &journal_pins_ops); - if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) { - debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir, - c, &lock_held_stats_op); - } + debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir, + c, &lock_held_stats_op); c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir); if (IS_ERR_OR_NULL(c->btree_debug_dir)) diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 15a1be2f..232f7c79 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -37,6 +37,7 @@ x(no_btree_node, no_btree_node_down) \ x(no_btree_node, no_btree_node_init) \ x(no_btree_node, no_btree_node_cached) \ + x(0, backpointer_to_overwritten_btree_node) \ x(0, lock_fail_node_reused) \ x(0, lock_fail_root_changed) \ x(0, journal_reclaim_would_deadlock) \ diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index c93e177a..1a841146 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -290,7 +290,7 @@ err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; - return ret; + return ret ?: -BCH_ERR_transaction_restart_nested; } static int __remove_dirent(struct btree_trans *trans, struct bpos pos) @@ -914,7 +914,7 @@ static int check_inode(struct btree_trans *trans, bch2_fs_lazy_rw(c); ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot); - if (ret) + if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) bch_err(c, "error in fsck: error while deleting inode: %s", bch2_err_str(ret)); return ret; @@ -1149,13 +1149,11 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) } } fsck_err: - if (ret) { + if (ret) bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret)); - return ret; - } - if (trans_was_restarted(trans, restart_count)) - return -BCH_ERR_transaction_restart_nested; - return 0; + if (!ret && trans_was_restarted(trans, restart_count)) + ret = -BCH_ERR_transaction_restart_nested; + return ret; } static int check_extent(struct btree_trans *trans, struct btree_iter *iter, diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index d77092aa..3f1cf1ac 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -1255,8 +1255,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) u64 seq; unsigned i; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 24); out->atomic++; - out->tabstops[0] = 24; rcu_read_lock(); s = READ_ONCE(j->reservations); diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 2fc24745..22470067 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -636,6 +636,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, b = bch2_backpointer_get_node(&trans, &iter, bucket, bp_offset, bp); ret = PTR_ERR_OR_ZERO(b); + if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) + continue; if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index ecc64dd9..17b289b0 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -268,7 +268,8 @@ void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c) struct bch_fs_rebalance *r = &c->rebalance; struct rebalance_work w = rebalance_work(c); - out->tabstops[0] = 20; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx); prt_tab(out); diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 24244bc3..fb3f8e40 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -278,8 +278,8 @@ int bch2_fs_check_snapshots(struct bch_fs *c) bch2_trans_init(&trans, c, 0, 0); - ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots, - POS(BCACHEFS_ROOT_INO, 0), + ret = for_each_btree_key_commit(&trans, iter, + BTREE_ID_snapshots, POS_MIN, BTREE_ITER_PREFETCH, k, NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL, check_snapshot(&trans, &iter, k)); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 55f8c65a..ade09bdf 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -1427,8 +1427,8 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb, const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR ? bch2_sb_field_ops[type] : NULL; - if (!out->tabstops[0]) - out->tabstops[0] = 32; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 32); if (ops) prt_printf(out, "%s", bch2_sb_fields[type]); @@ -1476,8 +1476,8 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, u64 fields_have = 0; unsigned nr_devices = 0; - if (!out->tabstops[0]) - out->tabstops[0] = 32; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 32); mi = bch2_sb_get_members(sb); if (mi) { diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index 2c650055..2dfed1ff 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -560,7 +560,8 @@ SHOW(bch2_fs_counters) u64 counter = 0; u64 counter_since_mount = 0; - out->tabstops[0] = 32; + printbuf_tabstop_push(out, 32); + #define x(t, ...) \ if (attr == &sysfs_##t) { \ counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\ diff --git a/libbcachefs/util.c b/libbcachefs/util.c index ee2c7d9e..42da6623 100644 --- a/libbcachefs/util.c +++ b/libbcachefs/util.c @@ -268,6 +268,12 @@ static void bch2_quantiles_update(struct quantiles *q, u64 v) } } +void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits) +{ + while (nr_bits) + prt_char(out, '0' + ((v >> --nr_bits) & 1)); +} + /* time stats: */ static void bch2_time_stats_update_one(struct time_stats *stats, @@ -526,7 +532,8 @@ void bch2_pd_controller_init(struct bch_pd_controller *pd) void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd) { - out->tabstops[0] = 20; + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); prt_printf(out, "rate:"); prt_tab(out); diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 1fe66fd9..ab7e43d4 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -353,6 +353,8 @@ bool bch2_is_zero(const void *, size_t); u64 bch2_read_flag_list(char *, const char * const[]); +void bch2_prt_u64_binary(struct printbuf *, u64, unsigned); + #define NR_QUANTILES 15 #define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES) #define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES) |