diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-02-27 12:01:32 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2022-02-27 12:01:32 -0500 |
commit | 03498f946430c1fbb411f94af7f9a366f4a7cd51 (patch) | |
tree | 225f584fbe5bcb8f72ebc5c624e1a8d9e8fa235c | |
parent | b1a2ab6eeffc62d32f648d9267dc101da77567d1 (diff) |
Update bcachefs sources to 04036b4910 bcachefs: Fix a memory leak
-rw-r--r-- | .bcachefs_revision | 2 | ||||
-rw-r--r-- | include/trace/events/bcachefs.h | 8 | ||||
-rw-r--r-- | libbcachefs/bcachefs.h | 7 | ||||
-rw-r--r-- | libbcachefs/btree_cache.c | 25 | ||||
-rw-r--r-- | libbcachefs/btree_cache.h | 2 | ||||
-rw-r--r-- | libbcachefs/btree_gc.c | 3 | ||||
-rw-r--r-- | libbcachefs/btree_io.c | 87 | ||||
-rw-r--r-- | libbcachefs/btree_io.h | 45 | ||||
-rw-r--r-- | libbcachefs/btree_iter.c | 7 | ||||
-rw-r--r-- | libbcachefs/btree_types.h | 59 | ||||
-rw-r--r-- | libbcachefs/btree_update_interior.c | 28 | ||||
-rw-r--r-- | libbcachefs/btree_update_leaf.c | 28 | ||||
-rw-r--r-- | libbcachefs/debug.c | 176 | ||||
-rw-r--r-- | libbcachefs/io.c | 9 | ||||
-rw-r--r-- | libbcachefs/journal.c | 56 | ||||
-rw-r--r-- | libbcachefs/journal.h | 1 | ||||
-rw-r--r-- | libbcachefs/super-io.c | 17 | ||||
-rw-r--r-- | libbcachefs/sysfs.c | 10 | ||||
-rw-r--r-- | libbcachefs/util.h | 4 |
19 files changed, 367 insertions, 207 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision index 52682fdc..6f4750b7 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -31718a290491ef933e0bfc5fb666a197b08a4d10 +04036b491089aeb4bac5d796ae1716d019564f7a diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 8cf6669e..05968879 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -918,6 +918,14 @@ TRACE_EVENT(trans_restart_mem_realloced, __entry->bytes) ); +DEFINE_EVENT(transaction_restart_iter, trans_restart_key_cache_key_realloced, + TP_PROTO(const char *trans_fn, + unsigned long caller_ip, + enum btree_id btree_id, + struct bpos *pos), + TP_ARGS(trans_fn, caller_ip, btree_id, pos) +); + #endif /* _TRACE_BCACHE_H */ /* This part must be outside protection */ diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 45a43f71..211fd5ad 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -534,14 +534,10 @@ enum { BCH_FS_NEED_ANOTHER_GC, BCH_FS_DELETED_NODES, BCH_FS_REBUILD_REPLICAS, - BCH_FS_HOLD_BTREE_WRITES, }; struct btree_debug { unsigned id; - struct dentry *btree; - struct dentry *btree_format; - struct dentry *failed; }; struct bch_fs_pcpu { @@ -886,7 +882,8 @@ struct bch_fs { struct bch_memquota_type quotas[QTYP_NR]; /* DEBUG JUNK */ - struct dentry *debug; + struct dentry *fs_debug_dir; + struct dentry *btree_debug_dir; struct btree_debug btree_debug[BTREE_ID_NR]; struct btree *verify_data; struct btree_node *verify_ondisk; diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index 00d4b182..1347b1fc 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -15,6 +15,13 @@ struct lock_class_key bch2_btree_node_lock_key; +const char * const bch2_btree_node_flags[] = { +#define x(f) #f, + BTREE_FLAGS() +#undef x + NULL +}; + void bch2_recalc_btree_reserve(struct bch_fs *c) { unsigned i, reserve = 16; @@ -217,15 +224,13 @@ wait_on_io: goto wait_on_io; } - if (btree_node_noevict(b)) - goto out_unlock; - - if (!btree_node_may_write(b)) + if (btree_node_noevict(b) || + btree_node_write_blocked(b) || + btree_node_will_make_reachable(b)) goto out_unlock; if (btree_node_dirty(b)) { - if (!flush || - test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) + if (!flush) goto out_unlock; /* * Using the underscore version because we don't want to compact @@ -234,9 +239,9 @@ wait_on_io: * the post write cleanup: */ if (bch2_verify_btree_ondisk) - bch2_btree_node_write(c, b, SIX_LOCK_intent); + bch2_btree_node_write(c, b, SIX_LOCK_intent, 0); else - __bch2_btree_node_write(c, b, false); + __bch2_btree_node_write(c, b, 0); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); @@ -415,7 +420,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c) if (btree_node_dirty(b)) bch2_btree_complete_write(c, b, btree_current_write(b)); - clear_btree_node_dirty(c, b); + clear_btree_node_dirty_acct(c, b); btree_node_data_free(c, b); } @@ -1059,7 +1064,7 @@ wait_on_io: six_lock_write(&b->c.lock, NULL, NULL); if (btree_node_dirty(b)) { - __bch2_btree_node_write(c, b, false); + __bch2_btree_node_write(c, b, 0); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); goto wait_on_io; diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h index f7e10986..2901f0dc 100644 --- a/libbcachefs/btree_cache.h +++ b/libbcachefs/btree_cache.h @@ -7,6 +7,8 @@ extern struct lock_class_key bch2_btree_node_lock_key; +extern const char * const bch2_btree_node_flags[]; + struct btree_iter; void bch2_recalc_btree_reserve(struct bch_fs *); diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 88b234f5..cd901654 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1059,6 +1059,9 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only) bch2_trans_init(&trans, c, 0, 0); + if (initial) + trans.is_initial_gc = true; + for (i = 0; i < BTREE_ID_NR; i++) ids[i] = i; bubble_sort(ids, BTREE_ID_NR, btree_id_gc_phase_cmp); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 2b16b656..08f5f6b8 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -477,7 +477,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) }; if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2) { - bch2_btree_node_write(c, b, SIX_LOCK_write); + bch2_btree_node_write(c, b, SIX_LOCK_write, 0); reinit_iter = true; } } @@ -1596,7 +1596,7 @@ void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, bch2_journal_pin_drop(&c->journal, &w->journal); } -static void btree_node_write_done(struct bch_fs *c, struct btree *b) +static void __btree_node_write_done(struct bch_fs *c, struct btree *b) { struct btree_write *w = btree_prev_write(b); unsigned long old, new, v; @@ -1607,26 +1607,11 @@ static void btree_node_write_done(struct bch_fs *c, struct btree *b) do { old = new = v; - if (old & (1U << BTREE_NODE_need_write)) - goto do_write; - - new &= ~(1U << BTREE_NODE_write_in_flight); - new &= ~(1U << BTREE_NODE_write_in_flight_inner); - } while ((v = cmpxchg(&b->flags, old, new)) != old); - - wake_up_bit(&b->flags, BTREE_NODE_write_in_flight); - return; - -do_write: - six_lock_read(&b->c.lock, NULL, NULL); - v = READ_ONCE(b->flags); - do { - old = new = v; - if ((old & (1U << BTREE_NODE_dirty)) && (old & (1U << BTREE_NODE_need_write)) && !(old & (1U << BTREE_NODE_never_write)) && - btree_node_may_write(b)) { + !(old & (1U << BTREE_NODE_write_blocked)) && + !(old & (1U << BTREE_NODE_will_make_reachable))) { new &= ~(1U << BTREE_NODE_dirty); new &= ~(1U << BTREE_NODE_need_write); new |= (1U << BTREE_NODE_write_in_flight); @@ -1640,8 +1625,13 @@ do_write: } while ((v = cmpxchg(&b->flags, old, new)) != old); if (new & (1U << BTREE_NODE_write_in_flight)) - __bch2_btree_node_write(c, b, true); + __bch2_btree_node_write(c, b, BTREE_WRITE_ALREADY_STARTED); +} +static void btree_node_write_done(struct bch_fs *c, struct btree *b) +{ + six_lock_read(&b->c.lock, NULL, NULL); + __btree_node_write_done(c, b); six_unlock_read(&b->c.lock); } @@ -1756,7 +1746,7 @@ static void btree_write_submit(struct work_struct *work) bch2_submit_wbio_replicas(&wbio->wbio, wbio->wbio.c, BCH_DATA_btree, &tmp.k); } -void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_started) +void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags) { struct btree_write_bio *wbio; struct bset_tree *t; @@ -1773,12 +1763,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_sta void *data; int ret; - if (already_started) + if (flags & BTREE_WRITE_ALREADY_STARTED) goto do_write; - if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) - return; - /* * We may only have a read lock on the btree node - the dirty bit is our * "lock" against racing with other threads that may be trying to start @@ -1792,13 +1779,21 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bool already_sta if (!(old & (1 << BTREE_NODE_dirty))) return; - if (!btree_node_may_write(b)) + if ((flags & BTREE_WRITE_ONLY_IF_NEED) && + !(old & (1 << BTREE_NODE_need_write))) return; - if (old & (1 << BTREE_NODE_never_write)) + if (old & + ((1 << BTREE_NODE_never_write)| + (1 << BTREE_NODE_write_blocked))) return; - BUG_ON(old & (1 << BTREE_NODE_write_in_flight)); + if (b->written && + (old & (1 << BTREE_NODE_will_make_reachable))) + return; + + if (old & (1 << BTREE_NODE_write_in_flight)) + return; new &= ~(1 << BTREE_NODE_dirty); new &= ~(1 << BTREE_NODE_need_write); @@ -1998,7 +1993,7 @@ err: b->written += sectors_to_write; nowrite: btree_bounce_free(c, bytes, used_mempool, data); - btree_node_write_done(c, b); + __btree_node_write_done(c, b); } /* @@ -2061,12 +2056,13 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b) * Use this one if the node is intent locked: */ void bch2_btree_node_write(struct bch_fs *c, struct btree *b, - enum six_lock_type lock_type_held) + enum six_lock_type lock_type_held, + unsigned flags) { if (lock_type_held == SIX_LOCK_intent || (lock_type_held == SIX_LOCK_read && six_lock_tryupgrade(&b->c.lock))) { - __bch2_btree_node_write(c, b, false); + __bch2_btree_node_write(c, b, flags); /* don't cycle lock unnecessarily: */ if (btree_node_just_written(b) && @@ -2078,7 +2074,7 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b, if (lock_type_held == SIX_LOCK_read) six_lock_downgrade(&b->c.lock); } else { - __bch2_btree_node_write(c, b, false); + __bch2_btree_node_write(c, b, flags); if (lock_type_held == SIX_LOCK_write && btree_node_just_written(b)) bch2_btree_post_write_cleanup(c, b); @@ -2112,30 +2108,3 @@ void bch2_btree_flush_all_writes(struct bch_fs *c) { __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight); } - -void bch2_dirty_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c) -{ - struct bucket_table *tbl; - struct rhash_head *pos; - struct btree *b; - unsigned i; - - rcu_read_lock(); - for_each_cached_btree(b, c, tbl, i, pos) { - unsigned long flags = READ_ONCE(b->flags); - - if (!(flags & (1 << BTREE_NODE_dirty))) - continue; - - pr_buf(out, "%p d %u n %u l %u w %u b %u r %u:%lu\n", - b, - (flags & (1 << BTREE_NODE_dirty)) != 0, - (flags & (1 << BTREE_NODE_need_write)) != 0, - b->c.level, - b->written, - !list_empty_careful(&b->write_blocked), - b->will_make_reachable != 0, - b->will_make_reachable & 1); - } - rcu_read_unlock(); -} diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h index 095ad505..d818d876 100644 --- a/libbcachefs/btree_io.h +++ b/libbcachefs/btree_io.h @@ -15,18 +15,13 @@ struct btree; struct btree_iter; struct btree_node_read_all; -static inline bool btree_node_dirty(struct btree *b) -{ - return test_bit(BTREE_NODE_dirty, &b->flags); -} - -static inline void set_btree_node_dirty(struct bch_fs *c, struct btree *b) +static inline void set_btree_node_dirty_acct(struct bch_fs *c, struct btree *b) { if (!test_and_set_bit(BTREE_NODE_dirty, &b->flags)) atomic_inc(&c->btree_cache.dirty); } -static inline void clear_btree_node_dirty(struct bch_fs *c, struct btree *b) +static inline void clear_btree_node_dirty_acct(struct bch_fs *c, struct btree *b) { if (test_and_clear_bit(BTREE_NODE_dirty, &b->flags)) atomic_dec(&c->btree_cache.dirty); @@ -67,12 +62,6 @@ void __bch2_btree_node_wait_on_write(struct btree *); void bch2_btree_node_wait_on_read(struct btree *); void bch2_btree_node_wait_on_write(struct btree *); -static inline bool btree_node_may_write(struct btree *b) -{ - return list_empty_careful(&b->write_blocked) && - (!b->written || !b->will_make_reachable); -} - enum compact_mode { COMPACT_LAZY, COMPACT_ALL, @@ -148,41 +137,23 @@ int bch2_btree_root_read(struct bch_fs *, enum btree_id, void bch2_btree_complete_write(struct bch_fs *, struct btree *, struct btree_write *); -void __bch2_btree_node_write(struct bch_fs *, struct btree *, bool); bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); +#define BTREE_WRITE_ONLY_IF_NEED (1U << 0) +#define BTREE_WRITE_ALREADY_STARTED (1U << 1) + +void __bch2_btree_node_write(struct bch_fs *, struct btree *, unsigned); void bch2_btree_node_write(struct bch_fs *, struct btree *, - enum six_lock_type); + enum six_lock_type, unsigned); static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b, enum six_lock_type lock_held) { - if (b->written && - btree_node_need_write(b) && - btree_node_may_write(b) && - !btree_node_write_in_flight(b)) - bch2_btree_node_write(c, b, lock_held); + bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED); } -#define bch2_btree_node_write_cond(_c, _b, cond) \ -do { \ - unsigned long old, new, v = READ_ONCE((_b)->flags); \ - \ - do { \ - old = new = v; \ - \ - if (!(old & (1 << BTREE_NODE_dirty)) || !(cond)) \ - break; \ - \ - new |= (1 << BTREE_NODE_need_write); \ - } while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \ - \ - btree_node_write_if_need(_c, _b, SIX_LOCK_read); \ -} while (0) - void bch2_btree_flush_all_reads(struct bch_fs *); void bch2_btree_flush_all_writes(struct bch_fs *); -void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *); static inline void compat_bformat(unsigned level, enum btree_id btree_id, unsigned version, unsigned big_endian, diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index 8ff6a8d0..c0357ee9 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -558,7 +558,12 @@ void bch2_trans_unlock(struct btree_trans *trans) trans_for_each_path(trans, path) __bch2_btree_path_unlock(path); - BUG_ON(lock_class_is_held(&bch2_btree_node_lock_key)); + /* + * bch2_gc_btree_init_recurse() doesn't use btree iterators for walking + * btree nodes, it implements its own walking: + */ + BUG_ON(!trans->is_initial_gc && + lock_class_is_held(&bch2_btree_node_lock_key)); } /* Btree iterator: */ diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 7a1555c2..d87069c5 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -392,6 +392,7 @@ struct btree_trans { bool restarted:1; bool memory_allocation_failure:1; bool journal_transaction_names:1; + bool is_initial_gc:1; /* * For when bch2_trans_update notices we'll be splitting a compressed * extent: @@ -424,7 +425,31 @@ struct btree_trans { struct replicas_delta_list *fs_usage_deltas; }; -#define BTREE_FLAG(flag) \ +#define BTREE_FLAGS() \ + x(read_in_flight) \ + x(read_error) \ + x(dirty) \ + x(need_write) \ + x(write_blocked) \ + x(will_make_reachable) \ + x(noevict) \ + x(write_idx) \ + x(accessed) \ + x(write_in_flight) \ + x(write_in_flight_inner) \ + x(just_written) \ + x(dying) \ + x(fake) \ + x(need_rewrite) \ + x(never_write) + +enum btree_flags { +#define x(flag) BTREE_NODE_##flag, + BTREE_FLAGS() +#undef x +}; + +#define x(flag) \ static inline bool btree_node_ ## flag(struct btree *b) \ { return test_bit(BTREE_NODE_ ## flag, &b->flags); } \ \ @@ -434,36 +459,8 @@ static inline void set_btree_node_ ## flag(struct btree *b) \ static inline void clear_btree_node_ ## flag(struct btree *b) \ { clear_bit(BTREE_NODE_ ## flag, &b->flags); } -enum btree_flags { - BTREE_NODE_read_in_flight, - BTREE_NODE_read_error, - BTREE_NODE_dirty, - BTREE_NODE_need_write, - BTREE_NODE_noevict, - BTREE_NODE_write_idx, - BTREE_NODE_accessed, - BTREE_NODE_write_in_flight, - BTREE_NODE_write_in_flight_inner, - BTREE_NODE_just_written, - BTREE_NODE_dying, - BTREE_NODE_fake, - BTREE_NODE_need_rewrite, - BTREE_NODE_never_write, -}; - -BTREE_FLAG(read_in_flight); -BTREE_FLAG(read_error); -BTREE_FLAG(need_write); -BTREE_FLAG(noevict); -BTREE_FLAG(write_idx); -BTREE_FLAG(accessed); -BTREE_FLAG(write_in_flight); -BTREE_FLAG(write_in_flight_inner); -BTREE_FLAG(just_written); -BTREE_FLAG(dying); -BTREE_FLAG(fake); -BTREE_FLAG(need_rewrite); -BTREE_FLAG(never_write); +BTREE_FLAGS() +#undef x static inline struct btree_write *btree_current_write(struct btree *b) { diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index ba76a86a..63832fb9 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -271,7 +271,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev six_lock_write(&b->c.lock, NULL, NULL); set_btree_node_accessed(b); - set_btree_node_dirty(c, b); + set_btree_node_dirty_acct(c, b); set_btree_node_need_write(b); bch2_bset_init_first(b, &b->data->keys); @@ -619,6 +619,8 @@ err: mutex_lock(&c->btree_interior_update_lock); list_del(&as->write_blocked_list); + if (list_empty(&b->write_blocked)) + clear_btree_node_write_blocked(b); /* * Node might have been freed, recheck under @@ -663,6 +665,7 @@ err: BUG_ON(b->will_make_reachable != (unsigned long) as); b->will_make_reachable = 0; + clear_btree_node_will_make_reachable(b); } mutex_unlock(&c->btree_interior_update_lock); @@ -729,6 +732,8 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) as->mode = BTREE_INTERIOR_UPDATING_NODE; as->b = b; + + set_btree_node_write_blocked(b); list_add(&as->write_blocked_list, &b->write_blocked); mutex_unlock(&c->btree_interior_update_lock); @@ -794,6 +799,7 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree as->new_nodes[as->nr_new_nodes++] = b; b->will_make_reachable = 1UL|(unsigned long) as; + set_btree_node_will_make_reachable(b); mutex_unlock(&c->btree_interior_update_lock); @@ -816,6 +822,7 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b) * xchg() is for synchronization with bch2_btree_complete_write: */ v = xchg(&b->will_make_reachable, 0); + clear_btree_node_will_make_reachable(b); as = (struct btree_update *) (v & ~1UL); if (!as) { @@ -881,7 +888,7 @@ static void bch2_btree_interior_update_will_free_node(struct btree_update *as, closure_wake_up(&c->btree_interior_update_wait); } - clear_btree_node_dirty(c, b); + clear_btree_node_dirty_acct(c, b); clear_btree_node_need_write(b); /* @@ -1096,8 +1103,7 @@ static void bch2_btree_set_root(struct btree_update *as, struct btree *old; trace_btree_set_root(c, b); - BUG_ON(!b->written && - !test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)); + BUG_ON(!b->written); old = btree_node_root(c, b); @@ -1165,7 +1171,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, bch2_btree_node_iter_advance(node_iter, b); bch2_btree_bset_insert_key(trans, path, b, node_iter, insert); - set_btree_node_dirty(c, b); + set_btree_node_dirty_acct(c, b); set_btree_node_need_write(b); } @@ -1386,8 +1392,8 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans, six_unlock_write(&n2->c.lock); six_unlock_write(&n1->c.lock); - bch2_btree_node_write(c, n1, SIX_LOCK_intent); - bch2_btree_node_write(c, n2, SIX_LOCK_intent); + bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0); + bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0); /* * Note that on recursive parent_keys == keys, so we @@ -1406,7 +1412,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans, btree_split_insert_keys(as, trans, path, n3, &as->parent_keys); - bch2_btree_node_write(c, n3, SIX_LOCK_intent); + bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0); } } else { trace_btree_compact(c, b); @@ -1414,7 +1420,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans, bch2_btree_build_aux_trees(n1); six_unlock_write(&n1->c.lock); - bch2_btree_node_write(c, n1, SIX_LOCK_intent); + bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0); if (parent) bch2_keylist_add(&as->parent_keys, &n1->key); @@ -1702,7 +1708,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_btree_build_aux_trees(n); six_unlock_write(&n->c.lock); - bch2_btree_node_write(c, n, SIX_LOCK_intent); + bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); bkey_init(&delete.k); delete.k.p = prev->key.k.p; @@ -1776,7 +1782,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans, trace_btree_gc_rewrite_node(c, b); - bch2_btree_node_write(c, n, SIX_LOCK_intent); + bch2_btree_node_write(c, n, SIX_LOCK_intent, 0); if (parent) { bch2_keylist_add(&as->parent_keys, &n->key); diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 334df638..19cb6e1e 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -167,10 +167,24 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, struct bch_fs *c = container_of(j, struct bch_fs, journal); struct btree_write *w = container_of(pin, struct btree_write, journal); struct btree *b = container_of(w, struct btree, writes[i]); + unsigned long old, new, v; + unsigned idx = w - b->writes; six_lock_read(&b->c.lock, NULL, NULL); - bch2_btree_node_write_cond(c, b, - (btree_current_write(b) == w && w->journal.seq == seq)); + v = READ_ONCE(b->flags); + + do { + old = new = v; + + if (!(old & (1 << BTREE_NODE_dirty)) || + !!(old & (1 << BTREE_NODE_write_idx)) != idx || + w->journal.seq != seq) + break; + + new |= 1 << BTREE_NODE_need_write; + } while ((v = cmpxchg(&b->flags, old, new)) != old); + + btree_node_write_if_need(c, b, SIX_LOCK_read); six_unlock_read(&b->c.lock); return 0; } @@ -220,7 +234,7 @@ static bool btree_insert_key_leaf(struct btree_trans *trans, bch2_btree_add_journal_pin(c, b, trans->journal_res.seq); if (unlikely(!btree_node_dirty(b))) - set_btree_node_dirty(c, b); + set_btree_node_dirty_acct(c, b); live_u64s_added = (int) b->nr.live_u64s - old_live_u64s; u64s_added = (int) bset_u64s(t) - old_u64s; @@ -367,7 +381,13 @@ btree_key_can_insert_cached(struct btree_trans *trans, ck->u64s = new_u64s; ck->k = new_k; - return BTREE_INSERT_OK; + /* + * Keys returned by peek() are no longer valid pointers, so we need a + * transaction restart: + */ + trace_trans_restart_key_cache_key_realloced(trans->fn, _RET_IP_, + path->btree_id, &path->pos); + return btree_trans_restart(trans); } static inline void do_btree_insert_one(struct btree_trans *trans, diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index ee22ed31..2d65ae37 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -185,9 +185,10 @@ out: /* XXX: bch_fs refcounting */ struct dump_iter { - struct bpos from; - struct bch_fs *c; + struct bch_fs *c; enum btree_id id; + struct bpos from; + u64 iter; struct printbuf buf; @@ -226,6 +227,7 @@ static int bch2_dump_open(struct inode *inode, struct file *file) file->private_data = i; i->from = POS_MIN; + i->iter = 0; i->c = container_of(bd, struct bch_fs, btree_debug[bd->id]); i->id = bd->id; i->buf = PRINTBUF; @@ -420,10 +422,148 @@ static const struct file_operations bfloat_failed_debug_ops = { .read = bch2_read_bfloat_failed, }; +static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c, + struct btree *b) +{ + out->tabstops[0] = 32; + + pr_buf(out, "%px btree=%s l=%u ", + b, + bch2_btree_ids[b->c.btree_id], + b->c.level); + pr_newline(out); + + pr_indent_push(out, 2); + + bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); + pr_newline(out); + + pr_buf(out, "flags: "); + pr_tab(out); + bch2_flags_to_text(out, bch2_btree_node_flags, b->flags); + pr_newline(out); + + pr_buf(out, "written:"); + pr_tab(out); + pr_buf(out, "%u", b->written); + pr_newline(out); + + pr_buf(out, "writes blocked:"); + pr_tab(out); + pr_buf(out, "%u", !list_empty_careful(&b->write_blocked)); + pr_newline(out); + + pr_buf(out, "will make reachable:"); + pr_tab(out); + pr_buf(out, "%lx", b->will_make_reachable); + pr_newline(out); + + pr_buf(out, "journal pin %px:", &b->writes[0].journal); + pr_tab(out); + pr_buf(out, "%llu", b->writes[0].journal.seq); + pr_newline(out); + + pr_buf(out, "journal pin %px:", &b->writes[1].journal); + pr_tab(out); + pr_buf(out, "%llu", b->writes[1].journal.seq); + pr_newline(out); + + pr_indent_pop(out, 2); +} + +static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + bool done = false; + int err; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + do { + struct bucket_table *tbl; + struct rhash_head *pos; + struct btree *b; + + err = flush_buf(i); + if (err) + return err; + + if (!i->size) + break; + + rcu_read_lock(); + i->buf.atomic++; + tbl = rht_dereference_rcu(c->btree_cache.table.tbl, + &c->btree_cache.table); + if (i->iter < tbl->size) { + rht_for_each_entry_rcu(b, pos, tbl, i->iter, hash) + bch2_cached_btree_node_to_text(&i->buf, c, b); + i->iter++;; + } else { + done = true; + } + --i->buf.atomic; + rcu_read_unlock(); + } while (!done); + + if (i->buf.allocation_failure) + return -ENOMEM; + + return i->ret; +} + +static const struct file_operations cached_btree_nodes_ops = { + .owner = THIS_MODULE, + .open = bch2_dump_open, + .release = bch2_dump_release, + .read = bch2_cached_btree_nodes_read, +}; + +static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + bool done = false; + int err; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + do { + err = flush_buf(i); + if (err) + return err; + + if (!i->size) + break; + + done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter); + i->iter++; + } while (!done); + + if (i->buf.allocation_failure) + return -ENOMEM; + + return i->ret; +} + +static const struct file_operations journal_pins_ops = { + .owner = THIS_MODULE, + .open = bch2_dump_open, + .release = bch2_dump_release, + .read = bch2_journal_pins_read, +}; + void bch2_fs_debug_exit(struct bch_fs *c) { - if (!IS_ERR_OR_NULL(c->debug)) - debugfs_remove_recursive(c->debug); + if (!IS_ERR_OR_NULL(c->fs_debug_dir)) + debugfs_remove_recursive(c->fs_debug_dir); } void bch2_fs_debug_init(struct bch_fs *c) @@ -435,29 +575,39 @@ void bch2_fs_debug_init(struct bch_fs *c) return; snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b); - c->debug = debugfs_create_dir(name, bch_debug); - if (IS_ERR_OR_NULL(c->debug)) + c->fs_debug_dir = debugfs_create_dir(name, bch_debug); + if (IS_ERR_OR_NULL(c->fs_debug_dir)) + return; + + debugfs_create_file("cached_btree_nodes", 0400, c->fs_debug_dir, + c->btree_debug, &cached_btree_nodes_ops); + + debugfs_create_file("journal_pins", 0400, c->fs_debug_dir, + c->btree_debug, &journal_pins_ops); + + c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir); + if (IS_ERR_OR_NULL(c->btree_debug_dir)) return; for (bd = c->btree_debug; bd < c->btree_debug + ARRAY_SIZE(c->btree_debug); bd++) { bd->id = bd - c->btree_debug; - bd->btree = debugfs_create_file(bch2_btree_ids[bd->id], - 0400, c->debug, bd, - &btree_debug_ops); + debugfs_create_file(bch2_btree_ids[bd->id], + 0400, c->btree_debug_dir, bd, + &btree_debug_ops); snprintf(name, sizeof(name), "%s-formats", bch2_btree_ids[bd->id]); - bd->btree_format = debugfs_create_file(name, 0400, c->debug, bd, - &btree_format_debug_ops); + debugfs_create_file(name, 0400, c->btree_debug_dir, bd, + &btree_format_debug_ops); snprintf(name, sizeof(name), "%s-bfloat-failed", bch2_btree_ids[bd->id]); - bd->failed = debugfs_create_file(name, 0400, c->debug, bd, - &bfloat_failed_debug_ops); + debugfs_create_file(name, 0400, c->btree_debug_dir, bd, + &bfloat_failed_debug_ops); } } diff --git a/libbcachefs/io.c b/libbcachefs/io.c index 3bea9986..cf97594b 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -2041,7 +2041,14 @@ retry_pick: ca = bch_dev_bkey_exists(c, pick.ptr.dev); - if (!pick.ptr.cached && + /* + * Stale dirty pointers are treated as IO errors, but @failed isn't + * allocated unless we're in the retry path - so if we're not in the + * retry path, don't check here, it'll be caught in bch2_read_endio() + * and we'll end up in the retry path: + */ + if ((flags & BCH_READ_IN_RETRY) && + !pick.ptr.cached && unlikely(ptr_stale(ca, &pick.ptr))) { read_from_stale_dirty_pointer(trans, k, pick.ptr); bch2_mark_io_failure(failed, &pick); diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index ffaf5895..9cd1e11a 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -1281,35 +1281,59 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) spin_unlock(&j->lock); } -void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) +bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) { struct journal_entry_pin_list *pin_list; struct journal_entry_pin *pin; - u64 i; spin_lock(&j->lock); + *seq = max(*seq, j->pin.front); + + if (*seq >= j->pin.back) { + spin_unlock(&j->lock); + return true; + } + out->atomic++; - fifo_for_each_entry_ptr(pin_list, &j->pin, i) { - pr_buf(out, "%llu: count %u\n", - i, atomic_read(&pin_list->count)); + pin_list = journal_seq_pin(j, *seq); - list_for_each_entry(pin, &pin_list->key_cache_list, list) - pr_buf(out, "\t%px %ps\n", - pin, pin->flush); + pr_buf(out, "%llu: count %u", *seq, atomic_read(&pin_list->count)); + pr_newline(out); + pr_indent_push(out, 2); - list_for_each_entry(pin, &pin_list->list, list) - pr_buf(out, "\t%px %ps\n", - pin, pin->flush); + list_for_each_entry(pin, &pin_list->list, list) { + pr_buf(out, "\t%px %ps", pin, pin->flush); + pr_newline(out); + } + + list_for_each_entry(pin, &pin_list->key_cache_list, list) { + pr_buf(out, "\t%px %ps", pin, pin->flush); + pr_newline(out); + } - if (!list_empty(&pin_list->flushed)) - pr_buf(out, "flushed:\n"); + if (!list_empty(&pin_list->flushed)) { + pr_buf(out, "flushed:"); + pr_newline(out); + } - list_for_each_entry(pin, &pin_list->flushed, list) - pr_buf(out, "\t%px %ps\n", - pin, pin->flush); + list_for_each_entry(pin, &pin_list->flushed, list) { + pr_buf(out, "\t%px %ps", pin, pin->flush); + pr_newline(out); } + pr_indent_pop(out, 2); + --out->atomic; spin_unlock(&j->lock); + + return false; +} + +void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j) +{ + u64 seq = 0; + + while (!bch2_journal_seq_pins_to_text(out, j, &seq)) + seq++; } diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index 29698174..0a3fb8a0 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -501,6 +501,7 @@ void bch2_journal_block(struct journal *); void __bch2_journal_debug_to_text(struct printbuf *, struct journal *); void bch2_journal_debug_to_text(struct printbuf *, struct journal *); void bch2_journal_pins_to_text(struct printbuf *, struct journal *); +bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *); int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *, unsigned nr); diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c index 08966f40..8580b6fd 100644 --- a/libbcachefs/super-io.c +++ b/libbcachefs/super-io.c @@ -1420,24 +1420,25 @@ static const struct bch_sb_field_ops *bch2_sb_field_ops[] = { }; static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, - struct printbuf *orig_err) + struct printbuf *err) { unsigned type = le32_to_cpu(f->type); - struct printbuf err = *orig_err; + struct printbuf field_err = PRINTBUF; int ret; if (type >= BCH_SB_FIELD_NR) return 0; - pr_buf(&err, "Invalid superblock section %s: ", bch2_sb_fields[type]); - - ret = bch2_sb_field_ops[type]->validate(sb, f, &err); + ret = bch2_sb_field_ops[type]->validate(sb, f, &field_err); if (ret) { - pr_newline(&err); - bch2_sb_field_to_text(&err, sb, f); - *orig_err = err; + pr_buf(err, "Invalid superblock section %s: %s", + bch2_sb_fields[type], + field_err.buf); + pr_newline(err); + bch2_sb_field_to_text(err, sb, f); } + printbuf_exit(&field_err); return ret; } diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c index ce32b906..3018250d 100644 --- a/libbcachefs/sysfs.c +++ b/libbcachefs/sysfs.c @@ -174,9 +174,7 @@ read_attribute(reserve_stats); read_attribute(btree_cache_size); read_attribute(compression_stats); read_attribute(journal_debug); -read_attribute(journal_pins); read_attribute(btree_updates); -read_attribute(dirty_btree_nodes); read_attribute(btree_cache); read_attribute(btree_key_cache); read_attribute(btree_transactions); @@ -402,15 +400,9 @@ SHOW(bch2_fs) if (attr == &sysfs_journal_debug) bch2_journal_debug_to_text(out, &c->journal); - if (attr == &sysfs_journal_pins) - bch2_journal_pins_to_text(out, &c->journal); - if (attr == &sysfs_btree_updates) bch2_btree_updates_to_text(out, c); - if (attr == &sysfs_dirty_btree_nodes) - bch2_dirty_btree_nodes_to_text(out, c); - if (attr == &sysfs_btree_cache) bch2_btree_cache_to_text(out, c); @@ -564,9 +556,7 @@ SYSFS_OPS(bch2_fs_internal); struct attribute *bch2_fs_internal_files[] = { &sysfs_journal_debug, - &sysfs_journal_pins, &sysfs_btree_updates, - &sysfs_dirty_btree_nodes, &sysfs_btree_cache, &sysfs_btree_key_cache, &sysfs_btree_transactions, diff --git a/libbcachefs/util.h b/libbcachefs/util.h index 25ae98cc..4095df2f 100644 --- a/libbcachefs/util.h +++ b/libbcachefs/util.h @@ -300,6 +300,10 @@ static inline void pr_indent_push(struct printbuf *buf, unsigned spaces) static inline void pr_indent_pop(struct printbuf *buf, unsigned spaces) { + if (buf->last_newline + buf->indent == buf->pos) { + buf->pos -= spaces; + buf->buf[buf->pos] = 0; + } buf->indent -= spaces; } |