diff options
-rw-r--r-- | Documentation/filesystems/bcachefs/future/idle_work.rst | 6 | ||||
-rw-r--r-- | fs/bcachefs/backpointers.c | 49 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 7 | ||||
-rw-r--r-- | fs/bcachefs/extent_update.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/journal.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/opts.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 1 | ||||
-rw-r--r-- | fs/bcachefs/recovery_passes_format.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/sb-counters_format.h | 10 | ||||
-rw-r--r-- | fs/bcachefs/super-io.c | 51 | ||||
-rw-r--r-- | fs/bcachefs/super-io.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 6 | ||||
-rw-r--r-- | mm/shrinker.c | 4 |
13 files changed, 102 insertions, 54 deletions
diff --git a/Documentation/filesystems/bcachefs/future/idle_work.rst b/Documentation/filesystems/bcachefs/future/idle_work.rst index 59a332509dcd..f1202113dde0 100644 --- a/Documentation/filesystems/bcachefs/future/idle_work.rst +++ b/Documentation/filesystems/bcachefs/future/idle_work.rst @@ -11,10 +11,10 @@ idle" so the system can go to sleep. We don't want to be dribbling out background work while the system should be idle. The complicating factor is that there are a number of background tasks, which -form a heirarchy (or a digraph, depending on how you divide it up) - one +form a hierarchy (or a digraph, depending on how you divide it up) - one background task may generate work for another. -Thus proper idle detection needs to model this heirarchy. +Thus proper idle detection needs to model this hierarchy. - Foreground writes - Page cache writeback @@ -51,7 +51,7 @@ IDLE REGIME When the system becomes idle, we should start flushing our pending work quicker so the system can go to sleep. -Note that the definition of "idle" depends on where in the heirarchy a task +Note that the definition of "idle" depends on where in the hierarchy a task is - a task should start flushing work more quickly when the task above it has stopped generating new work. diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index c43aaab4c108..cb25cddb759b 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -532,10 +532,6 @@ static int check_bp_exists(struct btree_trans *trans, struct btree_iter other_extent_iter = {}; CLASS(printbuf, buf)(); - if (bpos_lt(bp->k.p, s->bp_start) || - bpos_gt(bp->k.p, s->bp_end)) - return 0; - CLASS(btree_iter, bp_iter)(trans, BTREE_ID_backpointers, bp->k.p, 0); struct bkey_s_c bp_k = bch2_btree_iter_peek_slot(&bp_iter); int ret = bkey_err(bp_k); @@ -690,6 +686,10 @@ static int check_extent_to_backpointers(struct btree_trans *trans, struct bkey_i_backpointer bp; bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); + if (bpos_lt(bp.k.p, s->bp_start) || + bpos_gt(bp.k.p, s->bp_end)) + continue; + int ret = !empty ? check_bp_exists(trans, s, &bp, k) : bch2_bucket_backpointer_mod(trans, k, &bp, true); @@ -809,8 +809,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, for (enum btree_id btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { - /* btree_type_has_ptrs should probably include BTREE_ID_stripes, - * definitely her... */ int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; ret = commit_do(trans, NULL, NULL, @@ -899,7 +897,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); - if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen && + if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointer_bucket_gen && (bp.v->bucket_gen != a->gen || bp.v->pad)) { ret = bch2_backpointer_del(trans, bp_k.k->p); @@ -931,6 +929,14 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b if (sectors[ALLOC_dirty] != a->dirty_sectors || sectors[ALLOC_cached] != a->cached_sectors || sectors[ALLOC_stripe] != a->stripe_sectors) { + /* + * Post 1.14 upgrade, we assume that backpointers are mostly + * correct and a sector count mismatch is probably due to a + * write buffer race + * + * Pre upgrade, we expect all the buckets to be wrong, a write + * buffer flush is pointless: + */ if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); if (ret) @@ -978,12 +984,22 @@ static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) goto next; struct bpos bucket = bp_pos_to_bucket(ca, pos); - u64 next = ca->mi.nbuckets; - - unsigned long *bitmap = READ_ONCE(ca->bucket_backpointer_mismatch.buckets); - if (bitmap) - next = min_t(u64, next, - find_next_bit(bitmap, ca->mi.nbuckets, bucket.offset)); + u64 next = min(bucket.offset, ca->mi.nbuckets); + + unsigned long *mismatch = READ_ONCE(ca->bucket_backpointer_mismatch.buckets); + unsigned long *empty = READ_ONCE(ca->bucket_backpointer_empty.buckets); + /* + * Find the first bucket with mismatches - but + * not empty buckets; we don't need to pin those + * because we just recreate all backpointers in + * those buckets + */ + if (mismatch && empty) + next = find_next_andnot_bit(mismatch, empty, ca->mi.nbuckets, next); + else if (mismatch) + next = find_next_bit(mismatch, ca->mi.nbuckets, next); + else + next = ca->mi.nbuckets; bucket.offset = next; if (bucket.offset == ca->mi.nbuckets) @@ -1110,17 +1126,18 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) if (ret) goto err; - u64 nr_buckets = 0, nr_mismatches = 0; + u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0; for_each_member_device(c, ca) { nr_buckets += ca->mi.nbuckets; nr_mismatches += ca->bucket_backpointer_mismatch.nr; + nr_empty += ca->bucket_backpointer_empty.nr; } if (!nr_mismatches) goto err; - bch_info(c, "scanning for missing backpointers in %llu/%llu buckets", - nr_mismatches, nr_buckets); + bch_info(c, "scanning for missing backpointers in %llu/%llu buckets, %llu buckets with no backpointers", + nr_mismatches - nr_empty, nr_buckets, nr_empty); while (1) { ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 276cf088539e..2e3dd9bacac5 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -131,10 +131,10 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size, BUG_ON(size > c->opts.btree_node_size); *used_mempool = false; - p = kvmalloc(size, GFP_NOWAIT); + p = kvmalloc(size, GFP_NOWAIT|__GFP_ACCOUNT|__GFP_RECLAIMABLE); if (!p) { *used_mempool = true; - p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS); + p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS|__GFP_ACCOUNT|__GFP_RECLAIMABLE); } memalloc_nofs_restore(flags); return p; @@ -1014,6 +1014,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, k = bkey_p_next(k); continue; drop_this_key: + ret = 0; next_good_key = k->u64s; if (!next_good_key || @@ -1470,7 +1471,7 @@ start: } prt_newline(&buf); - if (failed.nr) + if (ret || failed.nr) bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); async_object_list_del(c, btree_read_bio, rb->list_idx); diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c index 7ddb156c765c..73eb28090bc7 100644 --- a/fs/bcachefs/extent_update.c +++ b/fs/bcachefs/extent_update.c @@ -115,9 +115,15 @@ int bch2_extent_trim_atomic(struct btree_trans *trans, copy.flags |= BTREE_ITER_nofilter_whiteouts; + /* + * We're doing our own whiteout filtering, but we still need to pass a + * max key to avoid popping an assert in bch2_snapshot_is_ancestor(): + */ struct bkey_s_c k; unsigned nr_iters = 0; - for_each_btree_key_continue_norestart(copy, 0, k, ret) { + for_each_btree_key_max_continue_norestart(copy, + POS(insert->k.p.inode, U64_MAX), + 0, k, ret) { unsigned offset = 0; if (bkey_gt(iter->pos, bkey_start_pos(k.k))) diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 07869436a964..d56959f12210 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -737,9 +737,9 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, return ret; CLASS(printbuf, buf)(); + prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret)); bch2_journal_debug_to_text(&buf, j); bch2_print_str(c, KERN_ERR, buf.buf); - prt_printf(&buf, bch2_fmt(c, "Journal stuck? Waited for 10 seconds, err %s"), bch2_err_str(ret)); closure_wait_event(&j->async_wait, !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) || diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 84ce69a7f131..31a3abcbd83e 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -242,7 +242,7 @@ enum fsck_err_opts { x(inodes_32bit, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ - BCH_SB_INODE_32BIT, true, \ + BCH_SB_INODE_32BIT, false, \ NULL, "Constrain inode numbers to 32 bits") \ x(shard_inode_numbers_bits, u8, \ OPT_FS|OPT_FORMAT, \ @@ -321,6 +321,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Don't kick drives out when splitbrain detected")\ + x(no_version_check, u8, \ + OPT_HIDDEN, \ + OPT_BOOL(), \ + BCH2_NO_SB_OPT, false, \ + NULL, "Don't fail reading the superblock due to incompatible version")\ x(verbose, u8, \ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 29e81f96db0f..8280ca333f5b 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -64,7 +64,6 @@ int bch2_btree_lost_data(struct bch_fs *c, * but in debug mode we want the next fsck run to be clean: */ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0, &write_sb) ?: ret; - ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0, &write_sb) ?: ret; #endif write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent); diff --git a/fs/bcachefs/recovery_passes_format.h b/fs/bcachefs/recovery_passes_format.h index b63c20558d3d..2696eee00345 100644 --- a/fs/bcachefs/recovery_passes_format.h +++ b/fs/bcachefs/recovery_passes_format.h @@ -37,7 +37,7 @@ x(check_alloc_info, 10, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_lrus, 11, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_btree_backpointers, 12, PASS_ONLINE|PASS_FSCK_ALLOC) \ - x(check_backpointers_to_extents, 13, PASS_ONLINE|PASS_FSCK_DEBUG) \ + x(check_backpointers_to_extents, 13, PASS_ONLINE) \ x(check_extents_to_backpointers, 14, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(check_alloc_to_lru_refs, 15, PASS_ONLINE|PASS_FSCK_ALLOC) \ x(fs_freespace_init, 16, PASS_ALWAYS|PASS_SILENT) \ diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index 44bc12573a0c..bfeb713dd210 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -22,7 +22,7 @@ enum counters_flags { x(io_read_split, 33, TYPE_COUNTER) \ x(io_read_reuse_race, 34, TYPE_COUNTER) \ x(io_read_retry, 32, TYPE_COUNTER) \ - x(io_read_fail_and_poison, 82, TYPE_COUNTER) \ + x(io_read_fail_and_poison, 95, TYPE_COUNTER) \ x(io_write, 1, TYPE_SECTORS) \ x(io_move, 2, TYPE_SECTORS) \ x(io_move_read, 35, TYPE_SECTORS) \ @@ -124,4 +124,12 @@ struct bch_sb_field_counters { __le64 d[]; }; +static inline void __maybe_unused check_bch_counter_ids_unique(void) { + switch(0){ +#define x(t, n, ...) case (n): + BCH_PERSISTENT_COUNTERS() +#undef x + } +} + #endif /* _BCACHEFS_SB_COUNTERS_FORMAT_H */ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 369465a4de77..5897380c4c08 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -379,7 +379,7 @@ static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) return 0; } -int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, +int bch2_sb_validate(struct bch_sb *sb, struct bch_opts *opts, u64 read_offset, enum bch_validate_flags flags, struct printbuf *out) { enum bch_opt_id opt_id; @@ -389,28 +389,30 @@ int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, if (ret) return ret; - u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR); - unsigned incompat_bit = 0; - if (incompat) - incompat_bit = __ffs64(incompat); - else if (sb->features[1]) - incompat_bit = 64 + __ffs64(le64_to_cpu(sb->features[1])); - - if (incompat_bit) { - prt_printf(out, "Filesystem has incompatible feature bit %u, highest supported %s (%u)", - incompat_bit, - bch2_sb_features[BCH_FEATURE_NR - 1], - BCH_FEATURE_NR - 1); - return -BCH_ERR_invalid_sb_features; - } + if (!opts->no_version_check) { + u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR); + unsigned incompat_bit = 0; + if (incompat) + incompat_bit = __ffs64(incompat); + else if (sb->features[1]) + incompat_bit = 64 + __ffs64(le64_to_cpu(sb->features[1])); + + if (incompat_bit) { + prt_printf(out, "Filesystem has incompatible feature bit %u, highest supported %s (%u)", + incompat_bit, + bch2_sb_features[BCH_FEATURE_NR - 1], + BCH_FEATURE_NR - 1); + return -BCH_ERR_invalid_sb_features; + } - if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || - BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { - prt_str(out, "Filesystem has incompatible version "); - bch2_version_to_text(out, le16_to_cpu(sb->version)); - prt_str(out, ", current version "); - bch2_version_to_text(out, bcachefs_metadata_version_current); - return -BCH_ERR_invalid_sb_features; + if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || + BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { + prt_str(out, "Filesystem has incompatible version "); + bch2_version_to_text(out, le16_to_cpu(sb->version)); + prt_str(out, ", current version "); + bch2_version_to_text(out, bcachefs_metadata_version_current); + return -BCH_ERR_invalid_sb_features; + } } if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) { @@ -915,7 +917,7 @@ got_super: sb->have_layout = true; - ret = bch2_sb_validate(sb->sb, offset, 0, &err); + ret = bch2_sb_validate(sb->sb, opts, offset, 0, &err); if (ret) { bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error validating superblock: %s\n", path, err.buf); @@ -1081,9 +1083,10 @@ int bch2_write_super(struct bch_fs *c) bch2_sb_from_fs(c, (*ca)); darray_for_each(online_devices, ca) { + struct bch_opts opts = bch2_opts_empty(); printbuf_reset(&err); - ret = bch2_sb_validate((*ca)->disk_sb.sb, 0, BCH_VALIDATE_write, &err); + ret = bch2_sb_validate((*ca)->disk_sb.sb, &opts, 0, BCH_VALIDATE_write, &err); if (ret) { bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf); goto out; diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h index a3b7a90f2533..82cb3a3ceeae 100644 --- a/fs/bcachefs/super-io.h +++ b/fs/bcachefs/super-io.h @@ -92,7 +92,8 @@ int bch2_sb_from_fs(struct bch_fs *, struct bch_dev *); void bch2_free_super(struct bch_sb_handle *); int bch2_sb_realloc(struct bch_sb_handle *, unsigned); -int bch2_sb_validate(struct bch_sb *, u64, enum bch_validate_flags, struct printbuf *); +int bch2_sb_validate(struct bch_sb *, struct bch_opts *, u64, + enum bch_validate_flags, struct printbuf *); int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *); int bch2_read_super_silent(const char *, struct bch_opts *, struct bch_sb_handle *); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 09e7f8ae9922..ee3b30b1c2b5 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1021,6 +1021,12 @@ static int bch2_fs_opt_version_init(struct bch_fs *c) prt_bitflags(&p, bch2_recovery_passes, sb_passes); } + u64 btrees_lost_data = le64_to_cpu(ext->btrees_lost_data); + if (btrees_lost_data) { + prt_str(&p, "\nsuperblock indicates damage to following btrees:\n "); + prt_bitflags(&p, __bch2_btree_ids, btrees_lost_data); + } + if (bch2_check_version_downgrade(c)) { prt_str(&p, "\nVersion downgrade required:"); diff --git a/mm/shrinker.c b/mm/shrinker.c index c94eedf2cfd8..4a76364d2b7e 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -833,7 +833,9 @@ void shrinker_to_text(struct seq_buf *out, struct shrinker *shrinker) }; unsigned long nr_freed = atomic_long_read(&shrinker->objects_freed); - seq_buf_puts(out, shrinker->name); + seq_buf_printf(out, "%ps", shrinker->scan_objects); + if (shrinker->name) + seq_buf_printf(out, ": %s", shrinker->name); seq_buf_putc(out, '\n'); seq_buf_printf(out, "objects: %lu\n", shrinker->count_objects(shrinker, &sc)); |