diff options
34 files changed, 302 insertions, 193 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision index dd8323f9..ff4da657 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -e02a6a521ba07a404c589f2873fcd9cb8189c76e +b47c5ba55d592855bdfc77883f41207bdad56248 diff --git a/c_src/bcachefs.c b/c_src/bcachefs.c index f4712597..9e9cf647 100644 --- a/c_src/bcachefs.c +++ b/c_src/bcachefs.c @@ -40,11 +40,13 @@ void bcachefs_usage(void) "\n" "Commands for managing images:\n" " image create Create a new compact disk image\n" + "\n" "Mount:\n" " mount Mount a filesystem\n" "\n" "Repair:\n" " fsck Check an existing filesystem for errors\n" + " recovery-pass Schedule or deschedule recovery passes\n" "\n" #if 0 "Startup/shutdown, assembly of multi device filesystems:\n" @@ -105,18 +107,3 @@ void bcachefs_usage(void) " completions Generate shell completions\n" " version Display the version of the invoked bcachefs tool\n"); } - -int fs_cmds(int argc, char *argv[]) -{ - char *cmd = pop_cmd(&argc, argv); - - if (argc < 1) - return fs_usage(); - if (!strcmp(cmd, "usage")) - return cmd_fs_usage(argc, argv); - if (!strcmp(cmd, "top")) - return cmd_fs_top(argc, argv); - - fs_usage(); - return -EINVAL; -} diff --git a/c_src/cmd_fs.c b/c_src/cmd_fs.c index 79826cb2..aa825e90 100644 --- a/c_src/cmd_fs.c +++ b/c_src/cmd_fs.c @@ -559,3 +559,18 @@ int cmd_fs_usage(int argc, char *argv[]) printbuf_exit(&buf); return 0; } + +int fs_cmds(int argc, char *argv[]) +{ + char *cmd = pop_cmd(&argc, argv); + + if (argc < 1) + return fs_usage(); + if (!strcmp(cmd, "usage")) + return cmd_fs_usage(argc, argv); + if (!strcmp(cmd, "top")) + return cmd_fs_top(argc, argv); + + fs_usage(); + return -EINVAL; +} diff --git a/c_src/cmd_fsck.c b/c_src/cmd_fsck.c index 66669e67..8c37b1ba 100644 --- a/c_src/cmd_fsck.c +++ b/c_src/cmd_fsck.c @@ -6,27 +6,11 @@ #include "cmds.h" #include "libbcachefs/error.h" #include "libbcachefs.h" +#include "libbcachefs/recovery_passes.h" #include "libbcachefs/super.h" #include "libbcachefs/super-io.h" #include "tools-util.h" -static void fsck_usage(void) -{ - puts("bcachefs fsck - filesystem check and repair\n" - "Usage: bcachefs fsck [OPTION]... <devices>\n" - "\n" - "Options:\n" - " -p Automatic repair (no questions)\n" - " -n Don't repair, only check for errors\n" - " -y Assume \"yes\" to all questions\n" - " -f Force checking even if filesystem is marked clean\n" - " -r, --ratelimit_errors Don't display more than 10 errors of a given type\n" - " -k, --kernel Use the in-kernel fsck implementation\n" - " -v Be verbose\n" - " -h, --help Display this help and exit\n" - "Report bugs to <linux-bcachefs@vger.kernel.org>"); -} - static void setnonblocking(int fd) { int flags = fcntl(fd, F_GETFL); @@ -212,6 +196,23 @@ static char *loopdev_alloc(const char *path) return line; } +static void fsck_usage(void) +{ + puts("bcachefs fsck - filesystem check and repair\n" + "Usage: bcachefs fsck [OPTION]... <devices>\n" + "\n" + "Options:\n" + " -p Automatic repair (no questions)\n" + " -n Don't repair, only check for errors\n" + " -y Assume \"yes\" to all questions\n" + " -f Force checking even if filesystem is marked clean\n" + " -r, --ratelimit_errors Don't display more than 10 errors of a given type\n" + " -k, --kernel Use the in-kernel fsck implementation\n" + " -v Be verbose\n" + " -h, --help Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + int cmd_fsck(int argc, char *argv[]) { static const struct option longopts[] = { @@ -370,3 +371,94 @@ userland_fsck: printbuf_exit(&opts_str); return ret; } + +static void recovery_pass_usage(void) +{ + puts("bcachefs recovery-pass - list and manage scheduled recovery passes\n" + "Usage: bcachefs recovery-pass [OPTION]... <devices>\n" + "\n" + "Currently only supports unmounted/offline filesystems\n" + "\n" + "Options:\n" + " -s, --set Schedule a recovery pass in the superblock\n" + " -u, --unset Deschedule a recovery pass\n" + " -h, --help Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +int cmd_recovery_pass(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "set", required_argument, NULL, 's' }, + { "unset", required_argument, NULL, 'u' }, + { "help", no_argument, NULL, 'h' }, + { NULL } + }; + u64 passes_to_set = 0, passes_to_unset = 0; + int opt; + + while ((opt = getopt_long(argc, argv, "s:u:h", longopts, NULL)) != -1) + switch (opt) { + case 's': + passes_to_set |= read_flag_list_or_die(optarg, + bch2_recovery_passes, + "recovery pass"); + break; + case 'u': + passes_to_unset |= read_flag_list_or_die(optarg, + bch2_recovery_passes, + "recovery pass"); + break; + case 'h': + recovery_pass_usage(); + exit(EXIT_SUCCESS); + } + args_shift(optind); + + passes_to_set = bch2_recovery_passes_to_stable(passes_to_set); + passes_to_unset = bch2_recovery_passes_to_stable(passes_to_unset); + + darray_const_str devs = get_or_split_cmdline_devs(argc, argv); + + struct bch_opts opts = bch2_opts_empty(); + opt_set(opts, nostart, true); + + struct bch_fs *c = bch2_fs_open(&devs, &opts); + int ret = PTR_ERR_OR_ZERO(c); + if (ret) + die("Error opening filesystem: %s", bch2_err_str(ret)); + + scoped_guard(mutex, &c->sb_lock) { + struct bch_sb_field_ext *ext = + bch2_sb_field_get_minsize(&c->disk_sb, ext, + sizeof(struct bch_sb_field_ext) / sizeof(u64)); + if (!ext) { + fprintf(stderr, "Error getting sb_field_ext\n"); + goto err; + } + + u64 scheduled = le64_to_cpu(ext->recovery_passes_required[0]); + + if (passes_to_set || passes_to_unset) { + ext->recovery_passes_required[0] &= ~cpu_to_le64(passes_to_unset); + ext->recovery_passes_required[0] |= cpu_to_le64(passes_to_set); + + scheduled = le64_to_cpu(ext->recovery_passes_required[0]); + + bch2_write_super(c); + } + + CLASS(printbuf, buf)(); + prt_str(&buf, "Scheduled recovery passes: "); + if (scheduled) + prt_bitflags(&buf, bch2_recovery_passes, + bch2_recovery_passes_from_stable(scheduled)); + else + prt_str(&buf, "(none)"); + printf("%s\n", buf.buf); + } +err: + bch2_fs_stop(c); + return ret; +} + diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c index 0d24018c..b4da4e5b 100644 --- a/c_src/cmd_migrate.c +++ b/c_src/cmd_migrate.c @@ -244,7 +244,6 @@ static int migrate_fs(const char *fs_path, find_superblock_space(extents, format_opts, dev); struct bch_sb *sb = bch2_format(fs_opt_strs, fs_opts, format_opts, devs); - darray_exit(&devs); u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]); @@ -316,6 +315,8 @@ static int migrate_fs(const char *fs_path, "no longer needed (and should be deleted prior to running\n" "bcachefs migrate-superblock)\n", sb_offset, dev->path, dev->path, sb_offset); + + darray_exit(&devs); return 0; } diff --git a/c_src/cmds.h b/c_src/cmds.h index 4454f33f..30b47286 100644 --- a/c_src/cmds.h +++ b/c_src/cmds.h @@ -31,6 +31,7 @@ int cmd_set_passphrase(int argc, char *argv[]); int cmd_remove_passphrase(int argc, char *argv[]); int cmd_fsck(int argc, char *argv[]); +int cmd_recovery_pass(int argc, char *argv[]); int cmd_dump(int argc, char *argv[]); int cmd_list_journal(int argc, char *argv[]); diff --git a/include/linux/bio.h b/include/linux/bio.h index cfd241da..6528fe19 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -284,7 +284,7 @@ do { \ (dst)->bi_bdev = (src)->bi_bdev; \ } while (0) -static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) +static inline void *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) { return page_address(bvec->bv_page) + bvec->bv_offset; } @@ -294,14 +294,14 @@ static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) *flags = 0; } -static inline char *bvec_kmap_local(struct bio_vec *bvec) +static inline void *bvec_kmap_local(struct bio_vec *bvec) { return page_address(bvec->bv_page) + bvec->bv_offset; } static inline void bvec_kunmap_local(char *buffer) {} -static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter, +static inline void *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter, unsigned long *flags) { return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags); diff --git a/include/linux/types.h b/include/linux/types.h index 5ee5ebc6..a1473592 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -53,6 +53,12 @@ typedef __s16 s16; typedef __u8 u8; typedef __s8 s8; +typedef unsigned char unchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; +typedef unsigned long long ullong; + #ifdef __CHECKER__ #define __bitwise__ __attribute__((bitwise)) #else diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 77d93beb..bc277f42 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -144,7 +144,8 @@ static noinline int backpointer_mod_err(struct btree_trans *trans, if (!will_check && __bch2_inconsistent_error(c, &buf)) ret = bch_err_throw(c, erofs_unfixed_errors); - bch_err(c, "%s", buf.buf); + if (buf.buf) + bch_err(c, "%s", buf.buf); printbuf_exit(&buf); return ret; } diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index a3631a90..49505653 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -86,7 +86,7 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) six_unlock_intent(&b->c.lock); } -static void __btree_node_data_free(struct btree_cache *bc, struct btree *b) +void __btree_node_data_free(struct btree *b) { BUG_ON(!list_empty(&b->list)); BUG_ON(btree_node_hashed(b)); @@ -113,16 +113,17 @@ static void __btree_node_data_free(struct btree_cache *bc, struct btree *b) munmap(b->aux_data, btree_aux_data_bytes(b)); #endif b->aux_data = NULL; - - btree_node_to_freedlist(bc, b); } static void btree_node_data_free(struct btree_cache *bc, struct btree *b) { BUG_ON(list_empty(&b->list)); list_del_init(&b->list); + + __btree_node_data_free(b); + --bc->nr_freeable; - __btree_node_data_free(bc, b); + btree_node_to_freedlist(bc, b); } static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, @@ -186,10 +187,7 @@ static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) { - struct btree_cache *bc = &c->btree_cache; - struct btree *b; - - b = __btree_node_mem_alloc(c, GFP_KERNEL); + struct btree *b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) return NULL; @@ -199,8 +197,6 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) } bch2_btree_lock_init(&b->c, 0, GFP_KERNEL); - - __bch2_btree_node_to_freelist(bc, b); return b; } @@ -526,7 +522,8 @@ restart: --touched;; } else if (!btree_node_reclaim(c, b)) { __bch2_btree_node_hash_remove(bc, b); - __btree_node_data_free(bc, b); + __btree_node_data_free(b); + btree_node_to_freedlist(bc, b); freed++; bc->nr_freed++; @@ -667,9 +664,12 @@ int bch2_fs_btree_cache_init(struct bch_fs *c) bch2_recalc_btree_reserve(c); - for (i = 0; i < bc->nr_reserve; i++) - if (!__bch2_btree_node_mem_alloc(c)) + for (i = 0; i < bc->nr_reserve; i++) { + struct btree *b = __bch2_btree_node_mem_alloc(c); + if (!b) goto err; + __bch2_btree_node_to_freelist(bc, b); + } list_splice_init(&bc->live[0].list, &bc->freeable); diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h index 3264801c..649e9dfd 100644 --- a/libbcachefs/btree_cache.h +++ b/libbcachefs/btree_cache.h @@ -30,6 +30,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *, enum btree_id, unsig void bch2_btree_cache_cannibalize_unlock(struct btree_trans *); int bch2_btree_cache_cannibalize_lock(struct btree_trans *, struct closure *); +void __btree_node_data_free(struct btree *); struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *); struct btree *bch2_btree_node_mem_alloc(struct btree_trans *, bool); diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c index 84e302af..8924dae1 100644 --- a/libbcachefs/btree_io.c +++ b/libbcachefs/btree_io.c @@ -24,8 +24,15 @@ #include "super-io.h" #include "trace.h" +#include <linux/moduleparam.h> #include <linux/sched/mm.h> +#ifdef CONFIG_BCACHEFS_DEBUG +static unsigned bch2_btree_read_corrupt_ratio; +module_param_named(btree_read_corrupt_ratio, bch2_btree_read_corrupt_ratio, uint, 0644); +MODULE_PARM_DESC(btree_read_corrupt_ratio, ""); +#endif + static void bch2_btree_node_header_to_text(struct printbuf *out, struct btree_node *bn) { bch2_btree_id_level_to_text(out, BTREE_NODE_ID(bn), BTREE_NODE_LEVEL(bn)); @@ -568,9 +575,9 @@ static int __btree_err(int ret, bch2_mark_btree_validate_failure(failed, ca->dev_idx); struct extent_ptr_decoded pick; - have_retry = !bch2_bkey_pick_read_device(c, + have_retry = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), - failed, &pick, -1); + failed, &pick, -1) == 1; } if (!have_retry && ret == -BCH_ERR_btree_node_read_err_want_retry) @@ -615,7 +622,6 @@ static int __btree_err(int ret, goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); - ret = __bch2_topology_error(c, &out); break; } @@ -644,7 +650,6 @@ static int __btree_err(int ret, goto out; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); - ret = __bch2_topology_error(c, &out); break; } print: @@ -1408,7 +1413,7 @@ static void btree_node_read_work(struct work_struct *work) ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), &failed, &rb->pick, -1); - if (ret) { + if (ret <= 0) { set_btree_node_read_error(b); break; } @@ -1439,6 +1444,11 @@ start: continue; } + memset(&bio->bi_iter, 0, sizeof(bio->bi_iter)); + bio->bi_iter.bi_size = btree_buf_bytes(b); + + bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio); + ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf); if (ret == -BCH_ERR_btree_node_read_err_want_retry || ret == -BCH_ERR_btree_node_read_err_must_retry) diff --git a/libbcachefs/btree_journal_iter.c b/libbcachefs/btree_journal_iter.c index 341d31b3..ea839560 100644 --- a/libbcachefs/btree_journal_iter.c +++ b/libbcachefs/btree_journal_iter.c @@ -717,18 +717,6 @@ static void __journal_keys_sort(struct journal_keys *keys) keys->nr = dst - keys->data; } -static bool should_rewind_entry(struct bch_fs *c, struct jset_entry *entry) -{ - if (entry->level) - return false; - if (btree_id_is_alloc(entry->btree_id)) - return false; - if (c->opts.journal_rewind_no_extents && - entry->btree_id == BTREE_ID_extents) - return false; - return true; -} - int bch2_journal_keys_sort(struct bch_fs *c) { struct genradix_iter iter; @@ -747,8 +735,9 @@ int bch2_journal_keys_sort(struct bch_fs *c) cond_resched(); vstruct_for_each(&i->j, entry) { - bool rewind = le64_to_cpu(i->j.seq) >= rewind_seq && - should_rewind_entry(c, entry); + bool rewind = !entry->level && + !btree_id_is_alloc(entry->btree_id) && + le64_to_cpu(i->j.seq) >= rewind_seq; if (entry->type != (rewind ? BCH_JSET_ENTRY_overwrite diff --git a/libbcachefs/btree_node_scan.c b/libbcachefs/btree_node_scan.c index 23d8c62e..42c9eb2c 100644 --- a/libbcachefs/btree_node_scan.c +++ b/libbcachefs/btree_node_scan.c @@ -75,39 +75,6 @@ static inline u64 bkey_journal_seq(struct bkey_s_c k) } } -static bool found_btree_node_is_readable(struct btree_trans *trans, - struct found_btree_node *f) -{ - struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp; - - found_btree_node_to_key(&tmp.k, f); - - struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false); - bool ret = !IS_ERR_OR_NULL(b); - if (!ret) - return ret; - - f->sectors_written = b->written; - f->journal_seq = le64_to_cpu(b->data->keys.journal_seq); - - struct bkey_s_c k; - struct bkey unpacked; - struct btree_node_iter iter; - for_each_btree_node_key_unpack(b, k, &iter, &unpacked) - f->journal_seq = max(f->journal_seq, bkey_journal_seq(k)); - - six_unlock_read(&b->c.lock); - - /* - * We might update this node's range; if that happens, we need the node - * to be re-read so the read path can trim keys that are no longer in - * this node - */ - if (b != btree_node_root(trans->c, b)) - bch2_btree_node_evict(trans, &tmp.k); - return ret; -} - static int found_btree_node_cmp_cookie(const void *_l, const void *_r) { const struct found_btree_node *l = _l; @@ -159,17 +126,17 @@ static const struct min_heap_callbacks found_btree_node_heap_cbs = { }; static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, - struct bio *bio, struct btree_node *bn, u64 offset) + struct btree *b, struct bio *bio, u64 offset) { struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes); + struct btree_node *bn = b->data; bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); bio->bi_iter.bi_sector = offset; - bch2_bio_map(bio, bn, PAGE_SIZE); + bch2_bio_map(bio, b->data, c->opts.block_size); u64 submit_time = local_clock(); submit_bio_wait(bio); - bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); if (bio->bi_status) { @@ -201,6 +168,14 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, if (BTREE_NODE_ID(bn) >= BTREE_ID_NR_MAX) return; + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); + bio->bi_iter.bi_sector = offset; + bch2_bio_map(bio, b->data, c->opts.btree_node_size); + + submit_time = local_clock(); + submit_bio_wait(bio); + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); + rcu_read_lock(); struct found_btree_node n = { .btree_id = BTREE_NODE_ID(bn), @@ -217,7 +192,20 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, }; rcu_read_unlock(); - if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) { + found_btree_node_to_key(&b->key, &n); + + CLASS(printbuf, buf)(); + if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) { + /* read_done will swap out b->data for another buffer */ + bn = b->data; + /* + * Grab journal_seq here because we want the max journal_seq of + * any bset; read_done sorts down to a single set and picks the + * max journal_seq + */ + n.journal_seq = le64_to_cpu(bn->keys.journal_seq), + n.sectors_written = b->written; + mutex_lock(&f->lock); if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) { bch_err(c, "try_read_btree_node() can't handle endian conversion"); @@ -237,12 +225,20 @@ static int read_btree_nodes_worker(void *p) struct find_btree_nodes_worker *w = p; struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes); struct bch_dev *ca = w->ca; - void *buf = (void *) __get_free_page(GFP_KERNEL); - struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL); unsigned long last_print = jiffies; + struct btree *b = NULL; + struct bio *bio = NULL; + + b = __bch2_btree_node_mem_alloc(c); + if (!b) { + bch_err(c, "read_btree_nodes_worker: error allocating buf"); + w->f->ret = -ENOMEM; + goto err; + } - if (!buf || !bio) { - bch_err(c, "read_btree_nodes_worker: error allocating bio/buf"); + bio = bio_alloc(NULL, buf_pages(b->data, c->opts.btree_node_size), 0, GFP_KERNEL); + if (!bio) { + bch_err(c, "read_btree_nodes_worker: error allocating bio"); w->f->ret = -ENOMEM; goto err; } @@ -266,11 +262,13 @@ static int read_btree_nodes_worker(void *p) !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c))) continue; - try_read_btree_node(w->f, ca, bio, buf, sector); + try_read_btree_node(w->f, ca, b, bio, sector); } err: + if (b) + __btree_node_data_free(b); + kfree(b); bio_put(bio); - free_page((unsigned long) buf); enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); closure_put(w->cl); kfree(w); diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c index 7fcf248a..a7e9d891 100644 --- a/libbcachefs/btree_trans_commit.c +++ b/libbcachefs/btree_trans_commit.c @@ -1008,7 +1008,7 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) return 0; } -int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) +int __bch2_trans_commit(struct btree_trans *trans, enum bch_trans_commit_flags flags) { struct btree_insert_entry *errored_at = NULL; struct bch_fs *c = trans->c; diff --git a/libbcachefs/btree_update.c b/libbcachefs/btree_update.c index 5d9e0237..7983c494 100644 --- a/libbcachefs/btree_update.c +++ b/libbcachefs/btree_update.c @@ -661,21 +661,22 @@ int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id, * @k: key to insert * @disk_res: must be non-NULL whenever inserting or potentially * splitting data extents - * @flags: transaction commit flags + * @commit_flags: transaction commit flags * @iter_flags: btree iter update trigger flags * * Returns: 0 on success, error code on failure */ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, - struct disk_reservation *disk_res, int flags, + struct disk_reservation *disk_res, + enum bch_trans_commit_flags commit_flags, enum btree_iter_update_trigger_flags iter_flags) { - return bch2_trans_commit_do(c, disk_res, NULL, flags, + return bch2_trans_commit_do(c, disk_res, NULL, commit_flags, bch2_btree_insert_trans(trans, id, k, iter_flags)); } -int bch2_btree_delete_at(struct btree_trans *trans, - struct btree_iter *iter, unsigned update_flags) +int bch2_btree_delete_at(struct btree_trans *trans, struct btree_iter *iter, + enum btree_iter_update_trigger_flags flags) { struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); int ret = PTR_ERR_OR_ZERO(k); @@ -684,12 +685,12 @@ int bch2_btree_delete_at(struct btree_trans *trans, bkey_init(&k->k); k->k.p = iter->pos; - return bch2_trans_update(trans, iter, k, update_flags); + return bch2_trans_update(trans, iter, k, flags); } int bch2_btree_delete(struct btree_trans *trans, enum btree_id btree, struct bpos pos, - unsigned update_flags) + enum btree_iter_update_trigger_flags flags) { struct btree_iter iter; int ret; @@ -698,7 +699,7 @@ int bch2_btree_delete(struct btree_trans *trans, BTREE_ITER_cached| BTREE_ITER_intent); ret = bch2_btree_iter_traverse(trans, &iter) ?: - bch2_btree_delete_at(trans, &iter, update_flags); + bch2_btree_delete_at(trans, &iter, flags); bch2_trans_iter_exit(trans, &iter); return ret; @@ -706,7 +707,7 @@ int bch2_btree_delete(struct btree_trans *trans, int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, struct bpos start, struct bpos end, - unsigned update_flags, + enum btree_iter_update_trigger_flags flags, u64 *journal_seq) { u32 restart_count = trans->restart_count; @@ -714,7 +715,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, struct bkey_s_c k; int ret = 0; - bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent); + bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent|flags); while ((k = bch2_btree_iter_peek_max(trans, &iter, end)).k) { struct disk_reservation disk_res = bch2_disk_reservation_init(trans->c, 0); @@ -747,7 +748,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id, bpos_min(end, k.k->p).offset - iter.pos.offset); - ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?: + ret = bch2_trans_update(trans, &iter, &delete, flags) ?: bch2_trans_commit(trans, &disk_res, journal_seq, BCH_TRANS_COMMIT_no_enospc); bch2_disk_reservation_put(trans->c, &disk_res); @@ -777,12 +778,12 @@ err: */ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, struct bpos start, struct bpos end, - unsigned update_flags, + enum btree_iter_update_trigger_flags flags, u64 *journal_seq) { int ret = bch2_trans_run(c, bch2_btree_delete_range_trans(trans, id, start, end, - update_flags, journal_seq)); + flags, journal_seq)); if (ret == -BCH_ERR_transaction_restart_nested) ret = 0; return ret; diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 2c6f9b44..222a9f8f 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -47,22 +47,27 @@ enum bch_trans_commit_flags { void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags); -int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); -int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned); +int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, + enum btree_iter_update_trigger_flags); +int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, + enum btree_iter_update_trigger_flags); int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id, struct bkey_i *, enum btree_iter_update_trigger_flags); int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *, enum btree_iter_update_trigger_flags); -int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, struct - disk_reservation *, int flags, enum - btree_iter_update_trigger_flags iter_flags); +int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, + struct disk_reservation *, + enum bch_trans_commit_flags, + enum btree_iter_update_trigger_flags); int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, - struct bpos, struct bpos, unsigned, u64 *); + struct bpos, struct bpos, + enum btree_iter_update_trigger_flags, u64 *); int bch2_btree_delete_range(struct bch_fs *, enum btree_id, - struct bpos, struct bpos, unsigned, u64 *); + struct bpos, struct bpos, + enum btree_iter_update_trigger_flags, u64 *); int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool); int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); @@ -226,7 +231,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr void bch2_trans_commit_hook(struct btree_trans *, struct btree_trans_commit_hook *); -int __bch2_trans_commit(struct btree_trans *, unsigned); +int __bch2_trans_commit(struct btree_trans *, enum bch_trans_commit_flags); int bch2_trans_log_str(struct btree_trans *, const char *); int bch2_trans_log_msg(struct btree_trans *, struct printbuf *); diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index 3968f3be..e848e210 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -783,9 +783,6 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) darray_for_each(m->op.devs_have, i) __clear_bit(*i, devs.d); - CLASS(printbuf, buf)(); - buf.atomic++; - guard(rcu)(); unsigned nr_replicas = 0, i; @@ -797,11 +794,7 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) struct bch_dev_usage usage; bch2_dev_usage_read_fast(ca, &usage); - u64 nr_free = dev_buckets_free(ca, usage, m->op.watermark); - - prt_printf(&buf, "%s=%llu ", ca->name, nr_free); - - if (!nr_free) + if (!dev_buckets_free(ca, usage, m->op.watermark)) continue; nr_replicas += ca->mi.durability; @@ -809,10 +802,8 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) break; } - if (!nr_replicas) { - trace_data_update_done_no_rw_devs(c, buf.buf); + if (!nr_replicas) return bch_err_throw(c, data_update_done_no_rw_devs); - } if (nr_replicas < m->op.nr_replicas) return bch_err_throw(c, insufficient_devices); return 0; diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c index 901f643e..07c2a0f7 100644 --- a/libbcachefs/debug.c +++ b/libbcachefs/debug.c @@ -153,8 +153,6 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) c->verify_data = __bch2_btree_node_mem_alloc(c); if (!c->verify_data) goto out; - - list_del_init(&c->verify_data->list); } BUG_ON(b->nsets != 1); @@ -586,6 +584,8 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, i->ubuf = buf; i->size = size; i->ret = 0; + + int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); restart: seqmutex_lock(&c->btree_trans_lock); list_sort(&c->btree_trans_list, list_ptr_order_cmp); @@ -599,6 +599,11 @@ restart: if (!closure_get_not_zero(&trans->ref)) continue; + if (!trans->srcu_held) { + closure_put(&trans->ref); + continue; + } + u32 seq = seqmutex_unlock(&c->btree_trans_lock); bch2_btree_trans_to_text(&i->buf, trans); @@ -620,6 +625,8 @@ restart: } seqmutex_unlock(&c->btree_trans_lock); unlocked: + srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); + if (i->buf.allocation_failure) ret = -ENOMEM; diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index d27b94a6..2de0dc91 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -289,7 +289,6 @@ x(EIO, sb_not_downgraded) \ x(EIO, btree_node_write_all_failed) \ x(EIO, btree_node_read_error) \ - x(EIO, btree_node_read_validate_error) \ x(EIO, btree_need_topology_repair) \ x(EIO, bucket_ref_update) \ x(EIO, trigger_alloc) \ diff --git a/libbcachefs/error.c b/libbcachefs/error.c index a9a9fe19..71649b41 100644 --- a/libbcachefs/error.c +++ b/libbcachefs/error.c @@ -103,7 +103,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) return bch_err_throw(c, btree_need_topology_repair); } else { return bch2_run_explicit_recovery_pass(c, out, BCH_RECOVERY_PASS_check_topology, 0) ?: - bch_err_throw(c, btree_node_read_validate_error); + bch_err_throw(c, btree_need_topology_repair); } } diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 036e4ad9..83cbd77d 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -50,19 +50,17 @@ void bch2_io_failures_to_text(struct printbuf *out, struct bch_io_failures *failed) { static const char * const error_types[] = { - "io", "checksum", "ec reconstruct", NULL + "btree validate", "io", "checksum", "ec reconstruct", NULL }; for (struct bch_dev_io_failures *f = failed->devs; f < failed->devs + failed->nr; f++) { unsigned errflags = - ((!!f->failed_io) << 0) | - ((!!f->failed_csum_nr) << 1) | - ((!!f->failed_ec) << 2); - - if (!errflags) - continue; + ((!!f->failed_btree_validate) << 0) | + ((!!f->failed_io) << 1) | + ((!!f->failed_csum_nr) << 2) | + ((!!f->failed_ec) << 3); bch2_printbuf_make_room(out, 1024); out->atomic++; @@ -77,7 +75,9 @@ void bch2_io_failures_to_text(struct printbuf *out, prt_char(out, ' '); - if (is_power_of_2(errflags)) { + if (!errflags) { + prt_str(out, "no error - confused"); + } else if (is_power_of_2(errflags)) { prt_bitflags(out, error_types, errflags); prt_str(out, " error"); } else { diff --git a/libbcachefs/fs-io-buffered.c b/libbcachefs/fs-io-buffered.c index dad48d44..4e82dfa6 100644 --- a/libbcachefs/fs-io-buffered.c +++ b/libbcachefs/fs-io-buffered.c @@ -257,7 +257,7 @@ err: struct printbuf buf = PRINTBUF; lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9)); - prt_printf(&buf, "read error %i from btree lookup", ret); + prt_printf(&buf, "read error %s from btree lookup", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); printbuf_exit(&buf); diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 1ceca63c..471e93a3 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -1638,7 +1638,8 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal i->count = count2; } - if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), + if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty) && + i->inode.bi_sectors != i->count, trans, inode_i_sectors_wrong, "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", w->last_pos.inode, i->inode.bi_snapshot, diff --git a/libbcachefs/io_read.h b/libbcachefs/io_read.h index 9c5ddbf8..cfc8ef35 100644 --- a/libbcachefs/io_read.h +++ b/libbcachefs/io_read.h @@ -147,7 +147,7 @@ static inline void bch2_read_extent(struct btree_trans *trans, int ret = __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, read_pos, data_btree, k, offset_into_extent, NULL, flags, -1); /* __bch2_read_extent only returns errors if BCH_READ_in_retry is set */ - WARN(ret, "unhandled error from __bch2_read_extent()"); + WARN(ret, "unhandled error from __bch2_read_extent(): %s", bch2_err_str(ret)); } int __bch2_read(struct btree_trans *, struct bch_read_bio *, struct bvec_iter, diff --git a/libbcachefs/io_write.c b/libbcachefs/io_write.c index 88b1eec8..fa077341 100644 --- a/libbcachefs/io_write.c +++ b/libbcachefs/io_write.c @@ -32,6 +32,7 @@ #include "trace.h" #include <linux/blkdev.h> +#include <linux/moduleparam.h> #include <linux/prefetch.h> #include <linux/random.h> #include <linux/sched/mm.h> diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index ce534061..f22b05e0 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -1376,6 +1376,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) return bch_err_throw(c, erofs_filesystem_full); } + unsigned nr; int ret; if (dynamic_fault("bcachefs:add:journal_alloc")) { @@ -1384,19 +1385,16 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs) } /* 1/128th of the device by default: */ - unsigned nr = ca->mi.nbuckets >> 7; + nr = ca->mi.nbuckets >> 7; /* - * clamp journal size to 8GB, or 32GB with large_journal option: + * clamp journal size to 8192 buckets or 8GB (in sectors), whichever + * is smaller: */ - unsigned max_sectors = 1 << 24; - - if (c->opts.large_journal) - max_sectors *= 4; - nr = clamp_t(unsigned, nr, BCH_JOURNAL_BUCKETS_MIN, - max_sectors / ca->mi.bucket_size); + min(1 << 13, + (1 << 24) / ca->mi.bucket_size)); ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, new_fs); err: diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c index 3f06c4b2..2d6ce434 100644 --- a/libbcachefs/journal_io.c +++ b/libbcachefs/journal_io.c @@ -1245,6 +1245,8 @@ noinline_for_stack static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j) { struct printbuf buf = PRINTBUF; + bch2_log_msg_start(c, &buf); + enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j); bool have_good = false; diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h index 4a7a6058..63f8e254 100644 --- a/libbcachefs/opts.h +++ b/libbcachefs/opts.h @@ -343,12 +343,6 @@ enum fsck_err_opts { OPT_UINT(0, U32_MAX), \ BCH_SB_JOURNAL_RECLAIM_DELAY, 100, \ NULL, "Delay in milliseconds before automatic journal reclaim")\ - x(large_journal, bool, \ - OPT_FS|OPT_MOUNT|OPT_FORMAT, \ - OPT_BOOL(), \ - BCH2_NO_SB_OPT, false, \ - NULL, "Allocate a bigger than normal journal: recovery from unclean "\ - "shutdown will be slower, but more info will be available for debugging")\ x(move_bytes_in_flight, u32, \ OPT_HUMAN_READABLE|OPT_FS|OPT_MOUNT|OPT_RUNTIME, \ OPT_UINT(1024, U32_MAX), \ @@ -395,11 +389,6 @@ enum fsck_err_opts { OPT_UINT(0, U64_MAX), \ BCH2_NO_SB_OPT, 0, \ NULL, "Rewind journal") \ - x(journal_rewind_no_extents, bool, \ - OPT_FS|OPT_MOUNT, \ - OPT_BOOL(), \ - BCH2_NO_SB_OPT, 0, \ - NULL, "Don't rewind extents when rewinding journal") \ x(recovery_passes, u64, \ OPT_FS|OPT_MOUNT, \ OPT_BITFIELD(bch2_recovery_passes), \ diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 974f8bf9..0def4ecb 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -273,24 +273,35 @@ static int bch2_journal_replay_key(struct btree_trans *trans, goto out; struct btree_path *path = btree_iter_path(trans, &iter); - if (unlikely(!btree_path_node(path, k->level) && - !k->allocated)) { + if (unlikely(!btree_path_node(path, k->level))) { struct bch_fs *c = trans->c; + CLASS(printbuf, buf)(); + prt_str(&buf, "btree="); + bch2_btree_id_to_text(&buf, k->btree_id); + prt_printf(&buf, " level=%u ", k->level); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k)); + if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)| BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) { - bch_err(c, "have key in journal replay for btree depth that does not exist, confused"); + bch_err(c, "have key in journal replay for btree depth that does not exist, confused\n%s", + buf.buf); ret = -EINVAL; } -#if 0 + + if (!k->allocated) { + bch_notice(c, "dropping key in journal replay for depth that does not exist because we're recovering from scan\n%s", + buf.buf); + k->overwritten = true; + goto out; + } + bch2_trans_iter_exit(trans, &iter); bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, 0, iter_flags); ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: -BCH_ERR_transaction_restart_nested; -#endif - k->overwritten = true; goto out; } diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c index f2abe92c..340d4fb7 100644 --- a/libbcachefs/sb-members.c +++ b/libbcachefs/sb-members.c @@ -20,6 +20,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) prt_printf(&buf, "pointer to %s device %u in key\n", removed ? "removed" : "nonexistent", dev); bch2_bkey_val_to_text(&buf, c, k); + prt_newline(&buf); bool print = removed ? bch2_count_fsck_err(c, ptr_to_removed_device, &buf) diff --git a/libbcachefs/super.c b/libbcachefs/super.c index 6980cd5b..a3438b0d 100644 --- a/libbcachefs/super.c +++ b/libbcachefs/super.c @@ -1974,11 +1974,15 @@ int bch2_dev_add(struct bch_fs *c, const char *path) ca->disk_sb.sb->dev_idx = dev_idx; bch2_dev_attach(c, ca, dev_idx); + set_bit(ca->dev_idx, c->online_devs.d); + if (BCH_MEMBER_GROUP(&dev_mi)) { ret = __bch2_dev_group_set(c, ca, label.buf); bch_err_msg(c, ret, "creating new label"); - if (ret) - goto err_unlock; + if (ret) { + mutex_unlock(&c->sb_lock); + goto err_late; + } } bch2_write_super(c); @@ -2526,6 +2530,8 @@ static int bch2_param_get_static_key_t(char *buffer, const struct kernel_param * return sprintf(buffer, "%c\n", static_key_enabled(key) ? 'N' : 'Y'); } +/* this is unused in userspace - silence the warning */ +__maybe_unused static const struct kernel_param_ops bch2_param_ops_static_key_t = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = bch2_param_set_static_key_t, diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h index 9324ef32..b5dae114 100644 --- a/libbcachefs/trace.h +++ b/libbcachefs/trace.h @@ -1330,11 +1330,6 @@ DEFINE_EVENT(fs_str, data_update, TP_ARGS(c, str) ); -DEFINE_EVENT(fs_str, data_update_done_no_rw_devs, - TP_PROTO(struct bch_fs *c, const char *str), - TP_ARGS(c, str) -); - DEFINE_EVENT(fs_str, io_move_pred, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) diff --git a/src/bcachefs.rs b/src/bcachefs.rs index 9774ddca..91c44db0 100644 --- a/src/bcachefs.rs +++ b/src/bcachefs.rs @@ -49,6 +49,7 @@ fn handle_c_command(mut argv: Vec<String>, symlink_cmd: Option<&str>) -> i32 { "format" => c::cmd_format(argc, argv), "fs" => c::fs_cmds(argc, argv), "fsck" => c::cmd_fsck(argc, argv), + "recovery-pass" => c::cmd_recovery_pass(argc, argv), "image" => c::image_cmds(argc, argv), "list_journal" => c::cmd_list_journal(argc, argv), "kill_btree_node" => c::cmd_kill_btree_node(argc, argv), |