diff options
-rw-r--r-- | c_src/bcachefs.c | 1 | ||||
-rw-r--r-- | c_src/cmd_dump.c | 82 | ||||
-rw-r--r-- | c_src/cmd_migrate.c | 116 | ||||
-rw-r--r-- | c_src/cmd_super.c | 8 | ||||
-rw-r--r-- | c_src/cmds.h | 2 | ||||
-rw-r--r-- | c_src/libbcachefs.c | 12 | ||||
-rw-r--r-- | c_src/qcow2.c | 51 | ||||
-rw-r--r-- | c_src/qcow2.h | 4 | ||||
-rw-r--r-- | c_src/tools-util.c | 6 | ||||
-rw-r--r-- | c_src/tools-util.h | 7 | ||||
-rw-r--r-- | linux/shrinker.c | 3 | ||||
-rw-r--r-- | src/bcachefs.rs | 1 |
12 files changed, 246 insertions, 47 deletions
diff --git a/c_src/bcachefs.c b/c_src/bcachefs.c index 58fa2549..d91e0475 100644 --- a/c_src/bcachefs.c +++ b/c_src/bcachefs.c @@ -96,6 +96,7 @@ void bcachefs_usage(void) "Debug:\n" "These commands work on offline, unmounted filesystems\n" " dump Dump filesystem metadata to a qcow2 image\n" + " undump Convert qcow2 metadata dumps to sparse raw files\n" " list List filesystem metadata in textual form\n" " list_journal List contents of journal\n" "\n" diff --git a/c_src/cmd_dump.c b/c_src/cmd_dump.c index 5f354272..93e7d743 100644 --- a/c_src/cmd_dump.c +++ b/c_src/cmd_dump.c @@ -386,3 +386,85 @@ int cmd_dump(int argc, char *argv[]) darray_exit(&dev_names); return ret; } + +static void undump_usage(void) +{ + puts("bcachefs undump - turn qcow2 images from 'bcachefs dump' back into sparse raw images\n" + "Usage: bcachefs undump [OPTION]... <files>\n" + "\n" + "Options:\n" + " -f, --force Force; overwrite when needed\n" + " -h, --help Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +struct undump { + char *in, *out; + int infd, outfd; +}; + +int cmd_undump(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "force", no_argument, NULL, 'f' }, + { "help", no_argument, NULL, 'h' }, + { NULL } + }; + bool force = false; + int opt; + + while ((opt = getopt_long(argc, argv, "fh", + longopts, NULL)) != -1) + switch (opt) { + case 'f': + force = true; + break; + case 'h': + undump_usage(); + exit(EXIT_SUCCESS); + } + args_shift(optind); + + if (!argc) { + undump_usage(); + die("Please supply file(s) to convert"); + } + + DARRAY(struct undump) files = {}; + + for (unsigned i = 0; i < argc; i++) { + unsigned len = strlen(argv[i]); + const char *suffix = ".qcow2"; + unsigned suffixlen = strlen(suffix); + + if (len <= suffixlen || + strcmp(suffix, argv[i] + len - suffixlen)) { + die("%s not a qcow2 image?", argv[i]); + } + + char *out = strdup(argv[i]); + out[len - suffixlen] = '\0'; + + if (!force && !access(out, F_OK)) + die("%s already exists", out); + + darray_push(&files, ((struct undump) { + .in = argv[i], + .out = out, + .infd = xopen(argv[i], O_RDONLY), + })); + } + + darray_for_each(files, i) + i->outfd = xopen(i->out, O_WRONLY|O_CREAT|(!force ? O_EXCL : 0), 0600); + + darray_for_each(files, i) { + qcow2_to_raw(i->infd, i->outfd); + close(i->infd); + close(i->outfd); + free(i->out); + } + + darray_exit(&files); + return 0; +} diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c index aa17a160..4e4fbcba 100644 --- a/c_src/cmd_migrate.c +++ b/c_src/cmd_migrate.c @@ -86,6 +86,15 @@ found: return ret; } +static void mark_nouse_range(struct bch_dev *ca, u64 sector_from, u64 sector_to) +{ + u64 b = sector_to_bucket(ca, sector_from); + do { + set_bit(b, ca->buckets_nouse); + b++; + } while (bucket_to_sector(ca, b) < sector_to); +} + static void mark_unreserved_space(struct bch_fs *c, ranges extents) { struct bch_dev *ca = c->devs[0]; @@ -93,17 +102,16 @@ static void mark_unreserved_space(struct bch_fs *c, ranges extents) struct range i; for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) { - u64 b; - if (i.start == i.end) return; - b = sector_to_bucket(ca, i.start >> 9); - do { - set_bit(b, ca->buckets_nouse); - b++; - } while (bucket_to_sector(ca, b) << 9 < i.end); + mark_nouse_range(ca, i.start >> 9, + round_up(i.end, 1 << 9) >> 9); } + + /* Also be sure to mark the space for the default sb layout */ + unsigned sb_size = 1U << ca->disk_sb.sb->layout.sb_max_size_bits; + mark_nouse_range(ca, 0, BCH_SB_SECTOR + sb_size * 2); } static ranges reserve_new_fs_space(const char *file_path, unsigned block_size, @@ -279,7 +287,7 @@ static int migrate_fs(const char *fs_path, .dev = stat.st_dev, .extents = extents, .type = BCH_MIGRATE_migrate, - .reserve_start = roundup((format_opts.superblock_size * 2 + 8) << 9, + .reserve_start = roundup((format_opts.superblock_size * 2 + BCH_SB_SECTOR) << 9, bucket_bytes(c->devs[0])), }; @@ -381,6 +389,28 @@ static void migrate_superblock_usage(void) "Report bugs to <linux-bcachefs@vger.kernel.org>"); } +static void add_default_sb_layout(struct bch_sb* sb, unsigned *out_sb_size) +{ + unsigned sb_size = 1U << sb->layout.sb_max_size_bits; + if (out_sb_size) + *out_sb_size = sb_size; + + if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset)) + die("Can't add superblock: no space left in superblock layout"); + + for (unsigned i = 0; i < sb->layout.nr_superblocks; i++) + if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR || + le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR + sb_size) + die("Superblock layout already has default superblocks"); + + memmove(&sb->layout.sb_offset[2], + &sb->layout.sb_offset[0], + sb->layout.nr_superblocks * sizeof(u64)); + sb->layout.nr_superblocks += 2; + sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR); + sb->layout.sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR + sb_size); +} + int cmd_migrate_superblock(int argc, char *argv[]) { static const struct option longopts[] = { @@ -414,34 +444,23 @@ int cmd_migrate_superblock(int argc, char *argv[]) if (!sb_offset) die("Please specify offset of existing superblock"); - int fd = xopen(devs.data[0], O_RDWR); + int fd = xopen(devs.data[0], O_RDWR | O_EXCL); struct bch_sb *sb = __bch2_super_read(fd, sb_offset); - unsigned sb_size = 1U << sb->layout.sb_max_size_bits; + unsigned sb_size; + /* Check for invocation errors early */ + add_default_sb_layout(sb, &sb_size); - if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset)) - die("Can't add superblock: no space left in superblock layout"); - - for (unsigned i = 0; i < sb->layout.nr_superblocks; i++) - if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR || - le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR + sb_size) - die("Superblock layout already has default superblocks"); - - memmove(&sb->layout.sb_offset[2], - &sb->layout.sb_offset[0], - sb->layout.nr_superblocks * sizeof(u64)); - sb->layout.nr_superblocks += 2; - sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR); - sb->layout.sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR + sb_size); + /* Rewrite first 0-3.5k bytes with zeroes, ensuring we blow away + * the old superblock */ + // TODO: fix the "Superblock write was silently dropped" warning properly + static const char zeroes[(BCH_SB_SECTOR << 9) + sizeof(struct bch_sb)]; + xpwrite(fd, zeroes, ARRAY_SIZE(zeroes), 0, "zeroing start of disk"); - /* also write first 0-3.5k bytes with zeroes, ensure we blow away old - * superblock */ - static const char zeroes[BCH_SB_SECTOR << 9]; - xpwrite(fd, zeroes, BCH_SB_SECTOR << 9, 0, "zeroing start of disk"); - - bch2_super_write(fd, sb); xclose(fd); - /* mark new superblocks */ + /* We start a normal FS instance with the sb buckets temporarily + * prohibited from allocation, performing any recovery/upgrade/downgrade + * as needed, and only then change the superblock layout */ struct bch_opts opts = bch2_opts_empty(); opt_set(opts, nostart, true); @@ -454,29 +473,46 @@ int cmd_migrate_superblock(int argc, char *argv[]) die("error opening filesystem: %s", bch2_err_str(ret)); struct bch_dev *ca = c->devs[0]; - for (u64 b = 0; bucket_to_sector(ca, b) < BCH_SB_SECTOR + sb_size * 2; b++) - set_bit(b, ca->buckets_nouse); + mark_nouse_range(ca, 0, BCH_SB_SECTOR + sb_size * 2); ret = bch2_fs_start(c); if (ret) die("Error starting filesystem: %s", bch2_err_str(ret)); + BUG_ON(1U << ca->disk_sb.sb->layout.sb_max_size_bits != sb_size); + + /* Here the FS is already RW. + * Apply the superblock layout changes first, everything else can be + * repaired on a subsequent recovery */ + add_default_sb_layout(ca->disk_sb.sb, NULL); + ret = bch2_write_super(c); + if (ret) + die("Error writing superblock: %s", bch2_err_str(ret)); + + /* Now explicitly mark the new sb buckets in FS metadata */ + ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); + if (ret) + die("Error marking superblock buckets: %s", bch2_err_str(ret)); + bch2_fs_stop(c); +#if CONFIG_BCACHEFS_DEBUG + /* Verify that filesystem is clean and consistent */ + opts = bch2_opts_empty(); opt_set(opts, fsck, true); opt_set(opts, fix_errors, true); - - /* - * Hack: the free space counters are coming out wrong after marking the - * new superblock, but it's just the device counters so it's - * inconsequential: - */ + opt_set(opts, nochanges, true); c = bch2_fs_open(&devs, &opts); ret = PTR_ERR_OR_ZERO(c); if (ret) - die("error opening filesystem: %s", bch2_err_str(ret)); + die("error checking filesystem: %s", bch2_err_str(ret)); + + if (test_bit(BCH_FS_errors, &c->flags) || test_bit(BCH_FS_errors_fixed, &c->flags)) + die("Filesystem has errors after migration"); + bch2_fs_stop(c); +#endif return 0; } diff --git a/c_src/cmd_super.c b/c_src/cmd_super.c index 18e4df80..1f93ed25 100644 --- a/c_src/cmd_super.c +++ b/c_src/cmd_super.c @@ -480,6 +480,14 @@ int cmd_recover_super(int argc, char *argv[]) if (args.yes || ask_yn()) bch2_super_write(dev_fd, sb); + /* + * Ensure that 'bcachefs mount' sees the newly formatted devices when + * scanning by UUID in the udev database: + */ + CLASS(printbuf, udevadm_cmd)(); + prt_printf(&udevadm_cmd, "udevadm trigger --settle %s", dev_path); + system(udevadm_cmd.buf); + if (args.src_device) printf("Recovered device will no longer have a journal, please run fsck\n"); diff --git a/c_src/cmds.h b/c_src/cmds.h index 30b47286..fa4b87a9 100644 --- a/c_src/cmds.h +++ b/c_src/cmds.h @@ -34,6 +34,8 @@ int cmd_fsck(int argc, char *argv[]); int cmd_recovery_pass(int argc, char *argv[]); int cmd_dump(int argc, char *argv[]); +int cmd_undump(int argc, char *argv[]); + int cmd_list_journal(int argc, char *argv[]); int cmd_kill_btree_node(int argc, char *argv[]); diff --git a/c_src/libbcachefs.c b/c_src/libbcachefs.c index ed84be1b..656c9132 100644 --- a/c_src/libbcachefs.c +++ b/c_src/libbcachefs.c @@ -205,7 +205,7 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs, /* calculate block size: */ if (!opt_defined(fs_opts, block_size)) { unsigned max_dev_block_size = 0; - + darray_for_each(devs, i) max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd)); @@ -357,6 +357,16 @@ struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs, xclose(i->bdev->bd_fd); } + /* + * Ensure that 'bcachefs mount' sees the newly formatted devices when + * scanning by UUID in the udev database: + */ + CLASS(printbuf, udevadm_cmd)(); + prt_str(&udevadm_cmd, "udevadm trigger --settle"); + darray_for_each(devs, i) + prt_printf(&udevadm_cmd, " %s", i->path); + system(udevadm_cmd.buf); + return sb.sb; } diff --git a/c_src/qcow2.c b/c_src/qcow2.c index 53959a00..7afb6812 100644 --- a/c_src/qcow2.c +++ b/c_src/qcow2.c @@ -143,8 +143,7 @@ void qcow2_image_finish(struct qcow2_image *img) memset(buf, 0, img->block_size); memcpy(buf, &hdr, sizeof(hdr)); - xpwrite(img->outfd, buf, img->block_size, 0, - "qcow2 header"); + xpwrite(img->outfd, buf, img->block_size, 0, "qcow2 header"); free(img->l2_table); free(img->l1_table); @@ -160,3 +159,51 @@ void qcow2_write_image(int infd, int outfd, ranges *data, qcow2_write_ranges(&img, data); qcow2_image_finish(&img); } + +void qcow2_to_raw(int infd, int outfd) +{ + struct qcow2_hdr hdr; + + xpread(infd, &hdr, sizeof(hdr), 0); + + if (hdr.magic != cpu_to_be32(QCOW_MAGIC)) + die("not a qcow2 image"); + + if (hdr.version != cpu_to_be32(QCOW_VERSION)) + die("incorrect qcow2 version"); + + ftruncate(outfd, be64_to_cpu(hdr.size)); + + unsigned block_size = 1U << be32_to_cpu(hdr.block_bits); + + unsigned l1_size = be32_to_cpu(hdr.l1_size); + unsigned l2_size = block_size / sizeof(u64); + + __be64 *l1_table = xcalloc(l1_size, sizeof(u64)); + __be64 *l2_table = xmalloc(block_size); + void *data_buf = xmalloc(block_size); + + xpread(infd, l1_table, l1_size * sizeof(u64), be64_to_cpu(hdr.l1_table_offset)); + + for (u64 i = 0; i < l1_size; i++) { + if (!l1_table[i]) + continue; + + xpread(infd, l2_table, block_size, be64_to_cpu(l1_table[i]) & ~QCOW_OFLAG_COPIED); + + for (unsigned j = 0; j < l2_size; j++) { + u64 src_offset = be64_to_cpu(l2_table[j]) & ~QCOW_OFLAG_COPIED; + if (!src_offset) + continue; + + u64 dst_offset = (i * l2_size + j) * block_size; + + xpread(infd, data_buf, block_size, src_offset); + xpwrite(outfd, data_buf, block_size, dst_offset, "qcow2 data"); + } + } + + free(data_buf); + free(l2_table); + free(l1_table); +} diff --git a/c_src/qcow2.h b/c_src/qcow2.h index c7b35627..7ccb2773 100644 --- a/c_src/qcow2.h +++ b/c_src/qcow2.h @@ -4,6 +4,8 @@ #include <linux/types.h> #include "tools-util.h" +#define QCOW2_L1_MAX (4ULL << 20) + struct qcow2_image { int infd; int outfd; @@ -25,4 +27,6 @@ void qcow2_image_finish(struct qcow2_image *); void qcow2_write_image(int, int, ranges *, unsigned); +void qcow2_to_raw(int, int); + #endif /* _QCOW2_H */ diff --git a/c_src/tools-util.c b/c_src/tools-util.c index f48d3f21..7733b9e9 100644 --- a/c_src/tools-util.c +++ b/c_src/tools-util.c @@ -52,15 +52,15 @@ char *mprintf(const char *fmt, ...) return str; } -void xpread(int fd, void *buf, size_t count, off_t offset) +void __xpread(int fd, void *buf, size_t count, off_t offset, const char *file, unsigned line) { while (count) { ssize_t r = pread(fd, buf, count, offset); if (r < 0) - die("read error: %m"); + die("read error: %m at %s:%u", file, line); if (!r) - die("pread error: unexpected eof"); + die("pread error: unexpected eof at %s:%u", file, line); count -= r; offset += r; } diff --git a/c_src/tools-util.h b/c_src/tools-util.h index b8104002..988e2d62 100644 --- a/c_src/tools-util.h +++ b/c_src/tools-util.h @@ -28,8 +28,13 @@ void die(const char *, ...) __attribute__ ((format (printf, 1, 2))) noreturn; char *mprintf(const char *, ...) __attribute__ ((format (printf, 1, 2))); -void xpread(int, void *, size_t, off_t); + +void __xpread(int, void *, size_t, off_t, const char *, unsigned); +#define xpread(_fd, _buf, _count, _offset) \ + __xpread(_fd, _buf, _count, _offset, __FILE__, __LINE__) + void xpwrite(int, const void *, size_t, off_t, const char *); + struct stat xfstatat(int, const char *, int); struct stat xfstat(int); struct stat xstat(const char *); diff --git a/linux/shrinker.c b/linux/shrinker.c index c0098cad..7a8a9a6a 100644 --- a/linux/shrinker.c +++ b/linux/shrinker.c @@ -15,6 +15,9 @@ static DEFINE_MUTEX(shrinker_lock); void shrinker_free(struct shrinker *s) { + if (!s) + return; + if (s->list.next) { mutex_lock(&shrinker_lock); list_del(&s->list); diff --git a/src/bcachefs.rs b/src/bcachefs.rs index 4fb0b14c..d0c1d6e8 100644 --- a/src/bcachefs.rs +++ b/src/bcachefs.rs @@ -48,6 +48,7 @@ fn handle_c_command(mut argv: Vec<String>, symlink_cmd: Option<&str>) -> i32 { "data" => c::data_cmds(argc, argv), "device" => c::device_cmds(argc, argv), "dump" => c::cmd_dump(argc, argv), + "undump" => c::cmd_undump(argc, argv), "format" => c::cmd_format(argc, argv), "fs" => c::fs_cmds(argc, argv), "fsck" => c::cmd_fsck(argc, argv), |