summary | refs | log | tree | commit | diff
path: root/c_src
diff options
context:
space:
mode:
Diffstat (limited to 'c_src')
-rw-r--r-- c_src/bcachefs.c 164
-rw-r--r-- c_src/cmd_assemble.c 48
-rw-r--r-- c_src/cmd_attr.c 119
-rw-r--r-- c_src/cmd_counters.c 51
-rw-r--r-- c_src/cmd_data.c 127
-rw-r--r-- c_src/cmd_device.c 647
-rw-r--r-- c_src/cmd_dump.c 182
-rw-r--r-- c_src/cmd_format.c 435
-rw-r--r-- c_src/cmd_fs.c 544
-rw-r--r-- c_src/cmd_fsck.c 348
-rw-r--r-- c_src/cmd_fusemount.c 1314
-rw-r--r-- c_src/cmd_key.c 161
-rw-r--r-- c_src/cmd_kill_btree_node.c 140
-rw-r--r-- c_src/cmd_list_journal.c 306
-rw-r--r-- c_src/cmd_migrate.c 426
-rw-r--r-- c_src/cmd_option.c 168
-rw-r--r-- c_src/cmd_run.c 33
-rw-r--r-- c_src/cmd_version.c 9
-rw-r--r-- c_src/cmds.h 63
-rw-r--r-- c_src/config.h 0
-rw-r--r-- c_src/crypto.c 201
-rw-r--r-- c_src/crypto.h 22
-rw-r--r-- c_src/libbcachefs.c 754
-rw-r--r-- c_src/libbcachefs.h 300
-rw-r--r-- c_src/posix_to_bcachefs.c 461
-rw-r--r-- c_src/posix_to_bcachefs.h 54
-rw-r--r-- c_src/qcow2.c 134
-rw-r--r-- c_src/qcow2.h 9
-rw-r--r-- c_src/tools-util.c 741
-rw-r--r-- c_src/tools-util.h 214
30 files changed, 8175 insertions, 0 deletions
diff --git a/c_src/bcachefs.c b/c_src/bcachefs.c
new file mode 100644
index 00000000..77bf6215
--- /dev/null
+++ b/c_src/bcachefs.c
@@ -0,0 +1,164 @@
+/*
+ * Authors: Kent Overstreet <kent.overstreet@gmail.com>
+ * Gabriel de Perthuis <g2p.code@gmail.com>
+ * Jacob Malevich <jam@datera.io>
+ *
+ * GPLv2
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <raid/raid.h>
+
+#include "cmds.h"
+
+/*
+ * Print the top-level command summary for the bcachefs tool to stdout.
+ *
+ * The whole text is one concatenated string literal passed to puts(); the
+ * multi-device assembly section is compiled out with #if 0.
+ */
+void bcachefs_usage(void)
+{
+ puts("bcachefs - tool for managing bcachefs filesystems\n"
+ "usage: bcachefs <command> [<args>]\n"
+ "\n"
+ "Superblock commands:\n"
+ " format Format a new filesystem\n"
+ " show-super Dump superblock information to stdout\n"
+ " set-fs-option Set a filesystem option\n"
+ " reset-counters Reset all counters on an unmounted device\n"
+ "\n"
+ "Mount:\n"
+ " mount Mount a filesystem\n"
+ "\n"
+ "Repair:\n"
+ " fsck Check an existing filesystem for errors\n"
+ "\n"
+#if 0
+ "Startup/shutdown, assembly of multi device filesystems:\n"
+ " assemble Assemble an existing multi device filesystem\n"
+ " incremental Incrementally assemble an existing multi device filesystem\n"
+ " run Start a partially assembled filesystem\n"
+ " stop Stop a running filesystem\n"
+ "\n"
+#endif
+ "Commands for managing a running filesystem:\n"
+ " fs usage Show disk usage\n"
+ "\n"
+ "Commands for managing devices within a running filesystem:\n"
+ " device add Add a new device to an existing filesystem\n"
+ " device remove Remove a device from an existing filesystem\n"
+ " device online Re-add an existing member to a filesystem\n"
+ " device offline Take a device offline, without removing it\n"
+ " device evacuate Migrate data off of a specific device\n"
+ " device set-state Mark a device as failed\n"
+ " device resize Resize filesystem on a device\n"
+ " device resize-journal Resize journal on a device\n"
+ "\n"
+ "Commands for managing subvolumes and snapshots:\n"
+ " subvolume create Create a new subvolume\n"
+ " subvolume delete Delete an existing subvolume\n"
+ " subvolume snapshot Create a snapshot\n"
+ "\n"
+ "Commands for managing filesystem data:\n"
+ " data rereplicate Rereplicate degraded data\n"
+ " data job Kick off low level data jobs\n"
+ "\n"
+ "Encryption:\n"
+ " unlock Unlock an encrypted filesystem prior to running/mounting\n"
+ " set-passphrase Change passphrase on an existing (unmounted) filesystem\n"
+ " remove-passphrase Remove passphrase on an existing (unmounted) filesystem\n"
+ "\n"
+ "Migrate:\n"
+ " migrate Migrate an existing filesystem to bcachefs, in place\n"
+ " migrate-superblock Add default superblock, after bcachefs migrate\n"
+ "\n"
+ "Commands for operating on files in a bcachefs filesystem:\n"
+ " set-file-option Set various attributes on files or directories\n"
+ "\n"
+ "Debug:\n"
+ "These commands work on offline, unmounted filesystems\n"
+ " dump Dump filesystem metadata to a qcow2 image\n"
+ " list List filesystem metadata in textual form\n"
+ " list_journal List contents of journal\n"
+ "\n"
+ "FUSE:\n"
+ " fusemount Mount a filesystem via FUSE\n"
+ "\n"
+ "Miscellaneous:\n"
+ " completions Generate shell completions\n"
+ " version Display the version of the invoked bcachefs tool\n");
+}
+
+/*
+ * Remove argv[1] from the argument vector and return it.
+ *
+ * The entries after argv[1] are shifted down by one slot, *argc is
+ * decremented, and the slot at the new *argc is set to NULL.
+ */
+static char *pop_cmd(int *argc, char *argv[])
+{
+	char *popped = argv[1];
+
+	if (*argc >= 2) {
+		/* Number of entries that follow the one being removed. */
+		size_t tail = (size_t) (*argc - 2);
+
+		memmove(argv + 1, argv + 2, tail * sizeof(*argv));
+	}
+
+	*argc -= 1;
+	argv[*argc] = NULL;
+
+	return popped;
+}
+
+/*
+ * Dispatch "bcachefs fs <subcommand>".
+ *
+ * With no subcommand the top-level usage is printed and the process exits
+ * with EXIT_FAILURE.  "usage" is forwarded to cmd_fs_usage(); any other
+ * subcommand returns 0 without doing anything.
+ */
+int fs_cmds(int argc, char *argv[])
+{
+	char *subcmd = pop_cmd(&argc, argv);
+
+	if (argc < 1) {
+		bcachefs_usage();
+		exit(EXIT_FAILURE);
+	}
+
+	return strcmp(subcmd, "usage") == 0
+		? cmd_fs_usage(argc, argv)
+		: 0;
+}
+
+/*
+ * Dispatch "bcachefs device <subcommand>".
+ *
+ * With no subcommand, device_usage() is returned.  A recognised subcommand
+ * is forwarded to its handler with the subcommand name removed from argv;
+ * an unrecognised one returns 0 without doing anything.
+ */
+int device_cmds(int argc, char *argv[])
+{
+	static const struct {
+		const char	*name;
+		int		(*handler)(int, char *[]);
+	} subcmds[] = {
+		{ "add",		cmd_device_add },
+		{ "remove",		cmd_device_remove },
+		{ "online",		cmd_device_online },
+		{ "offline",		cmd_device_offline },
+		{ "evacuate",		cmd_device_evacuate },
+		{ "set-state",		cmd_device_set_state },
+		{ "resize",		cmd_device_resize },
+		{ "resize-journal",	cmd_device_resize_journal },
+	};
+	char *subcmd = pop_cmd(&argc, argv);
+
+	if (argc < 1)
+		return device_usage();
+
+	for (size_t i = 0; i < sizeof(subcmds) / sizeof(subcmds[0]); i++)
+		if (strcmp(subcmd, subcmds[i].name) == 0)
+			return subcmds[i].handler(argc, argv);
+
+	return 0;
+}
+
+/*
+ * Dispatch "bcachefs data <subcommand>".
+ *
+ * With no subcommand, data_usage() is returned.  "rereplicate" and "job"
+ * are forwarded to their handlers; anything else returns 0.
+ */
+int data_cmds(int argc, char *argv[])
+{
+	char *subcmd = pop_cmd(&argc, argv);
+
+	if (argc < 1)
+		return data_usage();
+
+	if (strcmp(subcmd, "rereplicate") == 0)
+		return cmd_data_rereplicate(argc, argv);
+
+	if (strcmp(subcmd, "job") == 0)
+		return cmd_data_job(argc, argv);
+
+	return 0;
+}
diff --git a/c_src/cmd_assemble.c b/c_src/cmd_assemble.c
new file mode 100644
index 00000000..a997e1e1
--- /dev/null
+++ b/c_src/cmd_assemble.c
@@ -0,0 +1,48 @@
+
+#include <alloca.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "cmds.h"
+#include "libbcachefs.h"
+
+#if 0
+/*
+ * Currently compiled out (enclosed in #if 0).
+ *
+ * Builds a struct bch_ioctl_assemble on the stack holding the address of
+ * each device path argument (argv[1..]) and issues BCH_IOCTL_ASSEMBLE on
+ * the fd returned by bcachectl_open().  Dies if no device is supplied.
+ */
+int cmd_assemble(int argc, char *argv[])
+{
+ unsigned nr_devs = argc - 1;
+
+ if (argc <= 1)
+ die("Please supply at least one device");
+
+ /* Header plus one __u64 slot per device path pointer. */
+ struct bch_ioctl_assemble *assemble =
+ alloca(sizeof(*assemble) + sizeof(__u64) * nr_devs);
+
+ memset(assemble, 0, sizeof(*assemble));
+ assemble->nr_devs = nr_devs;
+
+ unsigned i;
+ for (i = 0; i < nr_devs; i++)
+ assemble->devs[i] = (unsigned long) argv[i + 1];
+
+ xioctl(bcachectl_open(), BCH_IOCTL_ASSEMBLE, assemble);
+ return 0;
+}
+
+/*
+ * Currently compiled out (enclosed in #if 0).
+ *
+ * Issues BCH_IOCTL_INCREMENTAL for exactly one device path (argv[1]);
+ * dies if the argument count is anything other than one device.
+ */
+int cmd_incremental(int argc, char *argv[])
+{
+ if (argc != 2)
+ die("Please supply exactly one device");
+
+ struct bch_ioctl_incremental incremental = {
+ .dev = (unsigned long) argv[1],
+ };
+
+ xioctl(bcachectl_open(), BCH_IOCTL_INCREMENTAL, &incremental);
+ return 0;
+}
+#endif
diff --git a/c_src/cmd_attr.c b/c_src/cmd_attr.c
new file mode 100644
index 00000000..1da41265
--- /dev/null
+++ b/c_src/cmd_attr.c
@@ -0,0 +1,119 @@
+#include <dirent.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+
+#include "cmds.h"
+#include "libbcachefs.h"
+
+/*
+ * Walk the directory open on @dirfd and issue BCHFS_IOC_REINHERIT_ATTRS for
+ * every entry except "." and "..", recursing into subdirectories for which
+ * the ioctl returned non-zero.
+ *
+ * Per-entry failures are reported on stderr and skipped; a readdir() error
+ * is fatal via die().
+ *
+ * NOTE(review): the DIR stream obtained from fdopendir() is never passed to
+ * closedir() in this function; callers close the raw fd themselves. Confirm
+ * whether the stream should be released here.
+ */
+static void propagate_recurse(int dirfd)
+{
+ DIR *dir = fdopendir(dirfd);
+ struct dirent *d;
+
+ if (!dir) {
+ fprintf(stderr, "fdopendir() error: %m\n");
+ return;
+ }
+
+ /* errno is cleared before each readdir() so end-of-directory can be told apart from an error */
+ while ((errno = 0), (d = readdir(dir))) {
+ if (!strcmp(d->d_name, ".") ||
+ !strcmp(d->d_name, ".."))
+ continue;
+
+ int ret = ioctl(dirfd, BCHFS_IOC_REINHERIT_ATTRS,
+ d->d_name);
+ if (ret < 0) {
+ fprintf(stderr, "error propagating attributes to %s: %m\n",
+ d->d_name);
+ continue;
+ }
+
+ if (!ret) /* did no work */
+ continue;
+
+ /* Only descend into real directories; symlinks are not followed. */
+ struct stat st = xfstatat(dirfd, d->d_name,
+ AT_SYMLINK_NOFOLLOW);
+ if (!S_ISDIR(st.st_mode))
+ continue;
+
+ int fd = openat(dirfd, d->d_name, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "error opening %s: %m\n", d->d_name);
+ continue;
+ }
+ propagate_recurse(fd);
+ close(fd);
+ }
+
+ if (errno)
+ die("readdir error: %m");
+}
+
+/*
+ * Apply every option present in @opts to @path as a "bcachefs.<name>"
+ * extended attribute, then, if @path is a directory, propagate the
+ * attributes to its contents with propagate_recurse().
+ *
+ * Any setxattr() or open() failure is fatal via die().
+ */
+static void do_setattr(char *path, struct bch_opt_strs opts)
+{
+	for (unsigned id = 0; id < bch2_opts_nr; id++) {
+		const char *value = opts.by_id[id];
+
+		/* Option was not given on the command line. */
+		if (!value)
+			continue;
+
+		char *xattr_name = mprintf("bcachefs.%s",
+					   bch2_opt_table[id].attr.name);
+
+		if (setxattr(path, xattr_name, value, strlen(value), 0) != 0)
+			die("setxattr error: %m");
+
+		free(xattr_name);
+	}
+
+	struct stat st = xstat(path);
+
+	if (S_ISDIR(st.st_mode)) {
+		int dirfd = open(path, O_RDONLY);
+
+		if (dirfd < 0)
+			die("error opening %s: %m", path);
+
+		propagate_recurse(dirfd);
+		close(dirfd);
+	}
+}
+
+/*
+ * Print help for "bcachefs set-file-option" to stdout: a fixed header, the
+ * output of bch2_opts_usage(OPT_INODE), then the -h line and bug-report
+ * address.
+ */
+static void setattr_usage(void)
+{
+ puts("bcachefs set-file-option - set attributes on files in a bcachefs filesystem\n"
+ "Usage: bcachefs set-file-option [OPTIONS]... <files>\n"
+ "\n"
+ "Options:");
+
+ bch2_opts_usage(OPT_INODE);
+ puts(" -h Display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+/*
+ * bcachefs set-file-option [OPTIONS]... <files>
+ *
+ * Options are extracted from argv by bch2_cmdline_opts_get(); any remaining
+ * argument starting with '-' is rejected with the usage text and
+ * EXIT_FAILURE.  Every remaining argument is then treated as a path and
+ * handed to do_setattr().
+ */
+int cmd_setattr(int argc, char *argv[])
+{
+	struct bch_opt_strs opts =
+		bch2_cmdline_opts_get(&argc, argv, OPT_INODE);
+	char **arg;
+
+	/* Anything option-like that is still in argv was not recognised. */
+	for (arg = argv + 1; arg < argv + argc; arg++) {
+		if ((*arg)[0] != '-')
+			continue;
+
+		printf("invalid option %s\n", *arg);
+		setattr_usage();
+		exit(EXIT_FAILURE);
+	}
+
+	if (argc <= 1)
+		die("Please supply one or more files");
+
+	for (arg = argv + 1; arg < argv + argc; arg++)
+		do_setattr(*arg, opts);
+
+	bch2_opt_strs_free(&opts);
+
+	return 0;
+}
diff --git a/c_src/cmd_counters.c b/c_src/cmd_counters.c
new file mode 100644
index 00000000..9adde242
--- /dev/null
+++ b/c_src/cmd_counters.c
@@ -0,0 +1,51 @@
+#include <getopt.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "libbcachefs/super-io.h"
+
+/* Print help for "bcachefs reset-counters" to stdout and exit with EXIT_SUCCESS. */
+static void reset_counters_usage(void)
+{
+ puts("bcachefs reset-counters \n"
+ "Usage: bcachefs reset-counters device\n"
+ "\n"
+ "Options:\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * bcachefs reset-counters <device>
+ *
+ * Reads the superblock of @device, resizes its counters field to 0 and
+ * writes the superblock back.  Exactly one device argument is required.
+ */
+int cmd_reset_counters(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "help",			0, NULL, 'h' },
+		{ NULL }
+	};
+
+	for (;;) {
+		int opt = getopt_long(argc, argv, "h", longopts, NULL);
+
+		if (opt == -1)
+			break;
+		if (opt == 'h')
+			reset_counters_usage();	/* exits */
+	}
+	args_shift(optind);
+
+	char *dev = arg_pop();
+
+	if (!dev)
+		die("please supply a device");
+	if (argc)
+		die("too many arguments");
+
+	struct bch_opts opts = bch2_opts_empty();
+	struct bch_sb_handle sb;
+	int ret = bch2_read_super(dev, &opts, &sb);
+
+	if (ret)
+		die("Error opening %s: %s", dev, bch2_err_str(ret));
+
+	bch2_sb_field_resize(&sb, counters, 0);
+	bch2_super_write(sb.bdev->bd_fd, sb.sb);
+
+	bch2_free_super(&sb);
+	return 0;
+}
diff --git a/c_src/cmd_data.c b/c_src/cmd_data.c
new file mode 100644
index 00000000..1ef689bc
--- /dev/null
+++ b/c_src/cmd_data.c
@@ -0,0 +1,127 @@
+
+
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/btree_cache.h"
+#include "libbcachefs/move.h"
+
+#include "cmds.h"
+#include "libbcachefs.h"
+
+/* Print the "bcachefs data" subcommand summary to stdout; always returns 0. */
+int data_usage(void)
+{
+ puts("bcachefs data - manage filesystem data\n"
+ "Usage: bcachefs data <CMD> [OPTIONS]\n"
+ "\n"
+ "Commands:\n"
+ " rereplicate Rereplicate degraded data\n"
+ " job Kick off low level data jobs\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ return 0;
+}
+
+/* Print help for "bcachefs data rereplicate" to stdout and exit with EXIT_SUCCESS. */
+static void data_rereplicate_usage(void)
+{
+ puts("bcachefs data rereplicate\n"
+ "Usage: bcachefs data rereplicate filesystem\n"
+ "\n"
+ "Walks existing data in a filesystem, writing additional copies\n"
+ "of any degraded data\n"
+ "\n"
+ "Options:\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * bcachefs data rereplicate <filesystem>
+ *
+ * Submits a BCH_DATA_OP_rereplicate job spanning btree 0 / POS_MIN through
+ * BTREE_ID_NR / POS_MAX on the given filesystem and returns the result of
+ * bchu_data().
+ */
+int cmd_data_rereplicate(int argc, char *argv[])
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "h")) != -1)
+		if (c == 'h')
+			data_rereplicate_usage();	/* exits */
+	args_shift(optind);
+
+	char *fs_path = arg_pop();
+
+	if (!fs_path)
+		die("Please supply a filesystem");
+	if (argc)
+		die("too many arguments");
+
+	struct bch_ioctl_data job = {
+		.op		= BCH_DATA_OP_rereplicate,
+		.start_btree	= 0,
+		.start_pos	= POS_MIN,
+		.end_btree	= BTREE_ID_NR,
+		.end_pos	= POS_MAX,
+	};
+
+	return bchu_data(bcache_fs_open(fs_path), job);
+}
+
+/*
+ * Print help for "bcachefs data job" to stdout and exit with EXIT_SUCCESS.
+ *
+ * The usage line marks the job as required and the filesystem as optional,
+ * matching cmd_data_job(), which falls back to "." when no filesystem is
+ * given.
+ */
+static void data_job_usage(void)
+{
+	puts("bcachefs data job\n"
+	     "Usage: bcachefs data job <job> [filesystem]\n"
+	     "\n"
+	     "Kick off a data job and report progress\n"
+	     "\n"
+	     "job: one of scrub, rereplicate, migrate, rewrite_old_nodes, or drop_extra_replicas\n"
+	     "\n"
+	     "Options:\n"
+	     " -b btree btree to operate on\n"
+	     " -s inode:offset start position\n"
+	     " -e inode:offset end position\n"
+	     " -h, --help display this help and exit\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * bcachefs data job [-b btree] [-s pos] [-e pos] <job> [filesystem]
+ *
+ * Builds a struct bch_ioctl_data describing the requested job and submits
+ * it with bchu_data().  The default range is btree 0 / POS_MIN through
+ * BTREE_ID_NR / POS_MAX; -b restricts both ends to one btree, -s and -e
+ * override the start and end positions.  The filesystem defaults to ".".
+ *
+ * Returns the result of bchu_data().
+ */
+int cmd_data_job(int argc, char *argv[])
+{
+	struct bch_ioctl_data op = {
+		.start_btree	= 0,
+		.start_pos	= POS_MIN,
+		.end_btree	= BTREE_ID_NR,
+		.end_pos	= POS_MAX,
+	};
+	int opt;
+
+	/*
+	 * "b:" must be in the optstring, otherwise getopt() rejects -b and
+	 * the 'b' case below can never run.
+	 */
+	while ((opt = getopt(argc, argv, "b:s:e:h")) != -1)
+		switch (opt) {
+		case 'b':
+			op.start_btree = read_string_list_or_die(optarg,
+						__bch2_btree_ids, "btree id");
+			op.end_btree = op.start_btree;
+			break;
+		case 's':
+			op.start_pos = bpos_parse(optarg);
+			break;
+		case 'e':
+			/* Previously sat after a break, so -e was ignored. */
+			op.end_pos = bpos_parse(optarg);
+			break;
+		case 'h':
+			data_job_usage();	/* exits */
+		}
+	args_shift(optind);
+
+	char *job = arg_pop();
+	if (!job)
+		die("please specify which type of job");
+
+	op.op = read_string_list_or_die(job, bch2_data_ops_strs, "bad job type");
+
+	char *fs_path = arg_pop();
+	if (!fs_path)
+		fs_path = ".";
+
+	if (argc)
+		die("too many arguments");
+
+	return bchu_data(bcache_fs_open(fs_path), op);
+}
diff --git a/c_src/cmd_device.c b/c_src/cmd_device.c
new file mode 100644
index 00000000..c86fb7f1
--- /dev/null
+++ b/c_src/cmd_device.c
@@ -0,0 +1,647 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/journal.h"
+#include "libbcachefs/sb-members.h"
+#include "libbcachefs/super-io.h"
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "libbcachefs/opts.h"
+#include "tools-util.h"
+
+/* Print the "bcachefs device" subcommand summary to stdout; always returns 0. */
+int device_usage(void)
+{
+ puts("bcachefs device - manage devices within a running filesystem\n"
+ "Usage: bcachefs device <CMD> [OPTION]\n"
+ "\n"
+ "Commands:\n"
+ " add add a new device to an existing filesystem\n"
+ " remove remove a device from an existing filesystem\n"
+ " online re-add an existing member to a filesystem\n"
+ " offline take a device offline, without removing it\n"
+ " evacuate migrate data off a specific device\n"
+ " set-state mark a device as failed\n"
+ " resize resize filesystem on a device\n"
+ " resize-journal resize journal on a device\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ return 0;
+}
+
+/* Print help for "bcachefs device add" to stdout; the caller is responsible for exiting. */
+static void device_add_usage(void)
+{
+ puts("bcachefs device add - add a device to an existing filesystem\n"
+ "Usage: bcachefs device add [OPTION]... filesystem device\n"
+ "\n"
+ "Options:\n"
+ " -S, --fs_size=size Size of filesystem on device\n"
+ " -B, --bucket=size Bucket size\n"
+ " -D, --discard Enable discards\n"
+ " -l, --label=label Disk label\n"
+ " -f, --force Use device even if it appears to already be formatted\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+/*
+ * bcachefs device add [OPTION]... <filesystem> <device>
+ *
+ * Parses the per-device options, opens the filesystem, opens and formats
+ * the new device using the block size and btree node size read from the
+ * filesystem's sysfs options, then asks the filesystem to add the device
+ * via bchu_disk_add().
+ *
+ * Returns 0; argument and open errors are fatal via die().
+ */
+int cmd_device_add(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "fs_size", required_argument, NULL, 'S' },
+ { "bucket", required_argument, NULL, 'B' },
+ { "discard", no_argument, NULL, 'D' },
+ { "label", required_argument, NULL, 'l' },
+ { "force", no_argument, NULL, 'f' },
+ { "help", no_argument, NULL, 'h' },
+ { NULL }
+ };
+ struct format_opts format_opts = format_opts_default();
+ struct dev_opts dev_opts = dev_opts_default();
+ bool force = false;
+ int opt;
+
+ while ((opt = getopt_long(argc, argv, "S:B:Dl:fh",
+ longopts, NULL)) != -1)
+ switch (opt) {
+ case 'S':
+ if (bch2_strtoull_h(optarg, &dev_opts.size))
+ die("invalid filesystem size");
+ break;
+ case 'B':
+ if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
+ die("bad bucket_size %s", optarg);
+ break;
+ case 'D':
+ dev_opts.discard = true;
+ break;
+ case 'l':
+ dev_opts.label = strdup(optarg);
+ break;
+ case 'f':
+ force = true;
+ break;
+ case 'h':
+ device_add_usage();
+ exit(EXIT_SUCCESS);
+ }
+ args_shift(optind);
+
+ char *fs_path = arg_pop();
+ if (!fs_path)
+ die("Please supply a filesystem");
+
+ dev_opts.path = arg_pop();
+ if (!dev_opts.path)
+ die("Please supply a device");
+
+ if (argc)
+ die("too many arguments");
+
+ struct bchfs_handle fs = bcache_fs_open(fs_path);
+
+ int ret = open_for_format(&dev_opts, force);
+ if (ret)
+ die("Error opening %s: %s", dev_opts.path, strerror(-ret));
+
+ /* No filesystem-level option strings are supplied; start from an all-zero set. */
+ struct bch_opt_strs fs_opt_strs;
+ memset(&fs_opt_strs, 0, sizeof(fs_opt_strs));
+
+ struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+
+ /* Take block and btree node sizes from the existing filesystem's sysfs options. */
+ opt_set(fs_opts, block_size,
+ read_file_u64(fs.sysfs_fd, "options/block_size"));
+ opt_set(fs_opts, btree_node_size,
+ read_file_u64(fs.sysfs_fd, "options/btree_node_size"));
+
+ struct bch_sb *sb = bch2_format(fs_opt_strs,
+ fs_opts,
+ format_opts,
+ &dev_opts, 1);
+ free(sb);
+ bchu_disk_add(fs, dev_opts.path);
+ return 0;
+}
+
+/* Print help for "bcachefs device remove" to stdout and exit with EXIT_SUCCESS. */
+static void device_remove_usage(void)
+{
+ puts("bcachefs device_remove - remove a device from a filesystem\n"
+ "Usage:\n"
+ " bcachefs device remove <device>|<devid> <path>\n"
+ "\n"
+ "Options:\n"
+ " -f, --force Force removal, even if some data\n"
+ " couldn't be migrated\n"
+ " -F, --force-metadata Force removal, even if some metadata\n"
+ " couldn't be migrated\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * bcachefs device remove [-f] [-F] <device>|<devid> [path]
+ *
+ * The device may be given as a path or as a decimal member index.  When a
+ * filesystem path is supplied it is opened directly and a device path is
+ * translated to an index; without one, the filesystem is located from the
+ * device path.  A numeric index requires the filesystem path.
+ *
+ * BCH_FORCE_IF_DEGRADED is always set; -f and -F add the data-lost and
+ * metadata-lost force flags.  --by-id is accepted by the option table but
+ * has no handler in this function.
+ *
+ * Returns 0; errors are fatal via die().
+ */
+int cmd_device_remove(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "by-id",			0, NULL, 'i' },
+		{ "force",			0, NULL, 'f' },
+		{ "force-metadata",		0, NULL, 'F' },
+		{ "help",			0, NULL, 'h' },
+		{ NULL }
+	};
+	struct bchfs_handle fs;
+	bool by_id = false;
+	int opt, flags = BCH_FORCE_IF_DEGRADED, dev_idx;
+
+	/*
+	 * 'F' has to be in the optstring: the usage text documents -F, but
+	 * previously only the long form --force-metadata was accepted.
+	 */
+	while ((opt = getopt_long(argc, argv, "fFh", longopts, NULL)) != -1)
+		switch (opt) {
+		case 'f':
+			flags |= BCH_FORCE_IF_DATA_LOST;
+			break;
+		case 'F':
+			flags |= BCH_FORCE_IF_METADATA_LOST;
+			break;
+		case 'h':
+			device_remove_usage();	/* exits */
+		}
+	args_shift(optind);
+
+	char *dev_str = arg_pop();
+	if (!dev_str)
+		die("Please supply a device");
+
+	/* A non-empty, fully numeric argument is treated as a member index. */
+	char *end;
+	dev_idx = strtoul(dev_str, &end, 10);
+	if (*dev_str && !*end)
+		by_id = true;
+
+	char *fs_path = arg_pop();
+	if (fs_path) {
+		fs = bcache_fs_open(fs_path);
+
+		if (!by_id) {
+			dev_idx = bchu_dev_path_to_idx(fs, dev_str);
+			if (dev_idx < 0)
+				die("%s does not seem to be a member of %s",
+				    dev_str, fs_path);
+		}
+	} else if (!by_id) {
+		fs = bchu_fs_open_by_dev(dev_str, &dev_idx);
+	} else {
+		die("Filesystem path required when specifying device by id");
+	}
+
+	bchu_disk_remove(fs, dev_idx, flags);
+	return 0;
+}
+
+/* Print help for "bcachefs device online" to stdout; the caller is responsible for exiting. */
+static void device_online_usage(void)
+{
+ puts("bcachefs device online - readd a device to a running filesystem\n"
+ "Usage: bcachefs device online [OPTION]... device\n"
+ "\n"
+ "Options:\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+/*
+ * bcachefs device online <device>
+ *
+ * Locates the filesystem that @device belongs to and asks it to bring the
+ * device online by path.  Exactly one device argument is required.
+ */
+int cmd_device_online(int argc, char *argv[])
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "h")) != -1) {
+		if (c != 'h')
+			continue;
+
+		device_online_usage();
+		exit(EXIT_SUCCESS);
+	}
+	args_shift(optind);
+
+	char *dev_path = arg_pop();
+
+	if (!dev_path)
+		die("Please supply a device");
+	if (argc)
+		die("too many arguments");
+
+	/* The member index is looked up but only the path is used below. */
+	int member_idx;
+	struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &member_idx);
+
+	bchu_disk_online(fs, dev_path);
+	return 0;
+}
+
+/* Print help for "bcachefs device offline" to stdout; the caller is responsible for exiting. */
+static void device_offline_usage(void)
+{
+ puts("bcachefs device offline - take a device offline, without removing it\n"
+ "Usage: bcachefs device offline [OPTION]... device\n"
+ "\n"
+ "Options:\n"
+ " -f, --force Force, if data redundancy will be degraded\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+/*
+ * bcachefs device offline [-f] <device>
+ *
+ * Locates the filesystem that @device belongs to and asks it to take the
+ * device offline.  -f / --force sets BCH_FORCE_IF_DEGRADED.
+ *
+ * Returns 0; argument errors are fatal via die().
+ */
+int cmd_device_offline(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "force",		0, NULL, 'f' },
+		/* Documented in the usage text but previously missing here. */
+		{ "help",		0, NULL, 'h' },
+		{ NULL }
+	};
+	int opt, flags = 0;
+
+	while ((opt = getopt_long(argc, argv, "fh",
+				  longopts, NULL)) != -1)
+		switch (opt) {
+		case 'f':
+			flags |= BCH_FORCE_IF_DEGRADED;
+			break;
+		case 'h':
+			device_offline_usage();
+			exit(EXIT_SUCCESS);
+		}
+	args_shift(optind);
+
+	char *dev = arg_pop();
+	if (!dev)
+		die("Please supply a device");
+
+	if (argc)
+		die("too many arguments");
+
+	int dev_idx;
+	struct bchfs_handle fs = bchu_fs_open_by_dev(dev, &dev_idx);
+	bchu_disk_offline(fs, dev_idx, flags);
+	return 0;
+}
+
+/* Print help for "bcachefs device evacuate" to stdout; the caller is responsible for exiting. */
+static void device_evacuate_usage(void)
+{
+ puts("bcachefs device evacuate - move data off of a given device\n"
+ "Usage: bcachefs device evacuate [OPTION]... device\n"
+ "\n"
+ "Options:\n"
+ " -h, --help Display this help and exit\n"
+ "\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+/*
+ * bcachefs device evacuate <device>
+ *
+ * If the device is currently in the rw state it is first switched to ro,
+ * then a BCH_DATA_OP_migrate job covering btree 0 / POS_MIN through
+ * BTREE_ID_NR / POS_MAX is submitted for that device.
+ *
+ * Returns the result of bchu_data().
+ */
+int cmd_device_evacuate(int argc, char *argv[])
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "h")) != -1) {
+		if (c != 'h')
+			continue;
+
+		device_evacuate_usage();
+		exit(EXIT_SUCCESS);
+	}
+	args_shift(optind);
+
+	char *dev_path = arg_pop();
+
+	if (!dev_path)
+		die("Please supply a device");
+	if (argc)
+		die("too many arguments");
+
+	int dev_idx;
+	struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &dev_idx);
+
+	struct bch_ioctl_dev_usage_v2 *usage = bchu_dev_usage(fs, dev_idx);
+	bool was_rw = usage->state == BCH_MEMBER_STATE_rw;
+
+	if (was_rw) {
+		printf("Setting %s readonly\n", dev_path);
+		bchu_disk_set_state(fs, dev_idx, BCH_MEMBER_STATE_ro, 0);
+	}
+	free(usage);
+
+	struct bch_ioctl_data job = {
+		.op		= BCH_DATA_OP_migrate,
+		.start_btree	= 0,
+		.start_pos	= POS_MIN,
+		.end_btree	= BTREE_ID_NR,
+		.end_pos	= POS_MAX,
+		.migrate.dev	= dev_idx,
+	};
+
+	return bchu_data(fs, job);
+}
+
+/* Print help for "bcachefs device set-state" to stdout and exit with EXIT_SUCCESS. */
+static void device_set_state_usage(void)
+{
+ puts("bcachefs device set-state\n"
+ "Usage: bcachefs device set-state <new-state> <device>|<devid> <path>\n"
+ "\n"
+ "<new-state>: one of rw, ro, failed or spare\n"
+ "<path>: path to mounted filesystem, optional unless specifying device by id\n"
+ "\n"
+ "Options:\n"
+ " -f, --force Force, if data redundancy will be degraded\n"
+ " --force-if-data-lost Force, if data will be lost\n"
+ " -o, --offline Set state of an offline device\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * bcachefs device set-state [OPTIONS] <new-state> <device>|<devid> [path]
+ *
+ * Online mode (default): the device is resolved to a member index, either
+ * directly from a numeric argument (which requires the filesystem path) or
+ * by looking up the device path, and the new state is applied through
+ * bchu_disk_set_state().
+ *
+ * Offline mode (-o): the device's superblock is read, the member state of
+ * the superblock's own dev_idx is updated, the sequence number is bumped
+ * and the superblock is written back and fsync'd.  A numeric device id is
+ * rejected in this mode.
+ *
+ * Returns 0 in online mode, and the result of fsync() in offline mode.
+ */
+int cmd_device_set_state(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "force",			0, NULL, 'f' },
+		{ "force-if-data-lost",		0, NULL, 'F' },
+		{ "offline",			0, NULL, 'o' },
+		{ "help",			0, NULL, 'h' },
+		{ NULL }
+	};
+	struct bchfs_handle fs;
+	bool by_id = false;
+	int opt, flags = 0, dev_idx;
+	bool offline = false;
+
+	while ((opt = getopt_long(argc, argv, "foh", longopts, NULL)) != -1)
+		switch (opt) {
+		case 'f':
+			flags |= BCH_FORCE_IF_DEGRADED;
+			break;
+		case 'F':
+			flags |= BCH_FORCE_IF_DEGRADED;
+			flags |= BCH_FORCE_IF_LOST;
+			break;
+		case 'o':
+			offline = true;
+			break;
+		case 'h':
+			device_set_state_usage();	/* exits */
+		}
+	args_shift(optind);
+
+	char *new_state_str = arg_pop();
+	if (!new_state_str)
+		die("Please supply a device state");
+
+	unsigned new_state = read_string_list_or_die(new_state_str,
+					bch2_member_states, "device state");
+
+	char *dev_str = arg_pop();
+	if (!dev_str)
+		die("Please supply a device");
+
+	/* A non-empty, fully numeric argument is treated as a member index. */
+	char *end;
+	dev_idx = strtoul(dev_str, &end, 10);
+	if (*dev_str && !*end)
+		by_id = true;
+
+	if (offline) {
+		struct bch_opts opts = bch2_opts_empty();
+		struct bch_sb_handle sb = { NULL };
+
+		if (by_id)
+			die("Cannot specify offline device by id");
+
+		int ret = bch2_read_super(dev_str, &opts, &sb);
+		if (ret)
+			die("error opening %s: %s", dev_str, bch2_err_str(ret));
+
+		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, sb.sb->dev_idx);
+
+		SET_BCH_MEMBER_STATE(m, new_state);
+
+		le64_add_cpu(&sb.sb->seq, 1);
+
+		bch2_super_write(sb.bdev->bd_fd, sb.sb);
+		ret = fsync(sb.bdev->bd_fd);
+		if (ret)
+			/* Terminate the line so the shell prompt is not glued to it. */
+			fprintf(stderr, "error writing superblock: fsync error (%m)\n");
+		bch2_free_super(&sb);
+		return ret;
+	}
+
+	char *fs_path = arg_pop();
+	if (fs_path) {
+		fs = bcache_fs_open(fs_path);
+
+		if (!by_id) {
+			dev_idx = bchu_dev_path_to_idx(fs, dev_str);
+			if (dev_idx < 0)
+				die("%s does not seem to be a member of %s",
+				    dev_str, fs_path);
+		}
+	} else if (!by_id) {
+		fs = bchu_fs_open_by_dev(dev_str, &dev_idx);
+	} else {
+		die("Filesystem path required when specifying device by id");
+	}
+
+	bchu_disk_set_state(fs, dev_idx, new_state, flags);
+
+	return 0;
+}
+
+/* Print help for "bcachefs device resize" to stdout and exit with EXIT_SUCCESS. */
+static void device_resize_usage(void)
+{
+ puts("bcachefs device resize \n"
+ "Usage: bcachefs device resize device [ size ]\n"
+ "\n"
+ "Options:\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * bcachefs device resize <device> [size]
+ *
+ * Grows the filesystem on @device to @size (default: the size reported by
+ * get_size() for the device).  The size is converted to 512-byte sectors
+ * and then to buckets.  If the device is mounted the resize goes through
+ * the running filesystem; otherwise the filesystem is opened in-process,
+ * resized and stopped.  Shrinking is rejected.
+ *
+ * Returns 0; an offline resize failure is reported on stderr only.
+ */
+int cmd_device_resize(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "help",			0, NULL, 'h' },
+		{ NULL }
+	};
+	u64 size;
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1)
+		switch (opt) {
+		case 'h':
+			device_resize_usage();	/* exits */
+		}
+	args_shift(optind);
+
+	char *dev = arg_pop();
+	if (!dev)
+		die("Please supply a device to resize");
+
+	int dev_fd = xopen(dev, O_RDONLY);
+
+	char *size_arg = arg_pop();
+	if (!size_arg)
+		size = get_size(dev_fd);
+	else if (bch2_strtoull_h(size_arg, &size))
+		die("invalid size");
+
+	/* bytes -> 512-byte sectors */
+	size >>= 9;
+
+	if (argc)
+		die("Too many arguments");
+
+	struct stat dev_stat = xfstat(dev_fd);
+
+	struct mntent *mount = dev_to_mount(dev);
+	if (mount) {
+		if (!S_ISBLK(dev_stat.st_mode))
+			die("%s is mounted but isn't a block device?!", dev);
+
+		printf("Doing online resize of %s\n", dev);
+
+		struct bchfs_handle fs = bcache_fs_open(mount->mnt_dir);
+
+		unsigned idx = bchu_disk_get_idx(fs, dev_stat.st_rdev);
+
+		struct bch_sb *sb = bchu_read_super(fs, -1);
+		if (idx >= sb->nr_devices)
+			die("error reading superblock: dev idx >= sb->nr_devices");
+
+		struct bch_member m = bch2_sb_member_get(sb, idx);
+
+		u64 nbuckets = size / le16_to_cpu(m.bucket_size);
+
+		if (nbuckets < le64_to_cpu(m.nbuckets))
+			die("Shrinking not supported yet");
+
+		printf("resizing %s to %llu buckets\n", dev, nbuckets);
+		bchu_disk_resize(fs, idx, nbuckets);
+	} else {
+		printf("Doing offline resize of %s\n", dev);
+
+		struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty());
+		if (IS_ERR(c))
+			die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c)));
+
+		struct bch_dev *resize = NULL;
+
+		for_each_online_member(c, ca) {
+			if (resize)
+				die("confused: more than one online device?");
+			resize = ca;
+			percpu_ref_get(&resize->io_ref);
+		}
+
+		/* Guard against dereferencing NULL if the loop found no member. */
+		if (!resize)
+			die("no online device found in %s", dev);
+
+		u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
+
+		if (nbuckets < le64_to_cpu(resize->mi.nbuckets))
+			die("Shrinking not supported yet");
+
+		printf("resizing %s to %llu buckets\n", dev, nbuckets);
+		int ret = bch2_dev_resize(c, resize, nbuckets);
+		if (ret)
+			fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
+
+		percpu_ref_put(&resize->io_ref);
+		bch2_fs_stop(c);
+	}
+	return 0;
+}
+
+/* Print help for "bcachefs device resize-journal" to stdout and exit with EXIT_SUCCESS. */
+static void device_resize_journal_usage(void)
+{
+ puts("bcachefs device resize-journal \n"
+ "Usage: bcachefs device resize-journal device size\n"
+ "\n"
+ "Options:\n"
+ " -h, --help display this help and exit\n"
+ "Report bugs to <linux-bcachefs@vger.kernel.org>");
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * bcachefs device resize-journal <device> <size>
+ *
+ * Sets the journal on @device to @size, converted to 512-byte sectors and
+ * then to buckets.  If the device is mounted the request goes through the
+ * running filesystem; otherwise the filesystem is opened in-process, the
+ * journal is resized and the filesystem is stopped.
+ *
+ * Returns 0; an offline resize failure is reported on stderr only.
+ */
+int cmd_device_resize_journal(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "help",			0, NULL, 'h' },
+		{ NULL }
+	};
+	u64 size;
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1)
+		switch (opt) {
+		case 'h':
+			device_resize_journal_usage();	/* exits */
+		}
+	args_shift(optind);
+
+	char *dev = arg_pop();
+	if (!dev)
+		die("Please supply a device");
+
+	int dev_fd = xopen(dev, O_RDONLY);
+
+	char *size_arg = arg_pop();
+	if (!size_arg)
+		die("Please supply a journal size");
+	else if (bch2_strtoull_h(size_arg, &size))
+		die("invalid size");
+
+	/* bytes -> 512-byte sectors */
+	size >>= 9;
+
+	if (argc)
+		die("Too many arguments");
+
+	struct stat dev_stat = xfstat(dev_fd);
+
+	struct mntent *mount = dev_to_mount(dev);
+	if (mount) {
+		if (!S_ISBLK(dev_stat.st_mode))
+			die("%s is mounted but isn't a block device?!", dev);
+
+		struct bchfs_handle fs = bcache_fs_open(mount->mnt_dir);
+
+		unsigned idx = bchu_disk_get_idx(fs, dev_stat.st_rdev);
+
+		struct bch_sb *sb = bchu_read_super(fs, -1);
+		if (idx >= sb->nr_devices)
+			die("error reading superblock: dev idx >= sb->nr_devices");
+
+		struct bch_member m = bch2_sb_member_get(sb, idx);
+
+		u64 nbuckets = size / le16_to_cpu(m.bucket_size);
+
+		printf("resizing journal on %s to %llu buckets\n", dev, nbuckets);
+		bchu_disk_resize_journal(fs, idx, nbuckets);
+	} else {
+		printf("%s is offline - starting:\n", dev);
+
+		struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty());
+		if (IS_ERR(c))
+			die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c)));
+
+		struct bch_dev *resize = NULL;
+
+		for_each_online_member(c, ca) {
+			if (resize)
+				die("confused: more than one online device?");
+			resize = ca;
+			percpu_ref_get(&resize->io_ref);
+		}
+
+		/* Guard against dereferencing NULL if the loop found no member. */
+		if (!resize)
+			die("no online device found in %s", dev);
+
+		u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size);
+
+		printf("resizing journal on %s to %llu buckets\n", dev, nbuckets);
+		int ret = bch2_set_nr_journal_buckets(c, resize, nbuckets);
+		if (ret)
+			fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
+
+		percpu_ref_put(&resize->io_ref);
+		bch2_fs_stop(c);
+	}
+	return 0;
+}
diff --git a/c_src/cmd_dump.c b/c_src/cmd_dump.c
new file mode 100644
index 00000000..c9e417f2
--- /dev/null
+++ b/c_src/cmd_dump.c
@@ -0,0 +1,182 @@
+#include <fcntl.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "qcow2.h"
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/btree_cache.h"
+#include "libbcachefs/btree_io.h"
+#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs/extents.h"
+#include "libbcachefs/sb-members.h"
+#include "libbcachefs/super.h"
+
+/* Print the help text for `bcachefs dump` to stdout. */
+static void dump_usage(void)
+{
+	static const char help_text[] =
+		"bcachefs dump - dump filesystem metadata\n"
+		"Usage: bcachefs dump [OPTION]... <devices>\n"
+		"\n"
+		"Options:\n"
+		" -o output Output qcow2 image(s)\n"
+		" -f, --force Force; overwrite when needed\n"
+		" --nojournal Don't dump entire journal, just dirty entries\n"
+		" -h, --help Display this help and exit\n"
+		"Report bugs to <linux-bcachefs@vger.kernel.org>";
+
+	puts(help_text);
+}
+
+/*
+ * For every pointer in key @k that refers to device @ca, add a range to @data
+ * starting at the pointer's offset (converted from sectors to bytes by << 9)
+ * with length c->opts.btree_node_size.
+ */
+static void dump_node(struct bch_fs *c, struct bch_dev *ca, struct bkey_s_c k, ranges *data)
+{
+	struct bkey_ptrs_c key_ptrs = bch2_bkey_ptrs_c(k);
+
+	bkey_for_each_ptr(key_ptrs, ptr) {
+		if (ptr->dev == ca->dev_idx) {
+			range_add(data, ptr->offset << 9, c->opts.btree_node_size);
+		}
+	}
+}
+
+/*
+ * Write a qcow2 image to @fd holding the metadata regions of device @ca:
+ * the superblock layout and each superblock copy, journal buckets, and the
+ * location of every btree node pointer (including each btree's root) that
+ * lives on this device.
+ *
+ * When @entire_journal is false, only journal buckets whose recorded sequence
+ * number is >= c->journal.last_seq_ondisk are included.
+ *
+ * Dies if walking any btree returns an error.
+ */
+static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd,
+ bool entire_journal)
+{
+ struct bch_sb *sb = ca->disk_sb.sb;
+ ranges data = { 0 };
+ unsigned i;
+ int ret;
+
+ /* Superblock: */
+ range_add(&data, BCH_SB_LAYOUT_SECTOR << 9,
+ sizeof(struct bch_sb_layout));
+
+ /* One range per superblock copy listed in the layout (offsets are in sectors) */
+ for (i = 0; i < sb->layout.nr_superblocks; i++)
+ range_add(&data,
+ le64_to_cpu(sb->layout.sb_offset[i]) << 9,
+ vstruct_bytes(sb));
+
+ /* Journal: */
+ for (i = 0; i < ca->journal.nr; i++)
+ if (entire_journal ||
+ ca->journal.bucket_seq[i] >= c->journal.last_seq_ondisk) {
+ u64 bucket = ca->journal.buckets[i];
+
+ range_add(&data,
+ bucket_bytes(ca) * bucket,
+ bucket_bytes(ca));
+ }
+
+ /* Btree: */
+ for (i = 0; i < BTREE_ID_NR; i++) {
+ struct btree_trans *trans = bch2_trans_get(c);
+
+ /* Visit each node of btree i and record the pointers held by its keys */
+ ret = __for_each_btree_node(trans, iter, i, POS_MIN, 0, 1, 0, b, ({
+ struct btree_node_iter iter;
+ struct bkey u;
+ struct bkey_s_c k;
+
+ for_each_btree_node_key_unpack(b, k, &iter, &u)
+ dump_node(c, ca, k, &data);
+ 0;
+ }));
+
+ if (ret)
+ die("error %s walking btree nodes", bch2_err_str(ret));
+
+ /* The root node's own key is recorded separately, unless the root is a fake node */
+ struct btree *b = bch2_btree_id_root(c, i)->b;
+ if (!btree_node_fake(b))
+ dump_node(c, ca, bkey_i_to_s_c(&b->key), &data);
+
+ bch2_trans_put(trans);
+ }
+
+ /* Copy the collected ranges from the device's fd into the qcow2 image */
+ qcow2_write_image(ca->disk_sb.bdev->bd_fd, fd, &data,
+ max_t(unsigned, c->opts.btree_node_size / 8, block_bytes(c)));
+ darray_exit(&data);
+}
+
+/*
+ * bcachefs dump: open the given devices without modifying them and write one
+ * qcow2 metadata image per online member device.
+ *
+ * With a single online device the image goes to "<out>.qcow2"; with several,
+ * to "<out>.<dev_idx>.qcow2". Unless --force is given the output is opened
+ * with O_EXCL, so an existing file is not overwritten.
+ *
+ * Returns 0 on success; missing arguments and open failures terminate via
+ * die().
+ */
+int cmd_dump(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "force",		no_argument,	NULL, 'f' },
+		{ "nojournal",		no_argument,	NULL, 'j' },
+		{ "verbose",		no_argument,	NULL, 'v' },
+		{ "help",		no_argument,	NULL, 'h' },
+		{ NULL }
+	};
+	struct bch_opts opts = bch2_opts_empty();
+	char *out = NULL;
+	unsigned nr_devices = 0;
+	bool force = false, entire_journal = true;
+	int fd, opt;
+
+	/* Open options: read-only, no exclusive open, no recovery, tolerate missing devices */
+	opt_set(opts, direct_io,	false);
+	opt_set(opts, noexcl,		true);
+	opt_set(opts, read_only,	true);
+	opt_set(opts, nochanges,	true);
+	opt_set(opts, norecovery,	true);
+	opt_set(opts, degraded,		true);
+	opt_set(opts, very_degraded,	true);
+	opt_set(opts, errors,		BCH_ON_ERROR_continue);
+	opt_set(opts, fix_errors,	FSCK_FIX_no);
+
+	while ((opt = getopt_long(argc, argv, "o:fvh",
+				  longopts, NULL)) != -1)
+		switch (opt) {
+		case 'o':
+			out = optarg;
+			break;
+		case 'f':
+			force = true;
+			break;
+		case 'j':
+			/* only reachable via the long option --nojournal */
+			entire_journal = false;
+			break;
+		case 'v':
+			opt_set(opts, verbose, true);
+			break;
+		case 'h':
+			dump_usage();
+			exit(EXIT_SUCCESS);
+		}
+	args_shift(optind);
+
+	if (!out)
+		die("Please supply output filename");
+
+	if (!argc)
+		die("Please supply device(s) to dump");
+
+	struct bch_fs *c = bch2_fs_open(argv, argc, opts);
+	if (IS_ERR(c))
+		die("error opening devices: %s", bch2_err_str(PTR_ERR(c)));
+
+	down_read(&c->state_lock);
+
+	/* Count online members first: the count decides the output file naming */
+	for_each_online_member(c, ca)
+		nr_devices++;
+
+	BUG_ON(!nr_devices);
+
+	for_each_online_member(c, ca) {
+		int flags = O_WRONLY|O_CREAT|O_TRUNC;
+
+		if (!force)
+			flags |= O_EXCL;
+
+		char *path = nr_devices > 1
+			? mprintf("%s.%u.qcow2", out, ca->dev_idx)
+			: mprintf("%s.qcow2", out);
+		fd = xopen(path, flags, 0600);
+		free(path);
+
+		dump_one_device(c, ca, fd, entire_journal);
+		close(fd);
+	}
+
+	up_read(&c->state_lock);
+
+	bch2_fs_stop(c);
+	return 0;
+}
diff --git a/c_src/cmd_format.c b/c_src/cmd_format.c
new file mode 100644
index 00000000..d0c8e197
--- /dev/null
+++ b/c_src/cmd_format.c
@@ -0,0 +1,435 @@
+/*
+ * Authors: Kent Overstreet <kent.overstreet@gmail.com>
+ * Gabriel de Perthuis <g2p.code@gmail.com>
+ * Jacob Malevich <jam@datera.io>
+ *
+ * GPLv2
+ */
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "posix_to_bcachefs.h"
+#include "libbcachefs.h"
+#include "crypto.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super-io.h"
+#include "libbcachefs/util.h"
+
+#include "libbcachefs/darray.h"
+
+/*
+ * X-macro table of the options specific to `bcachefs format`.
+ * Each entry is x(shortopt, longopt, arg); the table is expanded below with
+ * different definitions of x() to generate both the O_<longopt> enum values
+ * and the getopt_long() option array.
+ */
+#define OPTS \
+x(0, replicas, required_argument) \
+x(0, encrypted, no_argument) \
+x(0, no_passphrase, no_argument) \
+x('L', fs_label, required_argument) \
+x('U', uuid, required_argument) \
+x(0, fs_size, required_argument) \
+x(0, superblock_size, required_argument) \
+x(0, bucket_size, required_argument) \
+x('l', label, required_argument) \
+x(0, discard, no_argument) \
+x(0, data_allowed, required_argument) \
+x(0, durability, required_argument) \
+x(0, version, required_argument) \
+x(0, no_initialize, no_argument) \
+x(0, source, required_argument) \
+x('f', force, no_argument) \
+x('q', quiet, no_argument) \
+x('v', verbose, no_argument) \
+x('h', help, no_argument)
+
+/*
+ * Print the help text for `bcachefs format`: a fixed header, the generated
+ * list of format-time filesystem options, the format-specific options, the
+ * generated list of per-device options, and a fixed footer.
+ */
+static void usage(void)
+{
+	static const char header[] =
+		"bcachefs format - create a new bcachefs filesystem on one or more devices\n"
+		"Usage: bcachefs format [OPTION]... <devices>\n"
+		"\n"
+		"Options:";
+	static const char fs_options[] =
+		" --replicas=# Sets both data and metadata replicas\n"
+		" --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
+		" --no_passphrase Don't encrypt master encryption key\n"
+		" -L, --fs_label=label\n"
+		" -U, --uuid=uuid\n"
+		" --superblock_size=size\n"
+		" --source=path Initialize the bcachefs filesystem from this root directory\n"
+		"\n"
+		"Device specific options:";
+	static const char footer[] =
+		" -l, --label=label Disk label\n"
+		"\n"
+		" -f, --force\n"
+		" -q, --quiet Only print errors\n"
+		" -v, --verbose Verbose filesystem initialization\n"
+		" -h, --help Display this help and exit\n"
+		"\n"
+		"Device specific options must come before corresponding devices, e.g.\n"
+		" bcachefs format --label cache /dev/sdb /dev/sdc\n"
+		"\n"
+		"Report bugs to <linux-bcachefs@vger.kernel.org>";
+
+	puts(header);
+	bch2_opts_usage(OPT_FORMAT);
+
+	puts(fs_options);
+	bch2_opts_usage(OPT_DEVICE);
+
+	puts(footer);
+}
+
+/*
+ * Return values getopt_long() produces for the long options in OPTS.
+ * O_no_opt is pinned to 1: cmd_format()'s optstring starts with '-', and in
+ * that mode getopt reports each non-option argument with code 1, which is how
+ * device paths are picked up.
+ */
+enum {
+ O_no_opt = 1,
+#define x(shortopt, longopt, arg) O_##longopt,
+ OPTS
+#undef x
+};
+
+/*
+ * getopt_long() table generated from OPTS: every long option returns its
+ * O_<longopt> value (short options are matched via the optstring instead).
+ */
+#define x(shortopt, longopt, arg) { \
+ .name = #longopt, \
+ .has_arg = arg, \
+ .flag = NULL, \
+ .val = O_##longopt, \
+},
+static const struct option format_opts[] = {
+ OPTS
+ { NULL }
+};
+#undef x
+
+/*
+ * Parse @opt with bch2_read_flag_list() against the names in @list and return
+ * the result. If the parser returns (u64) -1, die with "Bad <msg> <opt>".
+ */
+u64 read_flag_list_or_die(char *opt, const char * const list[],
+			  const char *msg)
+{
+	const u64 parse_failed = (u64) -1;
+	u64 flags = bch2_read_flag_list(opt, list);
+
+	if (flags == parse_failed) {
+		die("Bad %s %s", msg, opt);
+	}
+
+	return flags;
+}
+
+/*
+ * Populate filesystem @c from the directory tree rooted at @src_path.
+ * Dies if @src_path is not a directory.
+ */
+void build_fs(struct bch_fs *c, const char *src_path)
+{
+	struct copy_fs_state state = {};
+	int dir_fd = xopen(src_path, O_RDONLY|O_NOATIME);
+	struct stat dir_stat = xfstat(dir_fd);
+
+	if (S_ISDIR(dir_stat.st_mode)) {
+		copy_fs(c, dir_fd, src_path, &state);
+		return;
+	}
+
+	die("%s is not a directory", src_path);
+}
+
+/*
+ * bcachefs format: create a new filesystem on one or more devices.
+ *
+ * Filesystem-wide settings are collected in @opts and @fs_opts. Per-device
+ * settings accumulate in @dev_opts and are attached to each device path that
+ * follows them on the command line; a device option with no device after it
+ * is an error.
+ *
+ * After the superblocks are written the filesystem is opened once in
+ * userspace to finish initialization, unless that was disabled by
+ * --no_initialize, the BCACHEFS_KERNEL_ONLY environment variable, a
+ * non-current --version, or a passphrase-protected key. --source forces
+ * initialization back on and copies the given directory tree in.
+ *
+ * Returns 0 on success; errors terminate via die()/exit().
+ */
+int cmd_format(int argc, char *argv[])
+{
+	DARRAY(struct dev_opts) devices = { 0 };
+	DARRAY(char *) device_paths = { 0 };
+	struct format_opts opts = format_opts_default();
+	struct dev_opts dev_opts = dev_opts_default();
+	bool force = false, no_passphrase = false, quiet = false, initialize = true, verbose = false;
+	bool unconsumed_dev_option = false;
+	unsigned v;
+	int opt;
+
+	struct bch_opt_strs fs_opt_strs =
+		bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
+	struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+
+	if (getenv("BCACHEFS_KERNEL_ONLY"))
+		initialize = false;
+
+	/*
+	 * The leading '-' makes getopt return non-option arguments (device
+	 * paths) in order, with code 1 (O_no_opt). "l:" is listed so that the
+	 * documented short form -l reaches the label case below.
+	 */
+	while ((opt = getopt_long(argc, argv,
+				  "-L:l:U:g:fqhv",
+				  format_opts,
+				  NULL)) != -1)
+		switch (opt) {
+		case O_replicas:
+			if (kstrtouint(optarg, 10, &v) ||
+			    !v ||
+			    v > BCH_REPLICAS_MAX)
+				die("invalid replicas");
+
+			opt_set(fs_opts, metadata_replicas, v);
+			opt_set(fs_opts, data_replicas, v);
+			break;
+		case O_source:
+			opts.source = optarg;
+			break;
+		case O_encrypted:
+			opts.encrypted = true;
+			break;
+		case O_no_passphrase:
+			no_passphrase = true;
+			break;
+		case O_fs_label:
+		case 'L':
+			opts.label = optarg;
+			break;
+		case O_uuid:
+		case 'U':
+			if (uuid_parse(optarg, opts.uuid.b))
+				die("Bad uuid");
+			break;
+		case O_force:
+		case 'f':
+			force = true;
+			break;
+		case O_fs_size:
+			if (bch2_strtoull_h(optarg, &dev_opts.size))
+				die("invalid filesystem size");
+			unconsumed_dev_option = true;
+			break;
+		case O_superblock_size:
+			if (bch2_strtouint_h(optarg, &opts.superblock_size))
+				die("invalid superblock size");
+
+			/* bytes -> 512-byte sectors */
+			opts.superblock_size >>= 9;
+			break;
+		case O_bucket_size:
+			if (bch2_strtoull_h(optarg, &dev_opts.bucket_size))
+				die("bad bucket_size %s", optarg);
+			unconsumed_dev_option = true;
+			break;
+		case O_label:
+		case 'l':
+			dev_opts.label = optarg;
+			unconsumed_dev_option = true;
+			break;
+		case O_discard:
+			dev_opts.discard = true;
+			unconsumed_dev_option = true;
+			break;
+		case O_data_allowed:
+			dev_opts.data_allowed =
+				read_flag_list_or_die(optarg,
+					__bch2_data_types, "data type");
+			unconsumed_dev_option = true;
+			break;
+		case O_durability:
+			if (kstrtouint(optarg, 10, &dev_opts.durability) ||
+			    dev_opts.durability > BCH_REPLICAS_MAX)
+				die("invalid durability");
+			unconsumed_dev_option = true;
+			break;
+		case O_version:
+			if (kstrtouint(optarg, 10, &opts.version))
+				die("invalid version");
+			break;
+		case O_no_initialize:
+			initialize = false;
+			break;
+		case O_no_opt:
+			/* A device path: snapshot the device options gathered so far */
+			darray_push(&device_paths, optarg);
+			dev_opts.path = optarg;
+			darray_push(&devices, dev_opts);
+			/* size is per device; the other device options carry over */
+			dev_opts.size = 0;
+			unconsumed_dev_option = false;
+			break;
+		case O_quiet:
+		case 'q':
+			quiet = true;
+			break;
+		case O_verbose:
+		case 'v':
+			verbose = true;
+			break;
+		case O_help:
+		case 'h':
+			usage();
+			exit(EXIT_SUCCESS);
+			break;
+		case '?':
+			exit(EXIT_FAILURE);
+			break;
+		}
+
+	if (unconsumed_dev_option)
+		die("Options for devices apply to subsequent devices; got a device option with no device");
+
+	if (opts.version != bcachefs_metadata_version_current)
+		initialize = false;
+
+	if (!devices.nr)
+		die("Please supply a device");
+
+	if (opts.encrypted && !no_passphrase) {
+		opts.passphrase = read_passphrase_twice("Enter passphrase: ");
+		initialize = false;
+	}
+
+	darray_for_each(devices, dev) {
+		int ret = open_for_format(dev, force);
+		if (ret)
+			/* report the device that failed, not the last one parsed */
+			die("Error opening %s: %s", dev->path, strerror(-ret));
+	}
+
+	struct bch_sb *sb =
+		bch2_format(fs_opt_strs,
+			    fs_opts,
+			    opts,
+			    devices.data, devices.nr);
+	bch2_opt_strs_free(&fs_opt_strs);
+
+	if (!quiet) {
+		struct printbuf buf = PRINTBUF;
+
+		buf.human_readable_units = true;
+
+		bch2_sb_to_text(&buf, sb, false, 1 << BCH_SB_FIELD_members_v2);
+		printf("%s", buf.buf);
+
+		printbuf_exit(&buf);
+	}
+	free(sb);
+
+	if (opts.passphrase) {
+		/* wipe the passphrase before releasing its memory */
+		memzero_explicit(opts.passphrase, strlen(opts.passphrase));
+		free(opts.passphrase);
+	}
+
+	darray_exit(&devices);
+
+	/* don't skip initialization when we have to build an image from a source */
+	if (opts.source && !initialize) {
+		printf("Warning: Forcing the initialization because the source flag was supplied\n");
+		initialize = true;
+	}
+
+	if (initialize) {
+		struct bch_opts mount_opts = bch2_opts_empty();
+
+		opt_set(mount_opts, verbose, verbose);
+
+		/*
+		 * Start the filesystem once, to allocate the journal and create
+		 * the root directory:
+		 */
+		struct bch_fs *c = bch2_fs_open(device_paths.data,
+						device_paths.nr,
+						mount_opts);
+		if (IS_ERR(c))
+			die("error opening %s: %s", device_paths.data[0],
+			    bch2_err_str(PTR_ERR(c)));
+
+		if (opts.source) {
+			build_fs(c, opts.source);
+		}
+
+		bch2_fs_stop(c);
+	}
+
+	darray_exit(&device_paths);
+
+	return 0;
+}
+
+/* Print the help text for `bcachefs show-super` and exit successfully. */
+static void show_super_usage(void)
+{
+	puts("bcachefs show-super \n"
+	     "Usage: bcachefs show-super [OPTION].. device\n"
+	     "\n"
+	     "Options:\n"
+	     " -f, --fields=(fields) list of sections to print\n"
+	     " --field-only=(field) print superblock section only, no header\n"
+	     " -l, --layout print superblock layout\n"
+	     " -h, --help display this help and exit\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * bcachefs show-super: read the superblock of a single device and print it.
+ *
+ * -f/--fields selects the sections to print ("all" selects every bit);
+ * --field-only prints one section with no header; -l adds the layout.
+ * With neither -f nor --field-only, the members section (v2 if present,
+ * else v1) and the errors section are printed.
+ *
+ * Returns 0 on success; bad arguments and read failures terminate via die().
+ */
+int cmd_show_super(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "fields", 1, NULL, 'f' },
+ { "field-only", 1, NULL, 'F' },
+ { "layout", 0, NULL, 'l' },
+ { "help", 0, NULL, 'h' },
+ { NULL }
+ };
+ unsigned fields = 0;
+ int field_only = -1;
+ bool print_layout = false;
+ bool print_default_fields = true;
+ int opt;
+
+ /* 'F' is not in the optstring, so --field-only has no short form */
+ while ((opt = getopt_long(argc, argv, "f:lh", longopts, NULL)) != -1)
+ switch (opt) {
+ case 'f':
+ fields = !strcmp(optarg, "all")
+ ? ~0
+ : read_flag_list_or_die(optarg,
+ bch2_sb_fields, "superblock field");
+ print_default_fields = false;
+ break;
+ case 'F':
+ field_only = read_string_list_or_die(optarg,
+ bch2_sb_fields, "superblock field");
+ print_default_fields = false;
+ break;
+ case 'l':
+ print_layout = true;
+ break;
+ case 'h':
+ show_super_usage();
+ break;
+ }
+ args_shift(optind);
+
+ char *dev = arg_pop();
+ if (!dev)
+ die("please supply a device");
+ if (argc)
+ die("too many arguments");
+
+ struct bch_opts opts = bch2_opts_empty();
+
+ opt_set(opts, noexcl, true);
+ opt_set(opts, nochanges, true);
+
+ struct bch_sb_handle sb;
+ int ret = bch2_read_super(dev, &opts, &sb);
+ if (ret)
+ die("Error opening %s: %s", dev, bch2_err_str(ret));
+
+ /* Default selection: members (prefer v2 when the superblock has it) plus errors */
+ if (print_default_fields) {
+ fields |= bch2_sb_field_get(sb.sb, members_v2)
+ ? 1 << BCH_SB_FIELD_members_v2
+ : 1 << BCH_SB_FIELD_members_v1;
+ fields |= 1 << BCH_SB_FIELD_errors;
+ }
+
+ struct printbuf buf = PRINTBUF;
+
+ buf.human_readable_units = true;
+
+ if (field_only >= 0) {
+ struct bch_sb_field *f = bch2_sb_field_get_id(sb.sb, field_only);
+
+ /* Nothing is printed when the requested section is absent */
+ if (f)
+ __bch2_sb_field_to_text(&buf, sb.sb, f);
+ } else {
+ printbuf_tabstop_push(&buf, 44);
+
+ char *model = fd_to_dev_model(sb.bdev->bd_fd);
+ prt_str(&buf, "Device:");
+ prt_tab(&buf);
+ prt_str(&buf, model);
+ prt_newline(&buf);
+ free(model);
+
+ bch2_sb_to_text(&buf, sb.sb, print_layout, fields);
+ }
+ printf("%s", buf.buf);
+
+ bch2_free_super(&sb);
+ printbuf_exit(&buf);
+ return 0;
+}
diff --git a/c_src/cmd_fs.c b/c_src/cmd_fs.c
new file mode 100644
index 00000000..82eeceff
--- /dev/null
+++ b/c_src/cmd_fs.c
@@ -0,0 +1,544 @@
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include <uuid/uuid.h>
+
+#include "linux/sort.h"
+#include "linux/rcupdate.h"
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/disk_accounting.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super-io.h"
+
+#include "cmds.h"
+#include "libbcachefs.h"
+
+#include "libbcachefs/darray.h"
+
+/*
+ * Emit one row of the per-device usage table: the data type name followed by
+ * ':', the size (@sectors converted to bytes), the bucket count, and - only
+ * when @frag is non-zero - the fragmented size. @bucket_size is not used here.
+ */
+static void __dev_usage_type_to_text(struct printbuf *out,
+				     enum bch_data_type type,
+				     unsigned bucket_size,
+				     u64 buckets, u64 sectors, u64 frag)
+{
+	/* label column */
+	bch2_prt_data_type(out, type);
+	prt_char(out, ':');
+	prt_tab(out);
+
+	/* data column */
+	prt_units_u64(out, sectors << 9);
+	prt_tab_rjust(out);
+
+	/* buckets column */
+	prt_printf(out, "%llu", buckets);
+	prt_tab_rjust(out);
+
+	/* fragmented column, omitted when zero */
+	if (frag != 0) {
+		prt_units_u64(out, frag << 9);
+		prt_tab_rjust(out);
+	}
+
+	prt_newline(out);
+}
+
+/*
+ * Print the usage row for data type @type of device usage @u. For the free,
+ * need_discard and need_gc_gens types the sector count is derived from the
+ * bucket count; every other type uses the reported sector count.
+ */
+static void dev_usage_type_to_text(struct printbuf *out,
+				   struct bch_ioctl_dev_usage_v2 *u,
+				   enum bch_data_type type)
+{
+	u64 nr_sectors = 0;
+
+	if (type == BCH_DATA_free ||
+	    type == BCH_DATA_need_discard ||
+	    type == BCH_DATA_need_gc_gens) {
+		/* sectors are 0 for these types so calculate sectors for them */
+		nr_sectors = u->d[type].buckets * u->bucket_size;
+	} else {
+		nr_sectors = u->d[type].sectors;
+	}
+
+	__dev_usage_type_to_text(out, type,
+				 u->bucket_size,
+				 u->d[type].buckets,
+				 nr_sectors,
+				 u->d[type].fragmented);
+}
+
+/*
+ * Print the usage table for one device @d of filesystem @fs: a heading line
+ * with label, index, device path and member state, a column header, one row
+ * per data type reported by the kernel, and a final capacity row.
+ */
+static void dev_usage_to_text(struct printbuf *out,
+ struct bchfs_handle fs,
+ struct dev_name *d)
+{
+ struct bch_ioctl_dev_usage_v2 *u = bchu_dev_usage(fs, d->idx);
+
+ /* Heading: placeholders are shown when the label or device path is unknown */
+ prt_newline(out);
+ prt_printf(out, "%s (device %u):", d->label ?: "(no label)", d->idx);
+ prt_tab(out);
+ prt_str(out, d->dev ?: "(device not found)");
+ prt_tab_rjust(out);
+
+ prt_str(out, bch2_member_states[u->state]);
+ prt_tab_rjust(out);
+
+ prt_newline(out);
+
+ /* Column header row */
+ printbuf_indent_add(out, 2);
+ prt_tab(out);
+
+ prt_str(out, "data");
+ prt_tab_rjust(out);
+
+ prt_str(out, "buckets");
+ prt_tab_rjust(out);
+
+ prt_str(out, "fragmented");
+ prt_tab_rjust(out);
+
+ prt_newline(out);
+
+ for (unsigned i = 0; i < u->nr_data_types; i++)
+ dev_usage_type_to_text(out, u, i);
+
+ /* Capacity row: total buckets times bucket size, converted from sectors to bytes */
+ prt_str(out, "capacity:");
+ prt_tab(out);
+
+ prt_units_u64(out, (u->nr_buckets * u->bucket_size) << 9);
+ prt_tab_rjust(out);
+ prt_printf(out, "%llu", u->nr_buckets);
+ prt_tab_rjust(out);
+
+ printbuf_indent_sub(out, 2);
+
+ prt_newline(out);
+ free(u);
+}
+
+/*
+ * sort() comparator for struct dev_name: order by label, then by device path,
+ * then by index. A string key only takes part when both sides have it set.
+ */
+static int dev_by_label_cmp(const void *_l, const void *_r)
+{
+	const struct dev_name *l = _l, *r = _r;
+	int order;
+
+	if (l->label && r->label) {
+		order = strcmp(l->label, r->label);
+		if (order)
+			return order;
+	}
+
+	if (l->dev && r->dev) {
+		order = strcmp(l->dev, r->dev);
+		if (order)
+			return order;
+	}
+
+	return cmp_int(l->idx, r->idx);
+}
+
+/* Return the entry of @dev_names whose index equals @idx, or NULL if there is none. */
+static struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx)
+{
+	struct dev_name *found = NULL;
+
+	darray_for_each(*dev_names, dev) {
+		if (dev->idx == idx) {
+			found = dev;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Print the per-device usage tables for every entry of @dev_names, sorted
+ * with dev_by_label_cmp(), then free each entry's device and label strings.
+ */
+static void devs_usage_to_text(struct printbuf *out,
+			       struct bchfs_handle fs,
+			       dev_names dev_names)
+{
+	static const unsigned column_widths[] = { 16, 20, 16, 14 };
+
+	sort(dev_names.data, dev_names.nr,
+	     sizeof(dev_names.data[0]), dev_by_label_cmp, NULL);
+
+	printbuf_tabstops_reset(out);
+	for (unsigned col = 0;
+	     col < sizeof(column_widths) / sizeof(column_widths[0]);
+	     col++)
+		printbuf_tabstop_push(out, column_widths[col]);
+
+	darray_for_each(dev_names, dev) {
+		dev_usage_to_text(out, fs, dev);
+	}
+
+	darray_for_each(dev_names, dev) {
+		free(dev->dev);
+		free(dev->label);
+	}
+}
+
+/*
+ * Print a "reserved:" row for @nr_replicas with the reserved size
+ * (@sectors converted to bytes). Prints nothing when @sectors is zero.
+ */
+static void persistent_reserved_to_text(struct printbuf *out,
+					unsigned nr_replicas, s64 sectors)
+{
+	if (sectors) {
+		prt_str(out, "reserved:");
+		prt_tab(out);
+
+		prt_printf(out, "%u/%u ", 1, nr_replicas);
+		prt_tab(out);
+
+		prt_str(out, "[] ");
+		prt_units_u64(out, sectors << 9);
+		prt_tab_rjust(out);
+
+		prt_newline(out);
+	}
+}
+
+/*
+ * Print one row of the replicas table for entry @r: data type,
+ * required/total device counts, summed durability of the known devices, the
+ * bracketed device list, and the size (@sectors converted to bytes).
+ * Prints nothing when @sectors is zero.
+ *
+ * Devices are shown by path when @dev_names has one for the index, otherwise
+ * by numeric index. The device list is built in a fixed-size buffer and is
+ * truncated, rather than overflowing, if the names do not fit.
+ */
+static void replicas_usage_to_text(struct printbuf *out,
+				   const struct bch_replicas_entry_v1 *r,
+				   s64 sectors,
+				   dev_names *dev_names)
+{
+	if (!sectors)
+		return;
+
+	char devs[4096];
+	/* always keep room for the closing ']' and the terminating NUL */
+	const size_t limit = sizeof(devs) - 2;
+	size_t pos = 0;
+
+	devs[pos++] = '[';
+
+	unsigned durability = 0;
+
+	for (unsigned i = 0; i < r->nr_devs; i++) {
+		unsigned dev_idx = r->devs[i];
+		struct dev_name *dev = dev_idx_to_name(dev_names, dev_idx);
+
+		durability += dev ? dev->durability : 0;
+
+		if (i && pos < limit)
+			devs[pos++] = ' ';
+
+		/* snprintf writes at most (limit - pos) characters plus a NUL */
+		int n = dev && dev->dev
+			? snprintf(devs + pos, limit - pos + 1, "%s", dev->dev)
+			: snprintf(devs + pos, limit - pos + 1, "%u", dev_idx);
+
+		if (n > 0) {
+			/* n is the untruncated length; clamp to what actually fit */
+			if ((size_t) n > limit - pos)
+				pos = limit;
+			else
+				pos += n;
+		}
+	}
+	devs[pos++] = ']';
+	devs[pos] = '\0';
+
+	bch2_prt_data_type(out, r->data_type);
+	prt_char(out, ':');
+	prt_tab(out);
+
+	prt_printf(out, "%u/%u ", r->nr_required, r->nr_devs);
+	prt_tab(out);
+
+	prt_printf(out, "%u ", durability);
+	prt_tab(out);
+
+	prt_printf(out, "%s ", devs);
+	prt_tab(out);
+
+	prt_units_u64(out, sectors << 9);
+	prt_tab_rjust(out);
+	prt_newline(out);
+}
+
+/*
+ * Iterate @_r over the variable-length replicas entries stored in @_u,
+ * starting at (_u)->replicas and stopping exactly at
+ * (_u)->replicas + (_u)->replica_entries_bytes. Each step advances with
+ * replicas_usage_next() and hits BUG_ON() if that moves past the end.
+ */
+#define for_each_usage_replica(_u, _r) \
+ for (_r = (_u)->replicas; \
+ _r != (void *) (_u)->replicas + (_u)->replica_entries_bytes;\
+ _r = replicas_usage_next(_r), \
+ BUG_ON((void *) _r > (void *) (_u)->replicas + (_u)->replica_entries_bytes))
+
+typedef DARRAY(struct bkey_i_accounting *) darray_accounting_p;
+
+/*
+ * sort() comparator for an array of accounting key pointers: compares the
+ * keys' positions with bpos_cmp() after passing a copy of each position
+ * through bch2_bpos_swab().
+ */
+static int accounting_p_cmp(const void *_l, const void *_r)
+{
+	const struct bkey_i_accounting * const *lhs = _l;
+	const struct bkey_i_accounting * const *rhs = _r;
+
+	struct bpos lhs_pos = (*lhs)->k.p;
+	struct bpos rhs_pos = (*rhs)->k.p;
+
+	bch2_bpos_swab(&lhs_pos);
+	bch2_bpos_swab(&rhs_pos);
+
+	return bpos_cmp(lhs_pos, rhs_pos);
+}
+
+/*
+ * Collect a pointer to every accounting key packed in @in into @sorted, then
+ * sort those pointers with accounting_p_cmp(). Dies if a push fails.
+ */
+static void accounting_sort(darray_accounting_p *sorted,
+			    struct bch_ioctl_query_accounting *in)
+{
+	struct bkey_i_accounting *end =
+		(struct bkey_i_accounting *) ((u64 *) in->accounting + in->accounting_u64s);
+	struct bkey_i_accounting *cur = in->accounting;
+
+	while (cur < end) {
+		if (darray_push(sorted, cur))
+			die("memory allocation failure");
+
+		cur = bkey_i_to_accounting(bkey_next(&cur->k_i));
+	}
+
+	sort(sorted->data, sorted->nr, sizeof(sorted->data[0]), accounting_p_cmp, NULL);
+}
+
+/*
+ * Print filesystem-wide usage for @fs from the accounting query interface:
+ * a summary (size, used, online reserved) followed by sections for
+ * persistent reservations, replicas, compression, btree usage and pending
+ * rebalance work.
+ *
+ * Returns 0 on success, or -1 if bchu_fs_accounting() returned NULL, in which
+ * case nothing has been printed.
+ */
+static int fs_usage_v1_to_text(struct printbuf *out,
+ struct bchfs_handle fs,
+ dev_names dev_names)
+{
+ struct bch_ioctl_query_accounting *a =
+ bchu_fs_accounting(fs,
+ BIT(BCH_DISK_ACCOUNTING_persistent_reserved)|
+ BIT(BCH_DISK_ACCOUNTING_replicas)|
+ BIT(BCH_DISK_ACCOUNTING_compression)|
+ BIT(BCH_DISK_ACCOUNTING_btree)|
+ BIT(BCH_DISK_ACCOUNTING_rebalance_work));
+ if (!a)
+ return -1;
+
+ darray_accounting_p a_sorted = {};
+
+ accounting_sort(&a_sorted, a);
+
+ prt_str(out, "Filesystem: ");
+ pr_uuid(out, fs.uuid.b);
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 20);
+ printbuf_tabstop_push(out, 16);
+
+ prt_str(out, "Size:");
+ prt_tab(out);
+ prt_units_u64(out, a->capacity << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_str(out, "Used:");
+ prt_tab(out);
+ prt_units_u64(out, a->used << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_str(out, "Online reserved:");
+ prt_tab(out);
+ prt_units_u64(out, a->online_reserved << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_newline(out);
+
+ /* Header row of the replicas table; each tabstop is pushed just before its column title */
+ printbuf_tabstops_reset(out);
+
+ printbuf_tabstop_push(out, 16);
+ prt_str(out, "Data type");
+ prt_tab(out);
+
+ printbuf_tabstop_push(out, 16);
+ prt_str(out, "Required/total");
+ prt_tab(out);
+
+ printbuf_tabstop_push(out, 14);
+ prt_str(out, "Durability");
+ prt_tab(out);
+
+ printbuf_tabstop_push(out, 14);
+ prt_str(out, "Devices");
+ prt_newline(out);
+
+ printbuf_tabstop_push(out, 14);
+
+ /* Tracks the previous entry's type so section headings print once per type */
+ unsigned prev_type = 0;
+
+ darray_for_each(a_sorted, i) {
+ /* NOTE: this 'a' shadows the query result 'a' above for the rest of the loop body */
+ struct bkey_i_accounting *a = *i;
+
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, a->k.p);
+
+ bool new_type = acc_k.type != prev_type;
+ prev_type = acc_k.type;
+
+ switch (acc_k.type) {
+ case BCH_DISK_ACCOUNTING_persistent_reserved:
+ persistent_reserved_to_text(out,
+ acc_k.persistent_reserved.nr_replicas,
+ a->v.d[0]);
+ break;
+ case BCH_DISK_ACCOUNTING_replicas:
+ replicas_usage_to_text(out, &acc_k.replicas, a->v.d[0], &dev_names);
+ break;
+ case BCH_DISK_ACCOUNTING_compression:
+ if (new_type) {
+ prt_printf(out, "\nCompression:\n");
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 12);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 24);
+ prt_printf(out, "type\tcompressed\runcompressed\raverage extent size\r\n");
+ }
+
+ /* Counters as read here: d[0] extents, d[1] uncompressed sectors, d[2] compressed sectors */
+ u64 nr_extents = a->v.d[0];
+ u64 sectors_uncompressed = a->v.d[1];
+ u64 sectors_compressed = a->v.d[2];
+
+ bch2_prt_compression_type(out, acc_k.compression.type);
+ prt_tab(out);
+
+ prt_human_readable_u64(out, sectors_compressed << 9);
+ prt_tab_rjust(out);
+
+ prt_human_readable_u64(out, sectors_uncompressed << 9);
+ prt_tab_rjust(out);
+
+ /* Average uncompressed bytes per extent; 0 when there are no extents */
+ prt_human_readable_u64(out, nr_extents
+ ? div_u64(sectors_uncompressed << 9, nr_extents)
+ : 0);
+ prt_tab_rjust(out);
+ prt_newline(out);
+ break;
+ case BCH_DISK_ACCOUNTING_btree:
+ if (new_type) {
+ prt_printf(out, "\nBtree usage:\n");
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 12);
+ printbuf_tabstop_push(out, 16);
+ }
+ prt_printf(out, "%s:\t", bch2_btree_id_str(acc_k.btree.id));
+ prt_units_u64(out, a->v.d[0] << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+ break;
+ case BCH_DISK_ACCOUNTING_rebalance_work:
+ if (new_type)
+ prt_printf(out, "\nPending rebalance work:\n");
+ prt_units_u64(out, a->v.d[0] << 9);
+ prt_newline(out);
+ break;
+ }
+ }
+
+ darray_exit(&a_sorted);
+ free(a);
+ return 0;
+}
+
+/*
+ * Print filesystem-wide usage for @fs from the bchu_fs_usage() interface:
+ * a summary (size, used, online reserved), the persistent reservations, and
+ * the replicas table. Used by fs_usage_to_text() when fs_usage_v1_to_text()
+ * fails.
+ */
+static void fs_usage_v0_to_text(struct printbuf *out,
+ struct bchfs_handle fs,
+ dev_names dev_names)
+{
+ struct bch_ioctl_fs_usage *u = bchu_fs_usage(fs);
+
+ prt_str(out, "Filesystem: ");
+ pr_uuid(out, fs.uuid.b);
+ prt_newline(out);
+
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 20);
+ printbuf_tabstop_push(out, 16);
+
+ prt_str(out, "Size:");
+ prt_tab(out);
+ prt_units_u64(out, u->capacity << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_str(out, "Used:");
+ prt_tab(out);
+ prt_units_u64(out, u->used << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_str(out, "Online reserved:");
+ prt_tab(out);
+ prt_units_u64(out, u->online_reserved << 9);
+ prt_tab_rjust(out);
+ prt_newline(out);
+
+ prt_newline(out);
+
+ /* Header row of the replicas table; each tabstop is pushed just before its column title */
+ printbuf_tabstops_reset(out);
+
+ printbuf_tabstop_push(out, 16);
+ prt_str(out, "Data type");
+ prt_tab(out);
+
+ printbuf_tabstop_push(out, 16);
+ prt_str(out, "Required/total");
+ prt_tab(out);
+
+ printbuf_tabstop_push(out, 14);
+ prt_str(out, "Durability");
+ prt_tab(out);
+
+ printbuf_tabstop_push(out, 14);
+ prt_str(out, "Devices");
+ prt_newline(out);
+
+ printbuf_tabstop_push(out, 14);
+
+ for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++)
+ persistent_reserved_to_text(out, i, u->persistent_reserved[i]);
+
+ struct bch_replicas_usage *r;
+
+ /*
+ * Four passes over the same entries fix the display order: types below
+ * BCH_DATA_user, then user data with nr_required <= 1, then user data
+ * with nr_required > 1, then types above BCH_DATA_user.
+ */
+ for_each_usage_replica(u, r)
+ if (r->r.data_type < BCH_DATA_user)
+ replicas_usage_to_text(out, &r->r, r->sectors, &dev_names);
+
+ for_each_usage_replica(u, r)
+ if (r->r.data_type == BCH_DATA_user &&
+ r->r.nr_required <= 1)
+ replicas_usage_to_text(out, &r->r, r->sectors, &dev_names);
+
+ for_each_usage_replica(u, r)
+ if (r->r.data_type == BCH_DATA_user &&
+ r->r.nr_required > 1)
+ replicas_usage_to_text(out, &r->r, r->sectors, &dev_names);
+
+ for_each_usage_replica(u, r)
+ if (r->r.data_type > BCH_DATA_user)
+ replicas_usage_to_text(out, &r->r, r->sectors, &dev_names);
+
+ free(u);
+}
+
+/*
+ * Print the full usage report for the filesystem at @path: the
+ * filesystem-wide section (v1 interface, falling back to v0 when v1 returns
+ * non-zero) followed by the per-device tables.
+ */
+static void fs_usage_to_text(struct printbuf *out, const char *path)
+{
+	struct bchfs_handle fs = bcache_fs_open(path);
+	dev_names dev_names = bchu_fs_get_devices(fs);
+
+	int v1_failed = fs_usage_v1_to_text(out, fs, dev_names);
+	if (v1_failed) {
+		fs_usage_v0_to_text(out, fs, dev_names);
+	}
+
+	devs_usage_to_text(out, fs, dev_names);
+
+	darray_exit(&dev_names);
+	bcache_fs_close(fs);
+}
+
+/* Print the help text for `bcachefs fs usage` to stdout. */
+static void fs_usage_usage(void)
+{
+	static const char help_text[] =
+		"bcachefs fs usage - display detailed filesystem usage\n"
+		"Usage: bcachefs fs usage [OPTION]... <mountpoint>\n"
+		"\n"
+		"Options:\n"
+		" -h, --human-readable Human readable units\n"
+		" -H, --help Display this help and exit\n"
+		"Report bugs to <linux-bcachefs@vger.kernel.org>";
+
+	puts(help_text);
+}
+
+/*
+ * bcachefs fs usage: print a detailed usage report for each mountpoint given
+ * on the command line, or for "." when none is given.
+ *
+ * -h/--human-readable selects human readable units; -H/--help prints the
+ * usage text and exits successfully. Any other option prints the usage text
+ * and exits with failure.
+ *
+ * Returns 0 on success.
+ */
+int cmd_fs_usage(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "help",		no_argument,	NULL, 'H' },
+		{ "human-readable",	no_argument,	NULL, 'h' },
+		{ NULL }
+	};
+	bool human_readable = false;
+	struct printbuf buf = PRINTBUF;
+	char *fs;
+	int opt;
+
+	/* 'H' must be in the optstring for the documented short form -H to work */
+	while ((opt = getopt_long(argc, argv, "hH",
+				  longopts, NULL)) != -1)
+		switch (opt) {
+		case 'h':
+			human_readable = true;
+			break;
+		case 'H':
+			fs_usage_usage();
+			exit(EXIT_SUCCESS);
+		default:
+			fs_usage_usage();
+			exit(EXIT_FAILURE);
+		}
+	args_shift(optind);
+
+	if (!argc) {
+		printbuf_reset(&buf);
+		buf.human_readable_units = human_readable;
+		fs_usage_to_text(&buf, ".");
+		printf("%s", buf.buf);
+	} else {
+		/* The buffer is reused: reset and reconfigure it for every filesystem */
+		while ((fs = arg_pop())) {
+			printbuf_reset(&buf);
+			buf.human_readable_units = human_readable;
+			fs_usage_to_text(&buf, fs);
+			printf("%s", buf.buf);
+		}
+	}
+
+	printbuf_exit(&buf);
+	return 0;
+}
diff --git a/c_src/cmd_fsck.c b/c_src/cmd_fsck.c
new file mode 100644
index 00000000..2ea51ff2
--- /dev/null
+++ b/c_src/cmd_fsck.c
@@ -0,0 +1,348 @@
+
+#include <errno.h>
+#include <getopt.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include "cmds.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs.h"
+#include "libbcachefs/super.h"
+#include "libbcachefs/super-io.h"
+#include "tools-util.h"
+
+/* Print the help text for `bcachefs fsck` to stdout. */
+static void fsck_usage(void)
+{
+	static const char help_text[] =
+		"bcachefs fsck - filesystem check and repair\n"
+		"Usage: bcachefs fsck [OPTION]... <devices>\n"
+		"\n"
+		"Options:\n"
+		" -p Automatic repair (no questions)\n"
+		" -n Don't repair, only check for errors\n"
+		" -y Assume \"yes\" to all questions\n"
+		" -f Force checking even if filesystem is marked clean\n"
+		" -r, --ratelimit_errors Don't display more than 10 errors of a given type\n"
+		" -R, --reconstruct_alloc Reconstruct the alloc btree\n"
+		" -k, --kernel Use the in-kernel fsck implementation\n"
+		" -v Be verbose\n"
+		" -h, --help Display this help and exit\n"
+		"Report bugs to <linux-bcachefs@vger.kernel.org>";
+
+	puts(help_text);
+}
+
+/*
+ * Put @fd into non-blocking mode. Dies if the current file status flags
+ * cannot be read or the updated flags cannot be set.
+ */
+static void setnonblocking(int fd)
+{
+	int flags = fcntl(fd, F_GETFL);
+
+	/*
+	 * F_GETFL returns -1 on failure; OR-ing that into the F_SETFL argument
+	 * would request every flag bit, so check it before use.
+	 */
+	if (flags < 0)
+		die("fcntl error: %m");
+
+	if (fcntl(fd, F_SETFL, flags|O_NONBLOCK))
+		die("fcntl error: %m");
+}
+
+/*
+ * Move up to one 4096-byte buffer of data from @rfd to @wfd.
+ *
+ * Returns 0 if data was copied or the read would block (EAGAIN), 1 on end of
+ * file, or the negative read() result on any other read error. A failed
+ * write terminates via die().
+ */
+static int do_splice(int rfd, int wfd)
+{
+	char buf[4096];
+
+	int nread = read(rfd, buf, sizeof(buf));
+	if (nread < 0)
+		return errno == EAGAIN ? 0 : nread;
+	if (nread == 0)
+		return 1;
+
+	/* write() may accept less than requested: keep going until everything is out */
+	char *cursor = buf;
+	int remaining = nread;
+
+	while (remaining) {
+		ssize_t written = write(wfd, cursor, remaining);
+		if (written < 0)
+			die("%s: write error: %m", __func__);
+		remaining -= written;
+		cursor += written;
+	}
+
+	return 0;
+}
+
+/*
+ * Relay data between @fd and the terminal: everything readable from @fd is
+ * copied to stdout, and everything readable from stdin is copied to @fd.
+ * The loop ends when @fd reports end of file.
+ *
+ * Returns the result of close(@fd) on a normal finish, or the negative value
+ * from do_splice() on a read error (in which case @fd is not closed here).
+ */
+static int splice_fd_to_stdinout(int fd)
+{
+ setnonblocking(STDIN_FILENO);
+ setnonblocking(fd);
+
+ bool stdin_closed = false;
+
+ while (true) {
+ fd_set fds;
+
+ /* Wait on @fd, and on stdin too until stdin has reported end of file */
+ FD_ZERO(&fds);
+ FD_SET(fd, &fds);
+ if (!stdin_closed)
+ FD_SET(STDIN_FILENO, &fds);
+
+ if (select(fd + 1, &fds, NULL, NULL, NULL) < 0)
+ die("select error: %m");
+
+ /* Both directions are attempted every round; the fds are non-blocking so an idle side returns 0 */
+ int r = do_splice(fd, STDOUT_FILENO);
+ if (r < 0)
+ return r;
+ if (r)
+ break;
+
+ r = do_splice(STDIN_FILENO, fd);
+ if (r < 0)
+ return r;
+ if (r)
+ stdin_closed = true;
+ }
+
+ return close(fd);
+}
+
+/*
+ * Run fsck on the mounted filesystem that contains @dev_path, passing
+ * @opt_str as the option string, and relay the resulting fd to
+ * stdin/stdout. Dies if the ioctl fails.
+ */
+static int fsck_online(const char *dev_path, const char *opt_str)
+{
+	int dev_idx;
+	struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &dev_idx);
+
+	struct bch_ioctl_fsck_online request = {
+		.opts = (unsigned long) opt_str
+	};
+
+	int relay_fd = ioctl(fs.ioctl_fd, BCH_IOCTL_FSCK_ONLINE, &request);
+	if (relay_fd >= 0)
+		return splice_fd_to_stdinout(relay_fd);
+
+	die("BCH_IOCTL_FSCK_ONLINE error: %s", bch2_err_str(errno));
+}
+
+/* Append @opt to the option list in @out, comma-separated from any earlier entry. */
+static void append_opt(struct printbuf *out, const char *opt)
+{
+	bool have_previous = out->pos != 0;
+
+	if (have_previous) {
+		prt_char(out, ',');
+	}
+
+	prt_str(out, opt);
+}
+
+/*
+ * Decide whether the in-kernel fsck is a better match for the filesystem on
+ * @devs than this binary.
+ *
+ * Returns false when the kernel module's version cannot be read, when it
+ * equals this binary's version, or when the filesystem cannot be opened.
+ * Otherwise returns true if the kernel's version lies between this binary's
+ * version and the filesystem's version (see the comparison below), printing
+ * a note explaining the choice.
+ */
+static bool should_use_kernel_fsck(darray_str devs)
+{
+ /* Best effort: the result is ignored, the version file check below decides */
+ system("modprobe bcachefs");
+
+ unsigned kernel_version = !access("/sys/module/bcachefs/parameters/version", R_OK)
+ ? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
+ : 0;
+
+ if (!kernel_version)
+ return false;
+
+ if (kernel_version == bcachefs_metadata_version_current)
+ return false;
+
+ /* Open without starting or modifying the filesystem, only to read its version */
+ struct bch_opts opts = bch2_opts_empty();
+ opt_set(opts, nostart, true);
+ opt_set(opts, noexcl, true);
+ opt_set(opts, nochanges, true);
+ opt_set(opts, read_only, true);
+
+ struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts);
+ if (IS_ERR(c))
+ return false;
+
+ /*
+ * True when: binary < kernel <= filesystem,
+ * or: filesystem <= kernel < binary.
+ */
+ bool ret = ((bcachefs_metadata_version_current < kernel_version &&
+ kernel_version <= c->sb.version) ||
+ (c->sb.version <= kernel_version &&
+ kernel_version < bcachefs_metadata_version_current));
+
+ if (ret) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "fsck binary is version ");
+ bch2_version_to_text(&buf, bcachefs_metadata_version_current);
+ prt_str(&buf, " but filesystem is ");
+ bch2_version_to_text(&buf, c->sb.version);
+ prt_str(&buf, " and kernel is ");
+ bch2_version_to_text(&buf, kernel_version);
+ prt_str(&buf, ", using kernel fsck\n");
+
+ printf("%s", buf.buf);
+ printbuf_exit(&buf);
+ }
+
+ bch2_fs_stop(c);
+
+ return ret;
+}
+
+/*
+ * Return true if @path is a block device. A path that cannot be stat()ed is
+ * also reported as true.
+ */
+static bool is_blockdev(const char *path)
+{
+	struct stat st;
+
+	return stat(path, &st) != 0 || S_ISBLK(st.st_mode);
+}
+
+/*
+ * Detach the loop device @path by running "losetup -d". The command's exit
+ * status is not checked.
+ */
+static void loopdev_free(const char *path)
+{
+	char *detach_cmd = mprintf("losetup -d %s", path);
+
+	system(detach_cmd);
+	free(detach_cmd);
+}
+
+/*
+ * Attach the file @path to a free loop device by running
+ * "losetup --show -f" and return the loop device path it prints.
+ *
+ * Returns a heap-allocated string the caller must free(), or NULL (after
+ * printing a message to stderr) if losetup could not be run, exited with a
+ * non-zero status, or printed nothing.
+ *
+ * NOTE(review): @path is interpolated into a shell command unquoted, so paths
+ * containing spaces or shell metacharacters will not work as intended.
+ */
+static char *loopdev_alloc(const char *path)
+{
+	char *cmd = mprintf("losetup --show -f %s", path);
+	FILE *f = popen(cmd, "r");
+	free(cmd);
+	if (!f) {
+		fprintf(stderr, "error executing losetup: %m\n");
+		return NULL;
+	}
+
+	char *line = NULL;
+	size_t n = 0;
+	ssize_t len = getline(&line, &n, f);
+	int ret = pclose(f);
+	if (ret) {
+		fprintf(stderr, "error executing losetup: %i\n", ret);
+		free(line);
+		return NULL;
+	}
+
+	/*
+	 * getline() returns -1 when nothing could be read; line may then be
+	 * NULL or hold an unused buffer, neither of which is a device name.
+	 */
+	if (len < 0) {
+		fprintf(stderr, "error executing losetup: no device name returned\n");
+		free(line);
+		return NULL;
+	}
+
+	strim(line);
+	return line;
+}
+
+/*
+ * bcachefs fsck: check and optionally repair a filesystem.
+ *
+ * Command line switches are translated into a comma-separated mount option
+ * string. One of three implementations is then used:
+ *  - online fsck, if any of the given devices is mounted;
+ *  - in-kernel offline fsck, if requested (-k or BCACHEFS_KERNEL_ONLY) or if
+ *    should_use_kernel_fsck() recommends it; non-block-device arguments are
+ *    attached to loop devices first;
+ *  - userspace offline fsck otherwise, and also as a fallback when the kernel
+ *    path was only auto-selected and could not be started.
+ *
+ * For userspace fsck the return value has bit 0 (1) set when errors were
+ * fixed and bit 2 (4) set when errors remain. Exits with 8 when no devices
+ * are given or the filesystem cannot be opened, and with 16 after printing
+ * help.
+ */
+int cmd_fsck(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "ratelimit_errors",	no_argument,		NULL, 'r' },
+		{ "reconstruct_alloc",	no_argument,		NULL, 'R' },
+		{ "kernel",		no_argument,		NULL, 'k' },
+		{ "no-kernel",		no_argument,		NULL, 'K' },
+		{ "help",		no_argument,		NULL, 'h' },
+		{ NULL }
+	};
+	int kernel = -1; /* unset */
+	int opt, ret = 0;
+	struct printbuf opts_str = PRINTBUF;
+
+	if (getenv("BCACHEFS_KERNEL_ONLY"))
+		kernel = true;
+
+	/* Defaults; later switches append overriding values to the same list */
+	append_opt(&opts_str, "degraded");
+	append_opt(&opts_str, "fsck");
+	append_opt(&opts_str, "fix_errors=ask");
+	append_opt(&opts_str, "read_only");
+
+	while ((opt = getopt_long(argc, argv,
+				  "apynfo:rRkKvh",
+				  longopts, NULL)) != -1)
+		switch (opt) {
+		case 'a': /* outdated alias for -p */
+		case 'p':
+		case 'y':
+			append_opt(&opts_str, "fix_errors=yes");
+			break;
+		case 'n':
+			append_opt(&opts_str, "nochanges");
+			append_opt(&opts_str, "fix_errors=no");
+			break;
+		case 'f':
+			/* force check, even if filesystem marked clean: */
+			break;
+		case 'o':
+			append_opt(&opts_str, optarg);
+			break;
+		case 'r':
+			append_opt(&opts_str, "ratelimit_errors");
+			break;
+		case 'R':
+			append_opt(&opts_str, "reconstruct_alloc");
+			break;
+		case 'k':
+			kernel = true;
+			break;
+		case 'K':
+			kernel = false;
+			break;
+		case 'v':
+			append_opt(&opts_str, "verbose");
+			break;
+		case 'h':
+			fsck_usage();
+			exit(16);
+		}
+	args_shift(optind);
+
+	if (!argc) {
+		fprintf(stderr, "Please supply device(s) to check\n");
+		exit(8);
+	}
+
+	darray_str devs = get_or_split_cmdline_devs(argc, argv);
+
+	darray_for_each(devs, i)
+		if (dev_mounted(*i)) {
+			printf("Running fsck online\n");
+			return fsck_online(*i, opts_str.buf);
+		}
+
+	/* kernel stays < 0 when the user expressed no preference; that permits the fallback below */
+	int kernel_probed = kernel;
+	if (kernel_probed < 0)
+		kernel_probed = should_use_kernel_fsck(devs);
+
+	struct bch_opts opts = bch2_opts_empty();
+	struct printbuf parse_later = PRINTBUF;
+
+	if (kernel_probed) {
+		darray_str loopdevs = {};
+		int fsck_fd = -1;
+
+		printf("Running in-kernel offline fsck\n");
+		struct bch_ioctl_fsck_offline *fsck = calloc(sizeof(*fsck) + sizeof(u64) * devs.nr, 1);
+		if (!fsck)
+			die("memory allocation failure");
+
+		fsck->opts = (unsigned long)opts_str.buf;
+		darray_for_each(devs, i) {
+			if (is_blockdev(*i)) {
+				fsck->devs[i - devs.data] = (unsigned long) *i;
+			} else {
+				/* Regular files are handed to the kernel through a loop device */
+				char *l = loopdev_alloc(*i);
+				if (!l)
+					goto kernel_fsck_err;
+				darray_push(&loopdevs, l);
+				fsck->devs[i - devs.data] = (unsigned long) l;
+			}
+		}
+		fsck->nr_devs = devs.nr;
+
+		int ctl_fd = bcachectl_open();
+		fsck_fd = ioctl(ctl_fd, BCH_IOCTL_FSCK_OFFLINE, fsck);
+kernel_fsck_err:
+		free(fsck);
+
+		/* Detach the loop devices and release the path strings from loopdev_alloc() */
+		darray_for_each(loopdevs, i) {
+			loopdev_free(*i);
+			free(*i);
+		}
+		darray_exit(&loopdevs);
+
+		/* Auto-selected kernel fsck that failed to start: fall back to userspace */
+		if (fsck_fd < 0 && kernel < 0)
+			goto userland_fsck;
+
+		if (fsck_fd < 0)
+			die("BCH_IOCTL_FSCK_OFFLINE error: %s", bch2_err_str(errno));
+
+		ret = splice_fd_to_stdinout(fsck_fd);
+	} else {
+userland_fsck:
+		printf("Running userspace offline fsck\n");
+		ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf);
+		if (ret)
+			return ret;
+
+		struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts);
+		if (IS_ERR(c))
+			exit(8);
+
+		if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
+			fprintf(stderr, "%s: errors fixed\n", c->name);
+			ret |= 1;
+		}
+		if (test_bit(BCH_FS_error, &c->flags)) {
+			fprintf(stderr, "%s: still has errors\n", c->name);
+			ret |= 4;
+		}
+
+		bch2_fs_stop(c);
+	}
+
+	printbuf_exit(&opts_str);
+	return ret;
+}
diff --git a/c_src/cmd_fusemount.c b/c_src/cmd_fusemount.c
new file mode 100644
index 00000000..e5674b42
--- /dev/null
+++ b/c_src/cmd_fusemount.c
@@ -0,0 +1,1314 @@
+#ifdef BCACHEFS_FUSE
+
+#include <errno.h>
+#include <float.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/statvfs.h>
+
+#include <fuse_lowlevel.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "tools-util.h"
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/alloc_foreground.h"
+#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/dirent.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs/fs-common.h"
+#include "libbcachefs/inode.h"
+#include "libbcachefs/io_read.h"
+#include "libbcachefs/io_write.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super.h"
+
+/* mode_to_type(): */
+#include "libbcachefs/fs.h"
+
+#include <linux/dcache.h>
+
+/*
+ * Brace initializer for a struct qstr describing the NUL-terminated string n:
+ * .len is strlen(n) and .name points at n itself (no copy is made).  n is
+ * expanded twice, so it must not have side effects.
+ */
+/* XXX cut and pasted from fsck.c */
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+/*
+ * used by write_aligned function for waiting on bch2_write closure:
+ * cl is the closure write_aligned() sleeps on, and the end_io callback
+ * (bcachefs_fuse_write_endio) recovers this wrapper from &op via
+ * container_of() to drop the reference on cl.
+ */
+struct write_aligned_op_t {
+ struct closure cl;
+
+ /* must be last: */
+ struct bch_write_op op;
+};
+
+
+/*
+ * Translate an inode number as seen by FUSE into a bcachefs
+ * (subvolume, inode) pair.  FUSE inode 1 is remapped to inode 4096; every
+ * other number is used unchanged.  The subvolume is always 1.
+ */
+static inline subvol_inum map_root_ino(u64 ino)
+{
+	u64 inum = ino;
+
+	if (inum == 1)
+		inum = 4096;
+
+	return (subvol_inum) { 1, inum };
+}
+
+/*
+ * Inverse of map_root_ino() for the inode number: inode 4096 is reported to
+ * FUSE as inode 1, everything else is passed through.
+ */
+static inline u64 unmap_root_ino(u64 ino)
+{
+	if (ino == 4096)
+		return 1;
+
+	return ino;
+}
+
+/*
+ * Build a struct stat from an unpacked bcachefs inode.  Fields that are not
+ * assigned below are left zeroed.
+ */
+static struct stat inode_to_stat(struct bch_fs *c,
+				 struct bch_inode_unpacked *bi)
+{
+	struct stat st = { 0 };
+
+	st.st_ino	= unmap_root_ino(bi->bi_inum);
+	st.st_size	= bi->bi_size;
+	st.st_mode	= bi->bi_mode;
+	st.st_uid	= bi->bi_uid;
+	st.st_gid	= bi->bi_gid;
+	st.st_nlink	= bch2_inode_nlink_get(bi);
+	st.st_rdev	= bi->bi_dev;
+	st.st_blksize	= block_bytes(c);
+	st.st_blocks	= bi->bi_sectors;
+	st.st_atim	= bch2_time_to_timespec(c, bi->bi_atime);
+	st.st_mtim	= bch2_time_to_timespec(c, bi->bi_mtime);
+	st.st_ctim	= bch2_time_to_timespec(c, bi->bi_ctime);
+
+	return st;
+}
+
+/*
+ * Build the fuse_entry_param replied for an inode: inode number, generation
+ * and attributes come from bi, and both cache timeouts are set to DBL_MAX.
+ */
+static struct fuse_entry_param inode_to_entry(struct bch_fs *c,
+					      struct bch_inode_unpacked *bi)
+{
+	struct fuse_entry_param entry = { 0 };
+
+	entry.ino		= unmap_root_ino(bi->bi_inum);
+	entry.generation	= bi->bi_generation;
+	entry.attr		= inode_to_stat(c, bi);
+	entry.attr_timeout	= DBL_MAX;
+	entry.entry_timeout	= DBL_MAX;
+
+	return entry;
+}
+
+/*
+ * FUSE init callback: request the writeback cache when the connection
+ * advertises it.  arg is unused.
+ */
+static void bcachefs_fuse_init(void *arg, struct fuse_conn_info *conn)
+{
+	if (!(conn->capable & FUSE_CAP_WRITEBACK_CACHE)) {
+		fuse_log(FUSE_LOG_DEBUG, "fuse_init: writeback not capable\n");
+		return;
+	}
+
+	fuse_log(FUSE_LOG_DEBUG, "fuse_init: activating writeback\n");
+	conn->want |= FUSE_CAP_WRITEBACK_CACHE;
+
+	/* POSIX ACL support is not requested: */
+	//conn->want |= FUSE_CAP_POSIX_ACL;
+}
+
+/* FUSE destroy callback: arg is the struct bch_fs userdata; stop it. */
+static void bcachefs_fuse_destroy(void *arg)
+{
+	struct bch_fs *fs = arg;
+
+	bch2_fs_stop(fs);
+}
+
+/*
+ * FUSE lookup: resolve name inside directory dir_ino and reply with the
+ * entry for the inode it refers to.
+ *
+ * If the directory inode itself cannot be read, or the target inode cannot be
+ * read, the negated error code is replied.  If the dirent lookup fails for
+ * any reason, a zeroed entry (ino 0) with maximal timeouts is replied instead
+ * of an error.
+ */
+static void bcachefs_fuse_lookup(fuse_req_t req, fuse_ino_t dir_ino,
+				 const char *name)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	subvol_inum dir = map_root_ino(dir_ino);
+	struct qstr dname = QSTR(name);
+	struct bch_inode_unpacked inode;
+	struct fuse_entry_param entry;
+	subvol_inum target;
+	int ret;
+
+	fuse_log(FUSE_LOG_DEBUG, "fuse_lookup(dir=%llu name=%s)\n",
+		 dir.inum, name);
+
+	/* The directory inode is needed to derive the dirent hash info. */
+	ret = bch2_inode_find_by_inum(c, dir, &inode);
+	if (ret) {
+		fuse_reply_err(req, -ret);
+		return;
+	}
+
+	struct bch_hash_info hash = bch2_hash_info_init(c, &inode);
+
+	ret = bch2_dirent_lookup(c, dir, &hash, &dname, &target);
+	if (ret) {
+		entry = (struct fuse_entry_param) {
+			.attr_timeout	= DBL_MAX,
+			.entry_timeout	= DBL_MAX,
+		};
+		fuse_reply_entry(req, &entry);
+		return;
+	}
+
+	ret = bch2_inode_find_by_inum(c, target, &inode);
+	if (ret) {
+		fuse_log(FUSE_LOG_DEBUG, "fuse_lookup error %i\n", ret);
+		fuse_reply_err(req, -ret);
+		return;
+	}
+
+	fuse_log(FUSE_LOG_DEBUG, "fuse_lookup ret(inum=%llu)\n",
+		 inode.bi_inum);
+
+	entry = inode_to_entry(c, &inode);
+	fuse_reply_entry(req, &entry);
+}
+
+/*
+ * FUSE getattr: look the inode up and reply with its attributes (cached with
+ * a DBL_MAX timeout), or with the negated error code on failure.  fi is
+ * unused.
+ */
+static void bcachefs_fuse_getattr(fuse_req_t req, fuse_ino_t ino,
+				  struct fuse_file_info *fi)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	subvol_inum inum = map_root_ino(ino);
+	struct bch_inode_unpacked inode;
+	int ret;
+
+	fuse_log(FUSE_LOG_DEBUG, "fuse_getattr(inum=%llu)\n", inum.inum);
+
+	ret = bch2_inode_find_by_inum(c, inum, &inode);
+	if (!ret) {
+		fuse_log(FUSE_LOG_DEBUG, "fuse_getattr success\n");
+
+		struct stat st = inode_to_stat(c, &inode);
+		fuse_reply_attr(req, &st, DBL_MAX);
+		return;
+	}
+
+	fuse_log(FUSE_LOG_DEBUG, "fuse_getattr error %i\n", ret);
+	fuse_reply_err(req, -ret);
+}
+
+/*
+ * FUSE setattr: apply the attributes selected by the to_set bitmask to the
+ * inode and commit the change.
+ *
+ * The inode is read and rewritten inside a single btree transaction; the
+ * transaction is re-run from "retry" whenever it reports a restart.  On
+ * success the resulting attributes are written back through attr and replied;
+ * otherwise the negated error code is replied.  fi is unused.
+ *
+ * FUSE_SET_ATTR_SIZE only updates bi_size here, and ctime is not updated.
+ */
+static void bcachefs_fuse_setattr(fuse_req_t req, fuse_ino_t ino,
+				  struct stat *attr, int to_set,
+				  struct fuse_file_info *fi)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	struct bch_inode_unpacked inode_u;
+	struct btree_trans *trans;
+	struct btree_iter iter;
+	u64 now;
+	int ret;
+
+	subvol_inum inum = map_root_ino(ino);
+
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_setattr(%llu, %x)\n", inum.inum, to_set);
+
+	trans = bch2_trans_get(c);
+retry:
+	bch2_trans_begin(trans);
+	now = bch2_current_time(c);
+
+	ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent);
+	if (ret)
+		goto err;
+
+	if (to_set & FUSE_SET_ATTR_MODE)
+		inode_u.bi_mode	= attr->st_mode;
+	if (to_set & FUSE_SET_ATTR_UID)
+		inode_u.bi_uid	= attr->st_uid;
+	if (to_set & FUSE_SET_ATTR_GID)
+		inode_u.bi_gid	= attr->st_gid;
+	if (to_set & FUSE_SET_ATTR_SIZE)
+		inode_u.bi_size	= attr->st_size;
+	if (to_set & FUSE_SET_ATTR_ATIME)
+		inode_u.bi_atime = timespec_to_bch2_time(c, attr->st_atim);
+	if (to_set & FUSE_SET_ATTR_MTIME)
+		inode_u.bi_mtime = timespec_to_bch2_time(c, attr->st_mtim);
+	/* The *_NOW flags are applied last so they win over explicit times. */
+	if (to_set & FUSE_SET_ATTR_ATIME_NOW)
+		inode_u.bi_atime = now;
+	if (to_set & FUSE_SET_ATTR_MTIME_NOW)
+		inode_u.bi_mtime = now;
+	/* TODO: CTIME? */
+
+	ret   = bch2_inode_write(trans, &iter, &inode_u) ?:
+		bch2_trans_commit(trans, NULL, NULL,
+				  BCH_TRANS_COMMIT_no_enospc);
+err:
+	bch2_trans_iter_exit(trans, &iter);
+	/*
+	 * Transaction restarts are reported with BCH_ERR_transaction_restart*
+	 * codes rather than -EINTR; match on the error class so the update is
+	 * actually retried instead of being returned to the caller.
+	 */
+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+		goto retry;
+
+	bch2_trans_put(trans);
+
+	if (!ret) {
+		*attr = inode_to_stat(c, &inode_u);
+		fuse_reply_attr(req, attr, DBL_MAX);
+	} else {
+		fuse_reply_err(req, -ret);
+	}
+}
+
+/*
+ * Create a new inode called name in directory dir with the given mode and
+ * device number, committing the creation in its own transaction.  The new
+ * inode is always created with uid 0 and gid 0.
+ *
+ * new_inode is initialized here and receives the created inode.  Returns the
+ * result of the transaction commit.
+ */
+static int do_create(struct bch_fs *c, subvol_inum dir,
+		     const char *name, mode_t mode, dev_t rdev,
+		     struct bch_inode_unpacked *new_inode)
+{
+	struct bch_inode_unpacked dir_u;
+	struct qstr qstr = QSTR(name);
+	const uid_t uid = 0;
+	const gid_t gid = 0;
+	int ret;
+
+	bch2_inode_init_early(c, new_inode);
+
+	ret = bch2_trans_commit_do(c, NULL, NULL, 0,
+			bch2_create_trans(trans,
+				dir, &dir_u,
+				new_inode, &qstr,
+				uid, gid, mode, rdev, NULL, NULL,
+				(subvol_inum) { 0 }, 0));
+	return ret;
+}
+
+/*
+ * FUSE mknod: create a node called name in dir_ino via do_create() and reply
+ * with the new entry, or with the negated error code on failure.
+ */
+static void bcachefs_fuse_mknod(fuse_req_t req, fuse_ino_t dir_ino,
+				const char *name, mode_t mode,
+				dev_t rdev)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	subvol_inum dir = map_root_ino(dir_ino);
+	struct bch_inode_unpacked created;
+
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_mknod(%llu, %s, %x, %x)\n",
+		 dir.inum, name, mode, rdev);
+
+	int ret = do_create(c, dir, name, mode, rdev, &created);
+	if (ret) {
+		fuse_reply_err(req, -ret);
+		return;
+	}
+
+	struct fuse_entry_param entry = inode_to_entry(c, &created);
+	fuse_reply_entry(req, &entry);
+}
+
+/*
+ * FUSE mkdir: implemented as mknod with S_IFDIR added to the mode.  The
+ * incoming mode must not carry any file-type bits (BUG_ON otherwise).
+ */
+static void bcachefs_fuse_mkdir(fuse_req_t req, fuse_ino_t dir,
+				const char *name, mode_t mode)
+{
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_mkdir(%llu, %s, %x)\n",
+		 dir, name, mode);
+
+	BUG_ON(mode & S_IFMT);
+
+	bcachefs_fuse_mknod(req, dir, name, mode | S_IFDIR, 0);
+}
+
+/*
+ * FUSE unlink: remove the entry called name from directory dir_ino in its own
+ * transaction and reply with the negated result (0 on success).
+ */
+static void bcachefs_fuse_unlink(fuse_req_t req, fuse_ino_t dir_ino,
+				 const char *name)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	subvol_inum dir = map_root_ino(dir_ino);
+	struct qstr qstr = QSTR(name);
+	struct bch_inode_unpacked dir_u, inode_u;
+	int ret;
+
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_unlink(%llu, %s)\n", dir.inum, name);
+
+	ret = bch2_trans_commit_do(c, NULL, NULL,
+			BCH_TRANS_COMMIT_no_enospc,
+			bch2_unlink_trans(trans, dir, &dir_u,
+					  &inode_u, &qstr, false));
+
+	fuse_reply_err(req, -ret);
+}
+
+/* FUSE rmdir: logs the request and delegates to bcachefs_fuse_unlink(). */
+static void bcachefs_fuse_rmdir(fuse_req_t req, fuse_ino_t dir,
+				const char *name)
+{
+	fuse_log(FUSE_LOG_DEBUG,
+		 "bcachefs_fuse_rmdir(%llu, %s)\n",
+		 dir, name);
+
+	bcachefs_fuse_unlink(req, dir, name);
+}
+
+/*
+ * FUSE rename: move srcname in src_dir_ino to dstname in dst_dir_ino in a
+ * single transaction and reply with the negated result (0 on success).
+ *
+ * flags is only logged; the rename is always performed as BCH_RENAME.
+ */
+static void bcachefs_fuse_rename(fuse_req_t req,
+				 fuse_ino_t src_dir_ino, const char *srcname,
+				 fuse_ino_t dst_dir_ino, const char *dstname,
+				 unsigned flags)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	struct bch_inode_unpacked dst_dir_u, src_dir_u;
+	struct bch_inode_unpacked src_inode_u, dst_inode_u;
+	/*
+	 * Each qstr must be built from its own name: previously src_name was
+	 * initialized from dstname and dst_name from srcname, so the rename
+	 * operated on the wrong directory entries.
+	 */
+	struct qstr src_name = QSTR(srcname);
+	struct qstr dst_name = QSTR(dstname);
+	subvol_inum src_dir = map_root_ino(src_dir_ino);
+	subvol_inum dst_dir = map_root_ino(dst_dir_ino);
+	int ret;
+
+	fuse_log(FUSE_LOG_DEBUG,
+		 "bcachefs_fuse_rename(%llu, %s, %llu, %s, %x)\n",
+		 src_dir.inum, srcname, dst_dir.inum, dstname, flags);
+
+	/* XXX handle overwrites */
+	ret = bch2_trans_commit_do(c, NULL, NULL, 0,
+		bch2_rename_trans(trans,
+				  src_dir, &src_dir_u,
+				  dst_dir, &dst_dir_u,
+				  &src_inode_u, &dst_inode_u,
+				  &src_name, &dst_name,
+				  BCH_RENAME));
+
+	fuse_reply_err(req, -ret);
+}
+
+/*
+ * FUSE link: add a new name newname in newparent_ino for the existing inode
+ * ino.  Replies with the linked inode's entry on success, or with the negated
+ * error code on failure.
+ */
+static void bcachefs_fuse_link(fuse_req_t req, fuse_ino_t ino,
+			       fuse_ino_t newparent_ino, const char *newname)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	subvol_inum inum = map_root_ino(ino);
+	subvol_inum newparent = map_root_ino(newparent_ino);
+	struct qstr qstr = QSTR(newname);
+	struct bch_inode_unpacked dir_u, inode_u;
+
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_link(%llu, %llu, %s)\n",
+		 inum.inum, newparent.inum, newname);
+
+	int ret = bch2_trans_commit_do(c, NULL, NULL, 0,
+			bch2_link_trans(trans, newparent, &dir_u,
+					inum, &inode_u, &qstr));
+	if (ret) {
+		fuse_reply_err(req, -ret);
+		return;
+	}
+
+	struct fuse_entry_param entry = inode_to_entry(c, &inode_u);
+	fuse_reply_entry(req, &entry);
+}
+
+/*
+ * FUSE open: no per-open state is kept.  The reply turns direct I/O off and
+ * enables keep_cache and cache_readdir.
+ */
+static void bcachefs_fuse_open(fuse_req_t req, fuse_ino_t inum,
+			       struct fuse_file_info *fi)
+{
+	fi->cache_readdir	= true;
+	fi->keep_cache		= true;
+	fi->direct_io		= false;
+
+	fuse_reply_open(req, fi);
+}
+
+/*
+ * Initialize bio as a single-segment bio whose one bio_vec (bv) describes the
+ * size bytes starting at buf.  buf is stored directly in bv_page with a zero
+ * bv_offset.
+ */
+static void userbio_init(struct bio *bio, struct bio_vec *bv,
+			 void *buf, size_t size)
+{
+	bio_init(bio, NULL, bv, 1, 0);
+
+	bv->bv_page	= buf;
+	bv->bv_offset	= 0;
+	bv->bv_len	= size;
+
+	bio->bi_iter.bi_size = size;
+}
+
+/*
+ * Fill *opts with the I/O options in effect for inode inum.
+ * Returns 0 on success, or -EINVAL if the inode cannot be looked up (the
+ * underlying error code is not propagated).
+ */
+static int get_inode_io_opts(struct bch_fs *c, subvol_inum inum, struct bch_io_opts *opts)
+{
+	struct bch_inode_unpacked inode;
+	int ret = bch2_inode_find_by_inum(c, inum, &inode);
+
+	if (ret)
+		return -EINVAL;
+
+	bch2_inode_opts_get(opts, c, &inode);
+	return 0;
+}
+
+/*
+ * Read completion: bi_private holds the closure the issuer is waiting on;
+ * drop the reference that was taken before the read was submitted.
+ */
+static void bcachefs_fuse_read_endio(struct bio *bio)
+{
+	struct closure *waiter = bio->bi_private;
+
+	closure_put(waiter);
+}
+
+
+/*
+ * Write completion: op is embedded in a struct write_aligned_op_t; recover
+ * the wrapper and drop the reference held on its closure.
+ */
+static void bcachefs_fuse_write_endio(struct bch_write_op *op)
+{
+	struct write_aligned_op_t *wrapper =
+		container_of(op, struct write_aligned_op_t, op);
+
+	closure_put(&wrapper->cl);
+}
+
+
+/*
+ * Result of widening a byte range to block boundaries, as computed by
+ * align_io().
+ */
+struct fuse_align_io {
+ /* requested offset rounded down to a block boundary */
+ off_t start;
+ /* bytes between start and the requested offset */
+ size_t pad_start;
+ /* requested end (offset + size) rounded up to a block boundary */
+ off_t end;
+ /* bytes between the requested end and end */
+ size_t pad_end;
+ /* end - start: length of the aligned range */
+ size_t size;
+};
+
+/*
+ * Widen the byte range [offset, offset + size) so that it starts and ends on
+ * block_bytes(c) boundaries, recording how much padding was added on each
+ * side.  offset must not be negative (BUG_ON otherwise).
+ */
+/* TODO: align to block_bytes, sector size, or page size? */
+static struct fuse_align_io align_io(const struct bch_fs *c, size_t size,
+				     off_t offset)
+{
+	BUG_ON(offset < 0);
+
+	off_t end		= offset + size;
+	off_t aligned_start	= round_down(offset, block_bytes(c));
+	off_t aligned_end	= round_up(end, block_bytes(c));
+
+	return (struct fuse_align_io) {
+		.start		= aligned_start,
+		.pad_start	= offset - aligned_start,
+		.end		= aligned_end,
+		.pad_end	= aligned_end - end,
+		.size		= aligned_end - aligned_start,
+	};
+}
+
+/*
+ * Convert a byte count transferred over the aligned range back into the
+ * number of bytes of the caller's original (unaligned) request that were
+ * covered: the leading and trailing padding are subtracted, clamping at zero.
+ */
+static size_t align_fix_up_bytes(const struct fuse_align_io *align,
+				 size_t align_bytes)
+{
+	/* Nothing beyond the leading padding was transferred. */
+	if (align_bytes <= align->pad_start)
+		return 0;
+
+	size_t past_start = align_bytes - align->pad_start;
+
+	if (past_start <= align->pad_end)
+		return 0;
+
+	return past_start - align->pad_end;
+}
+
+/*
+ * Read aligned data.
+ *
+ * Synchronously reads aligned_size bytes starting at byte offset
+ * aligned_offset of inode inum into buf.  Both aligned_size and
+ * aligned_offset must be multiples of block_bytes(c); this is enforced with
+ * BUG_ON (the mask test presumably relies on the block size being a power of
+ * two -- TODO confirm).
+ *
+ * Returns -ENOENT if the inode's I/O options cannot be looked up; otherwise
+ * returns the negation of blk_status_to_errno() for the completed bio.
+ * NOTE(review): callers treat a nonzero return as a negative errno and negate
+ * it again before replying; confirm the sign blk_status_to_errno() produces
+ * in this build.
+ */
+static int read_aligned(struct bch_fs *c, subvol_inum inum, size_t aligned_size,
+ off_t aligned_offset, void *buf)
+{
+ BUG_ON(aligned_size & (block_bytes(c) - 1));
+ BUG_ON(aligned_offset & (block_bytes(c) - 1));
+
+ struct bch_io_opts io_opts;
+ if (get_inode_io_opts(c, inum, &io_opts))
+ return -ENOENT;
+
+ struct bch_read_bio rbio;
+ struct bio_vec bv;
+ userbio_init(&rbio.bio, &bv, buf, aligned_size);
+ bio_set_op_attrs(&rbio.bio, REQ_OP_READ, REQ_SYNC);
+ /* bi_sector is in 512-byte units, hence the shift by 9 */
+ rbio.bio.bi_iter.bi_sector = aligned_offset >> 9;
+
+ struct closure cl;
+ closure_init_stack(&cl);
+
+ /* this reference is dropped by bcachefs_fuse_read_endio() */
+ closure_get(&cl);
+ rbio.bio.bi_end_io = bcachefs_fuse_read_endio;
+ rbio.bio.bi_private = &cl;
+
+ bch2_read(c, rbio_init(&rbio.bio, io_opts), inum);
+
+ /* wait for the end_io callback before touching the on-stack bio again */
+ closure_sync(&cl);
+
+ return -blk_status_to_errno(rbio.bio.bi_status);
+}
+
+/*
+ * FUSE read: reply with up to size bytes of inode ino starting at offset.
+ *
+ * The request is clamped to the inode's size (an empty buffer is replied when
+ * nothing lies inside it), widened to block boundaries, read into a temporary
+ * buffer with read_aligned(), and the requested window of that buffer is
+ * replied.  Errors are replied as positive errno values.  fi is unused.
+ */
+static void bcachefs_fuse_read(fuse_req_t req, fuse_ino_t ino,
+			       size_t size, off_t offset,
+			       struct fuse_file_info *fi)
+{
+	subvol_inum inum = map_root_ino(ino);
+	struct bch_fs *c = fuse_req_userdata(req);
+
+	/*
+	 * Log the inode number, not the whole subvol_inum struct: passing the
+	 * struct through varargs does not match %llu and garbles the
+	 * remaining arguments.
+	 */
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_read(%llu, %zu, %lld)\n",
+		 inum.inum, size, (long long) offset);
+
+	/* Check inode size. */
+	struct bch_inode_unpacked bi;
+	int ret = bch2_inode_find_by_inum(c, inum, &bi);
+	if (ret) {
+		fuse_reply_err(req, -ret);
+		return;
+	}
+
+	off_t end = min_t(u64, bi.bi_size, offset + size);
+	if (end <= offset) {
+		fuse_reply_buf(req, NULL, 0);
+		return;
+	}
+	size = end - offset;
+
+	struct fuse_align_io align = align_io(c, size, offset);
+
+	void *buf = aligned_alloc(PAGE_SIZE, align.size);
+	if (!buf) {
+		fuse_reply_err(req, ENOMEM);
+		return;
+	}
+
+	ret = read_aligned(c, inum, align.size, align.start, buf);
+
+	/* Skip the leading padding that was only read for alignment. */
+	if (likely(!ret))
+		fuse_reply_buf(req, buf + align.pad_start, size);
+	else
+		fuse_reply_err(req, -ret);
+
+	free(buf);
+}
+
+/*
+ * Set both mtime and ctime of inode inum to the current filesystem time and
+ * commit the change.  The transaction is re-run from "retry" whenever it
+ * reports a restart.  Returns 0 on success or the error from the failed step.
+ */
+static int inode_update_times(struct bch_fs *c, subvol_inum inum)
+{
+	struct btree_trans *trans;
+	struct btree_iter iter;
+	struct bch_inode_unpacked inode_u;
+	int ret = 0;
+	u64 now;
+
+	trans = bch2_trans_get(c);
+retry:
+	bch2_trans_begin(trans);
+	now = bch2_current_time(c);
+
+	ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent);
+	if (ret)
+		goto err;
+
+	inode_u.bi_mtime = now;
+	inode_u.bi_ctime = now;
+
+	ret = bch2_inode_write(trans, &iter, &inode_u);
+	if (ret)
+		goto err;
+
+	ret = bch2_trans_commit(trans, NULL, NULL,
+				BCH_TRANS_COMMIT_no_enospc);
+err:
+	bch2_trans_iter_exit(trans, &iter);
+	/*
+	 * Transaction restarts are reported with BCH_ERR_transaction_restart*
+	 * codes rather than -EINTR; match on the error class so the update is
+	 * actually retried instead of failing the caller.
+	 */
+	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+		goto retry;
+
+	bch2_trans_put(trans);
+	return ret;
+}
+
+/*
+ * Synchronously write aligned_size bytes from buf to inode inum at byte
+ * offset aligned_offset.  Both aligned_size and aligned_offset must be
+ * multiples of block_bytes(c) (enforced with BUG_ON).
+ *
+ * new_i_size is handed to the write op as op->new_i_size.  *written_out is
+ * set to 0 up front and, if the op completes without error, to the number of
+ * bytes written (op->written sectors << 9).
+ *
+ * Returns -ENOSPC if the disk reservation cannot be obtained, otherwise
+ * op->error.
+ */
+static int write_aligned(struct bch_fs *c, subvol_inum inum,
+ struct bch_io_opts io_opts, void *buf,
+ size_t aligned_size, off_t aligned_offset,
+ off_t new_i_size, size_t *written_out)
+{
+
+ struct write_aligned_op_t w = { 0 }
+;
+ struct bch_write_op *op = &w.op;
+ struct bio_vec bv;
+
+ BUG_ON(aligned_size & (block_bytes(c) - 1));
+ BUG_ON(aligned_offset & (block_bytes(c) - 1));
+
+ *written_out = 0;
+
+ closure_init_stack(&w.cl);
+
+ bch2_write_op_init(op, c, io_opts); /* XXX reads from op?! */
+ op->write_point = writepoint_hashed(0);
+ op->nr_replicas = io_opts.data_replicas;
+ op->target = io_opts.foreground_target;
+ op->subvol = inum.subvol;
+ /* position is (inode, sector offset): bytes >> 9 */
+ op->pos = POS(inum.inum, aligned_offset >> 9);
+ op->new_i_size = new_i_size;
+ op->end_io = bcachefs_fuse_write_endio;
+
+ userbio_init(&op->wbio.bio, &bv, buf, aligned_size);
+ bio_set_op_attrs(&op->wbio.bio, REQ_OP_WRITE, REQ_SYNC);
+
+ /* reserve aligned_size >> 9 sectors for op->nr_replicas replicas */
+ if (bch2_disk_reservation_get(c, &op->res, aligned_size >> 9,
+ op->nr_replicas, 0)) {
+ /* XXX: use check_range_allocated like dio write path */
+ return -ENOSPC;
+ }
+
+ /* this reference is dropped by bcachefs_fuse_write_endio() */
+ closure_get(&w.cl);
+
+ closure_call(&op->cl, bch2_write, NULL, NULL);
+
+ /* wait for end_io before reading the op's results */
+ closure_sync(&w.cl);
+
+ if (!op->error)
+ *written_out = op->written << 9;
+
+ return op->error;
+}
+
+/*
+ * FUSE write: write size bytes from buf to inode ino at offset.
+ *
+ * The range is widened to block boundaries.  If the write starts or ends in
+ * the middle of a block, the existing contents of that block are read first
+ * so they can be preserved; the caller's data is then overlaid and the whole
+ * aligned range is written with write_aligned().  On success the inode's
+ * mtime/ctime are updated and the number of caller bytes written is replied;
+ * otherwise a positive errno is replied.  fi is unused.
+ */
+static void bcachefs_fuse_write(fuse_req_t req, fuse_ino_t ino,
+				const char *buf, size_t size,
+				off_t offset,
+				struct fuse_file_info *fi)
+{
+	subvol_inum inum = map_root_ino(ino);
+	struct bch_fs *c = fuse_req_userdata(req);
+	struct bch_io_opts io_opts;
+	size_t aligned_written;
+	int ret = 0;
+
+	/*
+	 * Log the inode number, not the whole subvol_inum struct: passing the
+	 * struct through varargs does not match %llu and garbles the
+	 * remaining arguments.
+	 */
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write(%llu, %zu, %lld)\n",
+		 inum.inum, size, (long long) offset);
+
+	struct fuse_align_io align = align_io(c, size, offset);
+	void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size);
+	if (!aligned_buf) {
+		/* Fail the request instead of aborting the whole daemon. */
+		fuse_reply_err(req, ENOMEM);
+		return;
+	}
+
+	if (get_inode_io_opts(c, inum, &io_opts)) {
+		ret = -ENOENT;
+		goto err;
+	}
+
+	/* Realign the data and read in start and end, if needed */
+
+	/* Read partial start data. */
+	if (align.pad_start) {
+		memset(aligned_buf, 0, block_bytes(c));
+
+		ret = read_aligned(c, inum, block_bytes(c), align.start,
+				   aligned_buf);
+		if (ret)
+			goto err;
+	}
+
+	/*
+	 * Read partial end data. If the whole write fits in one block, the
+	 * start data and the end data are the same so this isn't needed.
+	 */
+	if (align.pad_end &&
+	    !(align.pad_start && align.size == block_bytes(c))) {
+		off_t partial_end_start = align.end - block_bytes(c);
+		size_t buf_offset = align.size - block_bytes(c);
+
+		memset(aligned_buf + buf_offset, 0, block_bytes(c));
+
+		ret = read_aligned(c, inum, block_bytes(c), partial_end_start,
+				   aligned_buf + buf_offset);
+		if (ret)
+			goto err;
+	}
+
+	/* Overlay what we want to write. */
+	memcpy(aligned_buf + align.pad_start, buf, size);
+
+	/* Actually write. */
+	ret = write_aligned(c, inum, io_opts, aligned_buf,
+			    align.size, align.start,
+			    offset + size, &aligned_written);
+
+	/* Figure out how many unaligned bytes were written. */
+	size_t written = align_fix_up_bytes(&align, aligned_written);
+	BUG_ON(written > size);
+
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write: wrote %zd bytes\n",
+		 written);
+
+	/* A short write still counts as success. */
+	if (written > 0)
+		ret = 0;
+
+	/*
+	 * Update inode times.
+	 * TODO: Integrate with bch2_extent_update()
+	 */
+	if (!ret)
+		ret = inode_update_times(c, inum);
+
+	if (!ret) {
+		BUG_ON(written == 0);
+		fuse_reply_write(req, written);
+		free(aligned_buf);
+		return;
+	}
+
+err:
+	fuse_reply_err(req, -ret);
+	free(aligned_buf);
+}
+
+/*
+ * FUSE symlink: create a symlink called name in dir_ino whose target is link.
+ *
+ * The inode is created first; the target string, including its terminating
+ * NUL, is then written as the file's data in a zero-padded, block-aligned
+ * buffer.  A short write is not handled (BUG_ON).  Replies with the new entry
+ * on success or a positive errno on failure.
+ */
+static void bcachefs_fuse_symlink(fuse_req_t req, const char *link,
+				  fuse_ino_t dir_ino, const char *name)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	subvol_inum dir = map_root_ino(dir_ino);
+	const size_t link_len = strlen(link);
+	struct bch_inode_unpacked new_inode;
+	struct bch_io_opts io_opts;
+	struct fuse_align_io align;
+	size_t aligned_written, written;
+	subvol_inum inum;
+	void *aligned_buf;
+	int ret;
+
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_symlink(%s, %llu, %s)\n",
+		 link, dir.inum, name);
+
+	ret = do_create(c, dir, name, S_IFLNK|S_IRWXUGO, 0, &new_inode);
+	if (ret)
+		goto err;
+
+	/* The I/O options are taken from the parent directory. */
+	ret = get_inode_io_opts(c, dir, &io_opts);
+	if (ret)
+		goto err;
+
+	align = align_io(c, link_len + 1, 0);
+
+	aligned_buf = aligned_alloc(PAGE_SIZE, align.size);
+	BUG_ON(!aligned_buf);
+
+	/* The zero fill provides both the NUL terminator and the padding. */
+	memset(aligned_buf, 0, align.size);
+	memcpy(aligned_buf, link, link_len); /* already terminated */
+
+	inum = (subvol_inum) { dir.subvol, new_inode.bi_inum };
+
+	ret = write_aligned(c, inum, io_opts, aligned_buf,
+			    align.size, align.start, link_len + 1,
+			    &aligned_written);
+	free(aligned_buf);
+	if (ret)
+		goto err;
+
+	written = align_fix_up_bytes(&align, aligned_written);
+	BUG_ON(written != link_len + 1); // TODO: handle short
+
+	ret = inode_update_times(c, inum);
+	if (ret)
+		goto err;
+
+	new_inode.bi_size = written;
+
+	struct fuse_entry_param entry = inode_to_entry(c, &new_inode);
+	fuse_reply_entry(req, &entry);
+	return;
+
+err:
+	fuse_reply_err(req, -ret);
+}
+
+/*
+ * FUSE readlink: read the inode's data (bi_size bytes, widened to block
+ * boundaries) and reply with it as the link target.  The last byte of the
+ * aligned buffer must be NUL (BUG_ON otherwise).  On failure a positive errno
+ * is replied.
+ */
+static void bcachefs_fuse_readlink(fuse_req_t req, fuse_ino_t ino)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	subvol_inum inum = map_root_ino(ino);
+	struct bch_inode_unpacked inode;
+	struct fuse_align_io align;
+	char *target = NULL;
+	int ret;
+
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readlink(%llu)\n", inum.inum);
+
+	ret = bch2_inode_find_by_inum(c, inum, &inode);
+	if (ret)
+		goto out;
+
+	align = align_io(c, inode.bi_size, 0);
+
+	target = aligned_alloc(PAGE_SIZE, align.size);
+	if (!target) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = read_aligned(c, inum, align.size, align.start, target);
+	if (ret)
+		goto out;
+
+	BUG_ON(target[align.size - 1] != 0);
+
+	fuse_reply_readlink(req, target);
+
+out:
+	/* ret is 0 here only when the link has already been replied. */
+	if (ret)
+		fuse_reply_err(req, -ret);
+
+	free(target);
+}
+
+/*
+ * Compiled out: empty placeholder handlers for flush, release, fsync and
+ * opendir.  None of them is referenced by an active entry in the ops table.
+ */
+#if 0
+/*
+ * FUSE flush is essentially the close() call, however it is not guaranteed
+ * that one flush happens per open/create.
+ *
+ * It doesn't have to do anything, and is mostly relevant for NFS-style
+ * filesystems where close has some relationship to caching.
+ */
+static void bcachefs_fuse_flush(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_release(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_fsync(fuse_req_t req, fuse_ino_t inum, int datasync,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_opendir(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+/*
+ * Per-request readdir state.  The embedded dir_context is what the directory
+ * walker sees; fuse_filldir() recovers this wrapper from it with
+ * container_of().
+ */
+struct fuse_dir_context {
+ struct dir_context ctx;
+ fuse_req_t req;
+ /* next free position in the reply buffer; advanced by fuse_filldir() */
+ char *buf;
+ /* bytes still available at buf */
+ size_t bufsize;
+};
+
+/*
+ * Layout of one directory entry as written into the readdir reply buffer by
+ * fuse_add_direntry2().  NOTE(review): presumably mirrors the FUSE protocol's
+ * dirent layout -- confirm against the libfuse/kernel headers.
+ */
+struct fuse_dirent {
+ uint64_t ino;
+ uint64_t off;
+ uint32_t namelen;
+ uint32_t type;
+ /* namelen bytes of name, followed by zero padding; not NUL-terminated */
+ char name[];
+};
+
+/* Size of the fixed header that precedes the name. */
+#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
+/* Round x up to the next multiple of sizeof(uint64_t). */
+#define FUSE_DIRENT_ALIGN(x) \
+ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+
+/*
+ * Append one directory entry to buf if it fits in bufsize bytes.
+ *
+ * Always returns the padded size the entry occupies, whether or not it was
+ * written; nothing is written when buf is NULL or the entry does not fit, so
+ * callers detect "did not fit" by comparing the result against bufsize.
+ * Only st_ino and the file-type bits of st_mode are taken from stbuf.
+ */
+static size_t fuse_add_direntry2(char *buf, size_t bufsize,
+				 const char *name, int namelen,
+				 const struct stat *stbuf, off_t off)
+{
+	const size_t raw_len = FUSE_NAME_OFFSET + namelen;
+	const size_t padded_len = FUSE_DIRENT_ALIGN(raw_len);
+
+	if (buf != NULL && padded_len <= bufsize) {
+		struct fuse_dirent *ent = (struct fuse_dirent *) buf;
+
+		ent->ino	= stbuf->st_ino;
+		ent->off	= off;
+		ent->namelen	= namelen;
+		ent->type	= (stbuf->st_mode & S_IFMT) >> 12;
+		memcpy(ent->name, name, namelen);
+		/* zero the alignment padding after the name */
+		memset(ent->name + namelen, 0, padded_len - raw_len);
+	}
+
+	return padded_len;
+}
+
+/*
+ * dir_context actor: append one entry to the readdir reply buffer.
+ *
+ * The entry is recorded with offset pos + 1.  Returns 0 if the entry was
+ * added (advancing the buffer cursor), or -1 if it did not fit in the space
+ * that remains.
+ */
+static int fuse_filldir(struct dir_context *_ctx,
+			const char *name, int namelen,
+			loff_t pos, u64 ino, unsigned type)
+{
+	struct fuse_dir_context *dctx =
+		container_of(_ctx, struct fuse_dir_context, ctx);
+	struct stat statbuf = {
+		.st_ino		= unmap_root_ino(ino),
+		.st_mode	= type << 12,
+	};
+	size_t used;
+
+	fuse_log(FUSE_LOG_DEBUG, "fuse_filldir(name=%s inum=%llu pos=%llu)\n",
+		 name, statbuf.st_ino, pos);
+
+	used = fuse_add_direntry2(dctx->buf, dctx->bufsize,
+				  name, namelen, &statbuf, pos + 1);
+	if (used > dctx->bufsize)
+		return -1;
+
+	dctx->bufsize	-= used;
+	dctx->buf	+= used;
+
+	return 0;
+}
+
+/*
+ * Emit the "." and ".." entries when the directory position is 0 or 1,
+ * advancing the position past each entry that was emitted.  ".." is always
+ * reported with inode 1.
+ *
+ * Returns false if an entry did not fit in the reply buffer, true otherwise.
+ */
+static bool handle_dots(struct fuse_dir_context *ctx, fuse_ino_t dir)
+{
+	static const char *const dot_names[] = { ".", ".." };
+
+	for (int i = 0; i < 2; i++) {
+		if (ctx->ctx.pos != i)
+			continue;
+
+		/* "." is the directory itself; TODO: real parent for ".." */
+		u64 ino = i == 0 ? dir : 1;
+
+		if (fuse_filldir(&ctx->ctx, dot_names[i], i + 1,
+				 ctx->ctx.pos, ino, DT_DIR) < 0)
+			return false;
+
+		ctx->ctx.pos = i + 1;
+	}
+
+	return true;
+}
+
+/*
+ * FUSE readdir: fill a reply buffer of at most size bytes with directory
+ * entries of dir_ino, starting at position off.
+ *
+ * "." and ".." are produced by handle_dots(); the remaining entries come from
+ * bch2_readdir() through fuse_filldir().  Whatever fitted in the buffer is
+ * replied; errors are replied as positive errno values.  fi is unused.
+ */
+static void bcachefs_fuse_readdir(fuse_req_t req, fuse_ino_t dir_ino,
+				  size_t size, off_t off,
+				  struct fuse_file_info *fi)
+{
+	subvol_inum dir = map_root_ino(dir_ino);
+	struct bch_fs *c = fuse_req_userdata(req);
+	struct bch_inode_unpacked bi;
+	char *buf = calloc(size, 1);
+
+	/*
+	 * Without this check a failed allocation would leave buf NULL, which
+	 * the fill callback would then advance and pass to fuse_reply_buf()
+	 * with a nonzero length.
+	 */
+	if (!buf && size) {
+		fuse_reply_err(req, ENOMEM);
+		return;
+	}
+
+	struct fuse_dir_context ctx = {
+		.ctx.actor = fuse_filldir,
+		.ctx.pos = off,
+		.req = req,
+		.buf = buf,
+		.bufsize = size,
+	};
+	int ret = 0;
+
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir(dir=%llu, size=%zu, "
+		 "off=%lld)\n", dir.inum, size, off);
+
+	ret = bch2_inode_find_by_inum(c, dir, &bi);
+	if (ret)
+		goto reply;
+
+	if (!S_ISDIR(bi.bi_mode)) {
+		ret = -ENOTDIR;
+		goto reply;
+	}
+
+	/* Buffer already full after the dot entries: reply with what fit. */
+	if (!handle_dots(&ctx, dir.inum))
+		goto reply;
+
+	ret = bch2_readdir(c, dir, &ctx.ctx);
+reply:
+	if (!ret) {
+		/* ctx.buf has been advanced past every entry that was added */
+		fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir reply %zd\n",
+			 ctx.buf - buf);
+		fuse_reply_buf(req, buf, ctx.buf - buf);
+	} else {
+		fuse_reply_err(req, -ret);
+	}
+
+	free(buf);
+}
+
+/*
+ * Compiled out: empty placeholder handlers for readdirplus, releasedir and
+ * fsyncdir.  None of them is referenced by an active entry in the ops table.
+ */
+#if 0
+static void bcachefs_fuse_readdirplus(fuse_req_t req, fuse_ino_t dir,
+ size_t size, off_t off,
+ struct fuse_file_info *fi)
+{
+
+}
+
+static void bcachefs_fuse_releasedir(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_fsyncdir(fuse_req_t req, fuse_ino_t inum, int datasync,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+/*
+ * FUSE statfs: report filesystem usage.
+ *
+ * Capacity and usage from bch2_fs_usage_read_short() are shifted right by
+ * c->block_bits to express them in units of the reported block size.  The
+ * inum argument is unused.
+ */
+static void bcachefs_fuse_statfs(fuse_req_t req, fuse_ino_t inum)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
+	unsigned shift = c->block_bits;
+	/*
+	 * Computed up front so it can be used for both f_bfree and f_bavail:
+	 * leaving f_bavail at zero makes the filesystem look full to callers
+	 * that read the available (rather than free) block count.
+	 */
+	u64 free_blocks = (usage.capacity - usage.used) >> shift;
+	struct statvfs statbuf = {
+		.f_bsize	= block_bytes(c),
+		.f_frsize	= block_bytes(c),
+		.f_blocks	= usage.capacity >> shift,
+		.f_bfree	= free_blocks,
+		.f_bavail	= free_blocks,
+		.f_files	= usage.nr_inodes,
+		.f_ffree	= U64_MAX,
+		.f_namemax	= BCH_NAME_MAX,
+	};
+
+	fuse_reply_statfs(req, &statbuf);
+}
+
+/*
+ * Compiled out: unfinished placeholder handlers for the xattr operations.
+ * The getxattr stub is incomplete and would not compile if enabled.
+ */
+#if 0
+static void bcachefs_fuse_setxattr(fuse_req_t req, fuse_ino_t inum,
+ const char *name, const char *value,
+ size_t size, int flags)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_getxattr(fuse_req_t req, fuse_ino_t inum,
+ const char *name, size_t size)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+
+ fuse_reply_xattr(req, );
+}
+
+static void bcachefs_fuse_listxattr(fuse_req_t req, fuse_ino_t inum, size_t size)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_removexattr(fuse_req_t req, fuse_ino_t inum,
+ const char *name)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+/*
+ * FUSE create: create a file called name in dir_ino via do_create() (with a
+ * device number of 0) and reply with the new entry together with fi, or with
+ * the negated error code on failure.
+ */
+static void bcachefs_fuse_create(fuse_req_t req, fuse_ino_t dir_ino,
+				 const char *name, mode_t mode,
+				 struct fuse_file_info *fi)
+{
+	struct bch_fs *c = fuse_req_userdata(req);
+	subvol_inum dir = map_root_ino(dir_ino);
+	struct bch_inode_unpacked created;
+
+	fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_create(%llu, %s, %x)\n",
+		 dir.inum, name, mode);
+
+	int ret = do_create(c, dir, name, mode, 0, &created);
+	if (ret) {
+		fuse_reply_err(req, -ret);
+		return;
+	}
+
+	struct fuse_entry_param entry = inode_to_entry(c, &created);
+	fuse_reply_create(req, &entry, fi);
+}
+
+/*
+ * Compiled out: empty placeholder handlers for write_buf and fallocate.
+ * Neither is referenced by an active entry in the ops table.
+ */
+#if 0
+static void bcachefs_fuse_write_buf(fuse_req_t req, fuse_ino_t inum,
+ struct fuse_bufvec *bufv, off_t off,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+
+static void bcachefs_fuse_fallocate(fuse_req_t req, fuse_ino_t inum, int mode,
+ off_t offset, off_t length,
+ struct fuse_file_info *fi)
+{
+ struct bch_fs *c = fuse_req_userdata(req);
+}
+#endif
+
+/*
+ * Low-level FUSE operation table passed to fuse_session_new().  Entries that
+ * are commented out or inside "#if 0" have no handler in this file and are
+ * left unset.
+ */
+static const struct fuse_lowlevel_ops bcachefs_fuse_ops = {
+ .init = bcachefs_fuse_init,
+ .destroy = bcachefs_fuse_destroy,
+ .lookup = bcachefs_fuse_lookup,
+ .getattr = bcachefs_fuse_getattr,
+ .setattr = bcachefs_fuse_setattr,
+ .readlink = bcachefs_fuse_readlink,
+ .mknod = bcachefs_fuse_mknod,
+ .mkdir = bcachefs_fuse_mkdir,
+ .unlink = bcachefs_fuse_unlink,
+ .rmdir = bcachefs_fuse_rmdir,
+ .symlink = bcachefs_fuse_symlink,
+ .rename = bcachefs_fuse_rename,
+ .link = bcachefs_fuse_link,
+ .open = bcachefs_fuse_open,
+ .read = bcachefs_fuse_read,
+ .write = bcachefs_fuse_write,
+ //.flush = bcachefs_fuse_flush,
+ //.release = bcachefs_fuse_release,
+ //.fsync = bcachefs_fuse_fsync,
+ //.opendir = bcachefs_fuse_opendir,
+ .readdir = bcachefs_fuse_readdir,
+ //.readdirplus = bcachefs_fuse_readdirplus,
+ //.releasedir = bcachefs_fuse_releasedir,
+ //.fsyncdir = bcachefs_fuse_fsyncdir,
+ .statfs = bcachefs_fuse_statfs,
+ //.setxattr = bcachefs_fuse_setxattr,
+ //.getxattr = bcachefs_fuse_getxattr,
+ //.listxattr = bcachefs_fuse_listxattr,
+ //.removexattr = bcachefs_fuse_removexattr,
+ .create = bcachefs_fuse_create,
+
+ /* posix locks: */
+#if 0
+ .getlk = bcachefs_fuse_getlk,
+ .setlk = bcachefs_fuse_setlk,
+#endif
+ //.write_buf = bcachefs_fuse_write_buf,
+ //.fallocate = bcachefs_fuse_fallocate,
+
+};
+
+/*
+ * Setup and command parsing.
+ */
+
+/* Command-line state collected while parsing the fusemount arguments. */
+struct bf_context {
+ /* first non-option argument as given, e.g. "dev1:dev2" (heap copy) */
+ char *devices_str;
+ /* devices_str split on ':' by tokenize_devices(); each entry is a heap copy */
+ char **devices;
+ /* number of entries in devices */
+ int nr_devices;
+};
+
+/*
+ * Release everything owned by ctx: the raw device string, each tokenized
+ * device name, and the array holding them.  ctx itself is not freed.
+ */
+static void bf_context_free(struct bf_context *ctx)
+{
+	free(ctx->devices_str);
+
+	for (int idx = 0; idx < ctx->nr_devices; idx++)
+		free(ctx->devices[idx]);
+
+	free(ctx->devices);
+}
+
+/*
+ * Option template table for fuse_opt_parse(): it contains only the
+ * terminator, so every argument is handed to bf_opt_proc().
+ */
+static struct fuse_opt bf_opts[] = {
+ FUSE_OPT_END
+};
+
+/*
+ * Fuse option parsing helper -- returning 0 means we consumed the argument, 1
+ * means we did not.
+ *
+ * Only the first non-option argument is consumed; a copy of it is stored as
+ * the device string in the bf_context passed through data.
+ */
+static int bf_opt_proc(void *data, const char *arg, int key,
+		       struct fuse_args *outargs)
+{
+	struct bf_context *ctx = data;
+
+	if (key != FUSE_OPT_KEY_NONOPT)
+		return 1;
+
+	/* A device string was already captured: leave this one to fuse. */
+	if (ctx->devices_str)
+		return 1;
+
+	/* Just extract the first non-option string. */
+	ctx->devices_str = strdup(arg);
+	return 0;
+}
+
+/*
+ * dev1:dev2 -> [ dev1, dev2 ]
+ * dev       -> [ dev ]
+ *
+ * Split ctx->devices_str on ':' into ctx->devices / ctx->nr_devices, skipping
+ * empty components.  If no non-empty component is found, the whole string is
+ * used as the single device.  Every entry is an independent heap copy.
+ *
+ * Allocation failures are fatal (die()).
+ */
+static void tokenize_devices(struct bf_context *ctx)
+{
+	char *devices_str = strdup(ctx->devices_str);
+	char *devices_tmp = devices_str;
+	char **devices = NULL;
+	int nr = 0;
+	char *dev = NULL;
+
+	if (!devices_str)
+		die("out of memory");
+
+	/* strsep() modifies its input, hence the scratch copy above. */
+	while ((dev = strsep(&devices_tmp, ":"))) {
+		if (strlen(dev) > 0) {
+			/*
+			 * Grow through a temporary so a failed realloc() is
+			 * detected instead of being dereferenced below.
+			 */
+			char **grown = realloc(devices,
+					       (nr + 1) * sizeof *devices);
+			if (!grown)
+				die("out of memory");
+			devices = grown;
+
+			devices[nr] = strdup(dev);
+			if (!devices[nr])
+				die("out of memory");
+			nr++;
+		}
+	}
+
+	if (!devices) {
+		devices = malloc(sizeof *devices);
+		if (!devices)
+			die("out of memory");
+
+		devices[0] = strdup(ctx->devices_str);
+		if (!devices[0])
+			die("out of memory");
+		nr = 1;
+	}
+
+	ctx->devices = devices;
+	ctx->nr_devices = nr;
+
+	free(devices_str);
+}
+
+/* Print the one-line fusemount synopsis followed by an empty line. */
+static void usage(char *argv[])
+{
+	const char *prog = argv[0];
+
+	printf("Usage: %s fusemount [options] <dev>[:dev2:...] <mountpoint>\n",
+	       prog);
+	printf("\n");
+}
+
+/*
+ * "bcachefs fusemount" entry point: parse the command line, open the
+ * filesystem on the given device(s), mount it through FUSE and run the
+ * session loop until it exits.
+ *
+ * Returns 0 on success (including --help/--version) and 1 on a usage error,
+ * on failure to set up or mount the FUSE session, or when the session loop
+ * returns nonzero.  Argument-parsing failures and failure to open the
+ * filesystem terminate through die().
+ */
+int cmd_fusemount(int argc, char *argv[])
+{
+	struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
+	struct bch_opts bch_opts = bch2_opts_empty();
+	struct bf_context ctx = { 0 };
+	struct bch_fs *c = NULL;
+	struct fuse_session *se = NULL;
+	int ret = 0, i;
+
+	/* Parse arguments. */
+	if (fuse_opt_parse(&args, &ctx, bf_opts, bf_opt_proc) < 0)
+		die("fuse_opt_parse err: %m");
+
+	struct fuse_cmdline_opts fuse_opts;
+	if (fuse_parse_cmdline(&args, &fuse_opts) < 0)
+		die("fuse_parse_cmdline err: %m");
+
+	if (fuse_opts.show_help) {
+		usage(argv);
+		fuse_cmdline_help();
+		fuse_lowlevel_help();
+		ret = 0;
+		goto out;
+	}
+	if (fuse_opts.show_version) {
+		printf("FUSE library version %s\n", fuse_pkgversion());
+		fuse_lowlevel_version();
+		printf("bcachefs version: %s\n", VERSION_STRING);
+		ret = 0;
+		goto out;
+	}
+	if (!fuse_opts.mountpoint) {
+		usage(argv);
+		printf("Please supply a mountpoint.\n");
+		ret = 1;
+		goto out;
+	}
+	if (!ctx.devices_str) {
+		usage(argv);
+		printf("Please specify a device or device1:device2:...\n");
+		ret = 1;
+		goto out;
+	}
+	tokenize_devices(&ctx);
+
+	/* Pass "-o fsname=dev1:dev2:..." on to fuse. */
+	struct printbuf fsname = PRINTBUF;
+	prt_printf(&fsname, "fsname=");
+	for (i = 0; i < ctx.nr_devices; ++i) {
+		if (i)
+			prt_str(&fsname, ":");
+		prt_str(&fsname, ctx.devices[i]);
+	}
+
+	fuse_opt_add_arg(&args, "-o");
+	fuse_opt_add_arg(&args, fsname.buf);
+	/* fuse_opt_add_arg() keeps its own copy of the string. */
+	printbuf_exit(&fsname);
+
+	/* Open bch */
+	printf("Opening bcachefs filesystem on:\n");
+	for (i = 0; i < ctx.nr_devices; ++i)
+		printf("\t%s\n", ctx.devices[i]);
+
+	c = bch2_fs_open(ctx.devices, ctx.nr_devices, bch_opts);
+	if (IS_ERR(c))
+		die("error opening %s: %s", ctx.devices_str,
+		    bch2_err_str(PTR_ERR(c)));
+
+	/* Fuse */
+	se = fuse_session_new(&args, &bcachefs_fuse_ops,
+			      sizeof(bcachefs_fuse_ops), c);
+	if (!se) {
+		fprintf(stderr, "fuse_lowlevel_new err: %m\n");
+		goto err;
+	}
+
+	if (fuse_set_signal_handlers(se) < 0) {
+		fprintf(stderr, "fuse_set_signal_handlers err: %m\n");
+		goto err;
+	}
+
+	if (fuse_session_mount(se, fuse_opts.mountpoint)) {
+		fprintf(stderr, "fuse_mount err: %m\n");
+		goto err;
+	}
+
+	/* This print statement is a trigger for tests. */
+	printf("Fuse mount initialized.\n");
+
+	if (fuse_opts.foreground == 0){
+		printf("Fuse forcing to foreground mode, due gcc constructors usage.\n");
+		fuse_opts.foreground = 1;
+	}
+
+	fuse_daemonize(fuse_opts.foreground);
+
+	ret = fuse_session_loop(se);
+	goto out;
+
+err:
+	/*
+	 * Setup failed after the filesystem was opened: report failure to the
+	 * caller (ret used to stay 0 here, so a failed mount exited with
+	 * status 0) and stop the filesystem before tearing the session down.
+	 */
+	ret = 1;
+	bch2_fs_stop(c);
+
+out:
+	if (se) {
+		fuse_session_unmount(se);
+		fuse_remove_signal_handlers(se);
+		fuse_session_destroy(se);
+	}
+
+	free(fuse_opts.mountpoint);
+	fuse_opt_free_args(&args);
+	bf_context_free(&ctx);
+
+	return ret ? 1 : 0;
+}
+
+#endif /* BCACHEFS_FUSE */
diff --git a/c_src/cmd_key.c b/c_src/cmd_key.c
new file mode 100644
index 00000000..adb0ac8d
--- /dev/null
+++ b/c_src/cmd_key.c
@@ -0,0 +1,161 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "libbcachefs/checksum.h"
+#include "crypto.h"
+#include "libbcachefs.h"
+#include "tools-util.h"
+
+/* Print the help text for "bcachefs unlock" on stdout. */
+static void unlock_usage(void)
+{
+	static const char help[] =
+		"bcachefs unlock - unlock an encrypted filesystem so it can be mounted\n"
+		"Usage: bcachefs unlock [OPTION] device\n"
+		"\n"
+		"Options:\n"
+		" -c Check if a device is encrypted\n"
+		" -k (session|user|user_session)\n"
+		" Keyring to add to (default: user)\n"
+		" -f Passphrase file to read from (disables passphrase prompt)\n"
+		" -h Display this help and exit\n"
+		"Report bugs to <linux-bcachefs@vger.kernel.org>";
+
+	/* puts() supplies the final newline */
+	puts(help);
+}
+
+/*
+ * bcachefs unlock: read the superblock of an encrypted device and pass the
+ * passphrase to bch2_add_key() for the selected keyring.
+ *
+ * Options:
+ *   -c         only check that the device is encrypted, then exit
+ *   -k keyring keyring name handed to bch2_add_key() (default "user")
+ *   -f file    read the passphrase from this file instead of prompting
+ *   -h         print usage and exit
+ *
+ * Exactly one device argument is required.  Returns 0 on success; every
+ * failure is reported through die().
+ */
+int cmd_unlock(int argc, char *argv[])
+{
+	const char *keyring = "user";
+	bool check = false;
+	const char *passphrase_file_path = NULL;
+	char *passphrase = NULL;
+
+	int opt;
+
+	while ((opt = getopt(argc, argv, "cf:k:h")) != -1)
+		switch (opt) {
+		case 'c':
+			check = true;
+			break;
+		case 'k':
+			/*
+			 * optarg points into argv, which outlives this
+			 * function, so no (never freed) copy is needed
+			 */
+			keyring = optarg;
+			break;
+		case 'f':
+			passphrase_file_path = optarg;
+			break;
+		case 'h':
+			unlock_usage();
+			exit(EXIT_SUCCESS);
+		}
+	args_shift(optind);
+
+	char *dev = arg_pop();
+	if (!dev)
+		die("Please supply a device");
+
+	if (argc)
+		die("Too many arguments");
+
+	struct bch_opts opts = bch2_opts_empty();
+
+	/* only the superblock is read; never take the device exclusively or write */
+	opt_set(opts, noexcl, true);
+	opt_set(opts, nochanges, true);
+
+	struct bch_sb_handle sb;
+	int ret = bch2_read_super(dev, &opts, &sb);
+	if (ret)
+		die("Error opening %s: %s", dev, bch2_err_str(ret));
+
+	if (!bch2_sb_is_encrypted(sb.sb))
+		die("%s is not encrypted", dev);
+
+	/* -c: reaching this point already proves the device is encrypted */
+	if (check)
+		exit(EXIT_SUCCESS);
+	if (passphrase_file_path){
+		passphrase = read_file_str(AT_FDCWD, passphrase_file_path);
+	} else {
+		passphrase = read_passphrase("Enter passphrase: ");
+	}
+
+	bch2_add_key(sb.sb, "user", keyring, passphrase);
+
+	bch2_free_super(&sb);
+	/* wipe the secret before handing the memory back to the allocator */
+	memzero_explicit(passphrase, strlen(passphrase));
+	free(passphrase);
+	return 0;
+}
+
+/*
+ * bcachefs set-passphrase: re-encrypt the filesystem's superblock key with a
+ * newly entered passphrase.
+ *
+ * argv[1..] are the member devices.  The current key is recovered with
+ * bch2_decrypt_sb_key(), encrypted under a key derived from the new
+ * passphrase and written back to the superblock.
+ *
+ * Returns 0 on success; every failure is reported through die().
+ */
+int cmd_set_passphrase(int argc, char *argv[])
+{
+	struct bch_opts opts = bch2_opts_empty();
+	struct bch_fs *c;
+
+	if (argc < 2)
+		die("Please supply one or more devices");
+
+	opt_set(opts, nostart, true);
+
+	/*
+	 * we use bch2_fs_open() here, instead of just reading the superblock,
+	 * to make sure we're opening and updating every component device:
+	 */
+
+	c = bch2_fs_open(argv + 1, argc - 1, opts);
+	if (IS_ERR(c))
+		die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));
+
+	struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
+	if (!crypt)
+		die("Filesystem does not have encryption enabled");
+
+	struct bch_encrypted_key new_key;
+	new_key.magic = BCH_KEY_MAGIC;
+
+	int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key);
+	if (ret)
+		die("Error getting current key");
+
+	char *new_passphrase = read_passphrase_twice("Enter new passphrase: ");
+	struct bch_key passphrase_key = derive_passphrase(crypt, new_passphrase);
+
+	/*
+	 * The passphrase is not needed once the key has been derived; wipe and
+	 * release it the same way cmd_unlock() does.
+	 */
+	memzero_explicit(new_passphrase, strlen(new_passphrase));
+	free(new_passphrase);
+
+	if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(c->disk_sb.sb),
+				    &new_key, sizeof(new_key)))
+		die("error encrypting key");
+	/* the derived key has served its purpose as well */
+	memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+	crypt->key = new_key;
+
+	bch2_revoke_key(c->disk_sb.sb);
+	bch2_write_super(c);
+	bch2_fs_stop(c);
+	return 0;
+}
+
+/*
+ * bcachefs remove-passphrase: store the superblock key that
+ * bch2_decrypt_sb_key() recovers back into the crypt field as-is and rewrite
+ * the superblock.
+ *
+ * argv[1..] are the member devices.  Returns 0; failures go through die().
+ */
+int cmd_remove_passphrase(int argc, char *argv[])
+{
+	struct bch_opts open_opts = bch2_opts_empty();
+
+	if (argc < 2)
+		die("Please supply one or more devices");
+
+	opt_set(open_opts, nostart, true);
+
+	struct bch_fs *c = bch2_fs_open(argv + 1, argc - 1, open_opts);
+	if (IS_ERR(c))
+		die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));
+
+	struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt);
+	if (!crypt)
+		die("Filesystem does not have encryption enabled");
+
+	struct bch_encrypted_key plain_key;
+	plain_key.magic = BCH_KEY_MAGIC;
+
+	if (bch2_decrypt_sb_key(c, crypt, &plain_key.key))
+		die("Error getting current key");
+
+	crypt->key = plain_key;
+
+	bch2_write_super(c);
+	bch2_fs_stop(c);
+	return 0;
+}
diff --git a/c_src/cmd_kill_btree_node.c b/c_src/cmd_kill_btree_node.c
new file mode 100644
index 00000000..c8f43150
--- /dev/null
+++ b/c_src/cmd_kill_btree_node.c
@@ -0,0 +1,140 @@
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "tools-util.h"
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs/sb-members.h"
+#include "libbcachefs/super.h"
+
+/*
+ * Print the help text for "bcachefs kill_btree_node" on stdout.
+ *
+ * The option list matches what cmd_kill_btree_node() parses: a repeatable
+ * -n btree[:level[:index]] and -h.
+ */
+static void kill_btree_node_usage(void)
+{
+	puts("bcachefs kill_btree_node - make btree nodes unreadable\n"
+	     "Usage: bcachefs kill_btree_node [OPTION]... <devices>\n"
+	     "\n"
+	     "Options:\n"
+	     "  -n btree[:level[:index]]    Btree node to kill (may be repeated)\n"
+	     "                              btree: name of the btree, e.g. extents\n"
+	     "                              level: level to kill at (default 0 == leaves)\n"
+	     "                              index: index of the node at that level (default 0)\n"
+	     "  -h                          Display this help and exit\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+}
+
+/* One "-n btree:level:idx" request parsed by cmd_kill_btree_node(). */
+struct kill_node {
+ /* btree id, looked up by name in __bch2_btree_ids */
+ unsigned btree;
+ /* btree level to match; parsing rejects values >= BTREE_MAX_DEPTH */
+ unsigned level;
+ /* nodes at that level still to skip; decremented while walking the btree */
+ u64 idx;
+};
+
+/*
+ * bcachefs kill_btree_node: overwrite the first block of selected btree
+ * nodes with zeroes, on every device that holds a pointer to the node.
+ *
+ * Each "-n btree[:level[:index]]" selects the index'th node (counting from 0)
+ * found at the given level of the given btree; level and index default to 0.
+ * The remaining arguments are the devices of the filesystem to open.
+ *
+ * Returns 0 when every requested node was found and overwritten, the
+ * negative error code if walking the btree failed, and EXIT_FAILURE if a node
+ * was not found or a write came up short.
+ */
+int cmd_kill_btree_node(int argc, char *argv[])
+{
+	struct bch_opts opts = bch2_opts_empty();
+	DARRAY(struct kill_node) kill_nodes = {};
+	int opt;
+
+	opt_set(opts, read_only, true);
+
+	while ((opt = getopt(argc, argv, "n:h")) != -1)
+		switch (opt) {
+		case 'n': {
+			/* btree[:level[:index]]; missing fields stay 0 */
+			char *p = optarg;
+			const char *str_btree	= strsep(&p, ":");
+			const char *str_level	= strsep(&p, ":");
+			const char *str_idx	= strsep(&p, ":");
+
+			struct kill_node n = {
+				.btree = read_string_list_or_die(str_btree,
+						__bch2_btree_ids, "btree id"),
+			};
+
+			if (str_level &&
+			    (kstrtouint(str_level, 10, &n.level) || n.level >= BTREE_MAX_DEPTH))
+				die("invalid level");
+
+			if (str_idx &&
+			    kstrtoull(str_idx, 10, &n.idx))
+				die("invalid index %s", str_idx);
+
+			darray_push(&kill_nodes, n);
+			break;
+		}
+		case 'h':
+			kill_btree_node_usage();
+			exit(EXIT_SUCCESS);
+		}
+	args_shift(optind);
+
+	if (!argc)
+		die("Please supply device(s)");
+
+	struct bch_fs *c = bch2_fs_open(argv, argc, opts);
+	if (IS_ERR(c))
+		die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
+
+	int ret;
+	int status = 0;		/* what we finally return */
+	void *zeroes;
+
+	ret = posix_memalign(&zeroes, c->opts.block_size, c->opts.block_size);
+	if (ret)
+		die("error %s from posix_memalign", bch2_err_str(ret));
+	/* posix_memalign() does not clear the memory it returns */
+	memset(zeroes, 0, c->opts.block_size);
+
+	struct btree_trans *trans = bch2_trans_get(c);
+
+	darray_for_each(kill_nodes, i) {
+		/*
+		 * The walk yields 0 while it should keep going; a non-zero
+		 * value from the body ends it and becomes ret.
+		 */
+		ret = __for_each_btree_node(trans, iter, i->btree, POS_MIN, 0, i->level, 0, b, ({
+			if (b->c.level != i->level)
+				continue;
+
+			int ret2 = 0;
+			if (!i->idx) {
+				struct printbuf buf = PRINTBUF;
+				bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+				bch_info(c, "killing btree node %s l=%u %s",
+					 bch2_btree_id_str(i->btree), i->level, buf.buf);
+				printbuf_exit(&buf);
+
+				/* found the requested node: stop walking */
+				ret2 = 1;
+
+				struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key));
+				bkey_for_each_ptr(ptrs, ptr) {
+					struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev);
+					if (!ca)
+						continue;
+
+					/* ptr->offset is in 512 byte sectors */
+					int ret3 = pwrite(ca->disk_sb.bdev->bd_fd, zeroes,
+						     c->opts.block_size, ptr->offset << 9);
+					bch2_dev_put(ca);
+					if (ret3 != c->opts.block_size) {
+						bch_err(c, "pwrite error: expected %u got %i %s",
+							c->opts.block_size, ret3, strerror(errno));
+						ret2 = EXIT_FAILURE;
+						status = EXIT_FAILURE;
+					}
+				}
+			}
+
+			i->idx--;
+			ret2;
+		}));
+
+		if (ret < 0) {
+			bch_err(c, "error %i walking btree nodes", ret);
+			status = ret;
+			break;
+		} else if (!ret) {
+			/* walked the whole level without reaching the index */
+			bch_err(c, "node at specified index not found");
+			status = EXIT_FAILURE;
+			break;
+		}
+	}
+
+	bch2_trans_put(trans);
+	bch2_fs_stop(c);
+	darray_exit(&kill_nodes);
+	free(zeroes);
+	return status;
+}
diff --git a/c_src/cmd_list_journal.c b/c_src/cmd_list_journal.c
new file mode 100644
index 00000000..fe7f9b05
--- /dev/null
+++ b/c_src/cmd_list_journal.c
@@ -0,0 +1,306 @@
+#include <fcntl.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "tools-util.h"
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/error.h"
+#include "libbcachefs/journal_io.h"
+#include "libbcachefs/journal_seq_blacklist.h"
+#include "libbcachefs/super.h"
+
+static const char *NORMAL = "\x1B[0m";
+static const char *RED = "\x1B[31m";
+
+/* Print the help text for "bcachefs list_journal" on stdout. */
+static void list_journal_usage(void)
+{
+	static const char help[] =
+		"bcachefs list_journal - print contents of journal\n"
+		"Usage: bcachefs list_journal [OPTION]... <devices>\n"
+		"\n"
+		"Options:\n"
+		" -a Read entire journal, not just dirty entries\n"
+		" -n, --nr-entries=nr Number of journal entries to print, starting from the most recent\n"
+		" -t, --transaction-filter=bbpos Filter transactions not updating <bbpos>\n"
+		" Or entries not matching the range <bbpos-bbpos>\n"
+		" -k, --key-filter=btree Filter keys not updating btree\n"
+		" -v, --verbose Verbose mode\n"
+		" -h, --help Display this help and exit\n"
+		"Report bugs to <linux-bcachefs@vger.kernel.org>";
+
+	/* puts() supplies the final newline */
+	puts(help);
+}
+
+/*
+ * Mark every line of @buf that begins with a space by turning that leading
+ * space into '*'.  @buf is modified in place.
+ */
+static void star_start_of_lines(char *buf)
+{
+	/* the first line has no preceding newline to search for */
+	if (buf[0] == ' ')
+		buf[0] = '*';
+
+	for (char *nl = strstr(buf, "\n "); nl; nl = strstr(nl, "\n "))
+		nl[1] = '*';
+}
+
+/* True for a log entry with level 0; the printing code treats it as the start of a transaction. */
+static inline bool entry_is_transaction_start(struct jset_entry *entry)
+{
+	if (entry->level)
+		return false;
+
+	return entry->type == BCH_JSET_ENTRY_log;
+}
+
+/* darray of bbpos ranges; holds the -t transaction key filters */
+typedef DARRAY(struct bbpos_range) d_bbpos_range;
+/* darray of btree ids; holds the -k key filters */
+typedef DARRAY(enum btree_id) d_btree_id;
+
+/*
+ * Does key @k of journal entry @entry overlap any range in @filter?
+ *
+ * The key covers [bkey_start_pos(k), k->k.p] in the btree named by
+ * entry->btree_id; it matches a range when it starts before the range's end
+ * and ends after the range's start.
+ */
+static bool bkey_matches_filter(d_bbpos_range filter, struct jset_entry *entry, struct bkey_i *k)
+{
+	const struct bbpos key_first = BBPOS(entry->btree_id, bkey_start_pos(&k->k));
+	const struct bbpos key_last = BBPOS(entry->btree_id, k->k.p);
+
+	darray_for_each(filter, range) {
+		if (bbpos_cmp(key_first, range->end) >= 0)
+			continue;
+		if (bbpos_cmp(key_last, range->start) <= 0)
+			continue;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * True if @entry is a key-carrying entry (btree_root, btree_keys or
+ * overwrite) and at least one of its keys matches @filter.
+ */
+static bool entry_matches_transaction_filter(struct jset_entry *entry,
+					     d_bbpos_range filter)
+{
+	switch (entry->type) {
+	case BCH_JSET_ENTRY_btree_root:
+	case BCH_JSET_ENTRY_btree_keys:
+	case BCH_JSET_ENTRY_overwrite:
+		break;
+	default:
+		/* other entry types are never examined */
+		return false;
+	}
+
+	jset_entry_for_each_key(entry, k) {
+		if (bkey_matches_filter(filter, entry, k))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Decide whether the transaction that starts at log entry @entry is printed.
+ *
+ * A transaction whose log message compares equal to any string in
+ * @msg_filter is suppressed.  Otherwise it is printed when @key_filter is
+ * empty, or when one of the entries up to the next transaction start (or
+ * @end) matches @key_filter.
+ */
+static bool should_print_transaction(struct jset_entry *entry, struct jset_entry *end,
+				     darray_str msg_filter,
+				     d_bbpos_range key_filter)
+{
+	struct jset_entry_log *log = container_of(entry, struct jset_entry_log, entry);
+	unsigned msg_bytes = jset_entry_log_msg_bytes(log);
+
+	darray_for_each(msg_filter, msg) {
+		if (strncmp(*msg, log->d, msg_bytes) == 0)
+			return false;
+	}
+
+	if (key_filter.nr == 0)
+		return true;
+
+	/* scan the rest of this transaction for a matching key */
+	entry = vstruct_next(entry);
+	while (entry != end && !entry_is_transaction_start(entry)) {
+		if (entry_matches_transaction_filter(entry, key_filter))
+			return true;
+		entry = vstruct_next(entry);
+	}
+
+	return false;
+}
+
+/*
+ * Decide whether a single journal entry is printed under the btree filter.
+ *
+ * With an empty filter everything is printed, and entries that do not carry
+ * keys (anything but btree_root, btree_keys, overwrite) always are.  A
+ * key-carrying entry is printed when it has at least one key and its btree id
+ * is listed in @filter.
+ */
+static bool should_print_entry(struct jset_entry *entry, d_btree_id filter)
+{
+	if (filter.nr == 0)
+		return true;
+
+	switch (entry->type) {
+	case BCH_JSET_ENTRY_btree_root:
+	case BCH_JSET_ENTRY_btree_keys:
+	case BCH_JSET_ENTRY_overwrite:
+		break;
+	default:
+		return true;
+	}
+
+	jset_entry_for_each_key(entry, k) {
+		darray_for_each(filter, wanted) {
+			if (*wanted == entry->btree_id)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Append the header of journal entry @p to @out: sequence number, version,
+ * last_seq, flush flag and the text from bch2_journal_ptrs_to_text().
+ *
+ * When @blacklisted is set the header is prefixed with "blacklisted " and
+ * the whole of out->buf is passed through star_start_of_lines().
+ */
+static void journal_entry_header_to_text(struct printbuf *out,
+					 struct bch_fs *c,
+					 struct journal_replay *p, bool blacklisted)
+{
+	unsigned long long seq = le64_to_cpu(p->j.seq);
+	unsigned version = le32_to_cpu(p->j.version);
+	unsigned long long last_seq = le64_to_cpu(p->j.last_seq);
+	unsigned flush = !JSET_NO_FLUSH(&p->j);
+
+	if (blacklisted)
+		prt_str(out, "blacklisted ");
+
+	prt_printf(out,
+		   "journal entry %llu\n"
+		   " version %u\n"
+		   " last seq %llu\n"
+		   " flush %u\n"
+		   " written at ",
+		   seq, version, last_seq, flush);
+	bch2_journal_ptrs_to_text(out, c, p);
+
+	if (blacklisted)
+		star_start_of_lines(out->buf);
+}
+
+/* Format the header of journal entry @p into a temporary printbuf and print it on stdout. */
+static void journal_entry_header_print(struct bch_fs *c, struct journal_replay *p, bool blacklisted)
+{
+	struct printbuf header = PRINTBUF;
+
+	journal_entry_header_to_text(&header, c, p, blacklisted);
+	printf("%s\n", header.buf);
+
+	printbuf_exit(&header);
+}
+
+/*
+ * Print the journal entries held in c->journal_entries to stdout.
+ *
+ * @nr_entries limits output to entries whose sequence number is within
+ * nr_entries of c->journal.seq.  @transaction_msg_filter and
+ * @transaction_key_filter decide which transactions are shown (see
+ * should_print_transaction()), @key_filter which individual entries (see
+ * should_print_entry()).  Entries matching @transaction_key_filter are
+ * printed in red; blacklisted journal entries get '*' line prefixes.
+ */
+static void journal_entries_print(struct bch_fs *c, unsigned nr_entries,
+ darray_str transaction_msg_filter,
+ d_bbpos_range transaction_key_filter,
+ d_btree_id key_filter)
+{
+ struct journal_replay *p, **_p;
+ struct genradix_iter iter;
+ struct printbuf buf = PRINTBUF;
+
+ genradix_for_each(&c->journal_entries, iter, _p) {
+ bool printed_header = false;
+
+ p = *_p;
+ if (!p)
+ continue;
+
+ /* skip entries more than nr_entries behind the current journal sequence */
+ if (le64_to_cpu(p->j.seq) + nr_entries < atomic64_read(&c->journal.seq))
+ continue;
+
+ bool blacklisted = p->ignore_blacklisted ||
+ bch2_journal_seq_is_blacklisted(c,
+ le64_to_cpu(p->j.seq), false);
+
+ /*
+ * Without transaction filters every journal entry gets its header
+ * up front; with filters the header is printed lazily, only once
+ * something inside the entry is actually printed.
+ */
+ if (!transaction_msg_filter.nr &&
+ !transaction_key_filter.nr) {
+ journal_entry_header_print(c, p, blacklisted);
+ printed_header = true;
+ }
+
+ struct jset_entry *entry = p->j.start;
+ struct jset_entry *end = vstruct_last(&p->j);
+ while (entry != end) {
+
+ /*
+ * log entries denote the start of a new transaction
+ * commit:
+ */
+ if (entry_is_transaction_start(entry)) {
+ if (!should_print_transaction(entry, end,
+ transaction_msg_filter,
+ transaction_key_filter)) {
+ /* filtered out: skip ahead to the next transaction start */
+ do {
+ entry = vstruct_next(entry);
+ } while (entry != end && !entry_is_transaction_start(entry));
+
+ continue;
+ }
+
+ /* goes into buf, so it is emitted with the entry printed below */
+ prt_newline(&buf);
+ }
+
+ if (!should_print_entry(entry, key_filter))
+ goto next;
+
+ if (!printed_header)
+ journal_entry_header_print(c, p, blacklisted);
+ printed_header = true;
+
+ /* entries that touch a filtered position are wrapped in the RED/NORMAL escapes */
+ bool highlight = entry_matches_transaction_filter(entry, transaction_key_filter);
+ if (highlight)
+ fputs(RED, stdout);
+
+ printbuf_indent_add(&buf, 4);
+ bch2_journal_entry_to_text(&buf, c, entry);
+
+ if (blacklisted)
+ star_start_of_lines(buf.buf);
+ printf("%s\n", buf.buf);
+ printbuf_reset(&buf);
+
+ if (highlight)
+ fputs(NORMAL, stdout);
+next:
+ entry = vstruct_next(entry);
+ }
+ }
+
+ printbuf_exit(&buf);
+}
+
+/*
+ * bcachefs list_journal: open the filesystem with journal-only,
+ * no-modification options, then print its journal entries.
+ *
+ * Options are parsed with getopt_long(); the remaining arguments are the
+ * devices to open.  Returns 0; failures go through die().
+ */
+int cmd_list_journal(int argc, char *argv[])
+{
+	static const struct option longopts[] = {
+		{ "nr-entries",		required_argument,	NULL, 'n' },
+		{ "transaction-filter",	required_argument,	NULL, 't' },
+		{ "key-filter",		required_argument,	NULL, 'k' },
+		{ "verbose",		no_argument,		NULL, 'v' },
+		{ "help",		no_argument,		NULL, 'h' },
+		{ NULL }
+	};
+	struct bch_opts opts = bch2_opts_empty();
+	u32 max_entries = U32_MAX;
+	darray_str msg_filter = {};
+	d_bbpos_range trans_key_filter = {};
+	d_btree_id btree_filter = {};
+
+	opt_set(opts, noexcl,			true);
+	opt_set(opts, nochanges,		true);
+	opt_set(opts, norecovery,		true);
+	opt_set(opts, read_only,		true);
+	opt_set(opts, degraded,			true);
+	opt_set(opts, very_degraded,		true);
+	opt_set(opts, errors,			BCH_ON_ERROR_continue);
+	opt_set(opts, fix_errors,		FSCK_FIX_yes);
+	opt_set(opts, retain_recovery_info,	true);
+	opt_set(opts, read_journal_only,	true);
+
+	for (;;) {
+		int ch = getopt_long(argc, argv, "an:m:t:k:vh", longopts, NULL);
+		if (ch == -1)
+			break;
+
+		switch (ch) {
+		case 'a':
+			opt_set(opts, read_entire_journal, true);
+			break;
+		case 'n':
+			if (kstrtouint(optarg, 10, &max_entries))
+				die("error parsing nr_entries");
+			/* an entry count also reads the entire journal */
+			opt_set(opts, read_entire_journal, true);
+			break;
+		case 'm':
+			darray_push(&msg_filter, strdup(optarg));
+			break;
+		case 't':
+			darray_push(&trans_key_filter, bbpos_range_parse(optarg));
+			break;
+		case 'k':
+			darray_push(&btree_filter, read_string_list_or_die(optarg, __bch2_btree_ids, "btree id"));
+			break;
+		case 'v':
+			opt_set(opts, verbose, true);
+			break;
+		case 'h':
+			list_journal_usage();
+			exit(EXIT_SUCCESS);
+		}
+	}
+	args_shift(optind);
+
+	if (!argc)
+		die("Please supply device(s) to open");
+
+	darray_str devs = get_or_split_cmdline_devs(argc, argv);
+
+	struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts);
+	if (IS_ERR(c))
+		die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
+
+	journal_entries_print(c, max_entries, msg_filter, trans_key_filter, btree_filter);
+	bch2_fs_stop(c);
+	return 0;
+}
diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c
new file mode 100644
index 00000000..a5b7786d
--- /dev/null
+++ b/c_src/cmd_migrate.c
@@ -0,0 +1,426 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include <linux/fiemap.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "crypto.h"
+#include "libbcachefs.h"
+#include "posix_to_bcachefs.h"
+
+#include <linux/dcache.h>
+#include <linux/generic-radix-tree.h>
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/btree_update.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/dirent.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/inode.h"
+#include "libbcachefs/replicas.h"
+#include "libbcachefs/super.h"
+
+/* XXX cut and pasted from fsck.c */
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+/*
+ * Map a device number to a "/dev/<name>" path.
+ *
+ * <name> is the last component of the target of the
+ * /sys/dev/block/<major>:<minor> symlink.  The result comes from mprintf();
+ * lookup failures go through die().
+ */
+static char *dev_t_to_path(dev_t dev)
+{
+	char target[PATH_MAX];
+
+	char *sysfs_dev = mprintf("/sys/dev/block/%u:%u",
+				  major(dev), minor(dev));
+	int len = readlink(sysfs_dev, target, sizeof(target));
+	free(sysfs_dev);
+
+	/* len == sizeof(target) leaves no room for the terminator */
+	if (len < 0 || len >= sizeof(target))
+		die("readlink error while looking up block device: %m");
+
+	/* readlink() does not NUL-terminate */
+	target[len] = '\0';
+
+	char *slash = strrchr(target, '/');
+	if (!slash)
+		die("error looking up device name");
+
+	return mprintf("/dev/%s", slash + 1);
+}
+
+/*
+ * Return true if @path is exactly the mount point (fifth space-separated
+ * field) of some line in /proc/self/mountinfo.
+ */
+static bool path_is_fs_root(const char *path)
+{
+	char *line = NULL;
+	size_t cap = 0;
+	bool is_root = false;
+
+	FILE *mounts = fopen("/proc/self/mountinfo", "r");
+	if (!mounts)
+		die("Error getting mount information");
+
+	while (!is_root && getline(&line, &cap, mounts) != -1) {
+		char *cursor = line;
+
+		/* skip: mount id, parent id, dev, root */
+		for (int field = 0; field < 4; field++)
+			strsep(&cursor, " ");
+
+		char *mount = strsep(&cursor, " ");
+
+		is_root = mount && !strcmp(path, mount);
+	}
+
+	fclose(mounts);
+	free(line);
+	return is_root;
+}
+
+/*
+ * Set the buckets_nouse bit for every bucket of device 0 that overlaps a
+ * hole between the byte ranges in @extents, up to the end of the device's
+ * buckets.
+ */
+static void mark_unreserved_space(struct bch_fs *c, ranges extents)
+{
+	struct bch_dev *ca = c->devs[0];
+	struct hole_iter holes;
+	struct range hole;
+
+	for_each_hole(holes, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, hole) {
+		/* an empty hole ends the walk */
+		if (hole.start == hole.end)
+			return;
+
+		/* hole bounds are in bytes, buckets are addressed by sector */
+		u64 bucket = sector_to_bucket(ca, hole.start >> 9);
+
+		/* the first bucket is always marked, then every one starting before hole.end */
+		for (;;) {
+			set_bit(bucket, ca->buckets_nouse);
+			bucket++;
+			if (!(bucket_to_sector(ca, bucket) << 9 < hole.end))
+				break;
+		}
+	}
+}
+
+/*
+ * Create (or, with @force, reuse) @file_path, fallocate() @size bytes for it
+ * and return the physical extents backing the file, sorted and merged.
+ *
+ * The file must live on device @dev; its inode number is stored in
+ * *@bcachefs_inum.  Every extent must be fully mapped and aligned to
+ * @block_size, otherwise the function dies.
+ */
+static ranges reserve_new_fs_space(const char *file_path, unsigned block_size,
+				   u64 size, u64 *bcachefs_inum, dev_t dev,
+				   bool force)
+{
+	/* unless forced, refuse to open a file that already exists */
+	int open_flags = O_RDWR|O_CREAT;
+	if (!force)
+		open_flags |= O_EXCL;
+
+	int fd = open(file_path, open_flags, 0600);
+	if (fd < 0)
+		die("Error creating %s for bcachefs metadata: %m",
+		    file_path);
+
+	struct stat st = xfstat(fd);
+
+	if (st.st_dev != dev)
+		die("bcachefs file has incorrect device");
+
+	*bcachefs_inum = st.st_ino;
+
+	if (fallocate(fd, 0, 0, size))
+		die("Error reserving space for bcachefs metadata: %m");
+
+	fsync(fd);
+
+	ranges extents = { 0 };
+	struct fiemap_iter iter;
+	struct fiemap_extent e;
+
+	fiemap_for_each(fd, iter, e) {
+		bool unmapped = e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+					      FIEMAP_EXTENT_ENCODED|
+					      FIEMAP_EXTENT_NOT_ALIGNED|
+					      FIEMAP_EXTENT_DATA_INLINE);
+		if (unmapped)
+			die("Unable to continue: metadata file not fully mapped");
+
+		if ((e.fe_physical & (block_size - 1)) ||
+		    (e.fe_length & (block_size - 1)))
+			die("Unable to continue: unaligned extents in metadata file");
+
+		range_add(&extents, e.fe_physical, e.fe_length);
+	}
+	fiemap_iter_exit(&iter);
+	close(fd);
+
+	ranges_sort_merge(&extents);
+	return extents;
+}
+
+/*
+ * Pick the first extent that can hold two superblocks and record the
+ * location in @dev (sb_offset and sb_end, in sectors).
+ *
+ * The usable part of an extent starts no lower than 256k and is rounded
+ * inward to bucket boundaries.  Dies if no extent is large enough.
+ */
+static void find_superblock_space(ranges extents,
+				  struct format_opts opts,
+				  struct dev_opts *dev)
+{
+	darray_for_each(extents, i) {
+		u64 start = round_up(max(256ULL << 10, i->start),
+				     dev->bucket_size << 9);
+		u64 end = round_down(i->end,
+				     dev->bucket_size << 9);
+
+		/* Need space for two superblocks: */
+		if (start + (opts.superblock_size << 9) * 2 > end)
+			continue;
+
+		dev->sb_offset = start >> 9;
+		dev->sb_end = dev->sb_offset + opts.superblock_size * 2;
+		return;
+	}
+
+	die("Couldn't find a valid location for superblock");
+}
+
+/* Print the help text for "bcachefs migrate" on stdout. */
+static void migrate_usage(void)
+{
+	static const char help[] =
+		"bcachefs migrate - migrate an existing filesystem to bcachefs\n"
+		"Usage: bcachefs migrate [OPTION]...\n"
+		"\n"
+		"Options:\n"
+		" -f fs Root of filesystem to migrate(s)\n"
+		" --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n"
+		" --no_passphrase Don't encrypt master encryption key\n"
+		" -F Force, even if metadata file already exists\n"
+		" -h Display this help and exit\n"
+		"Report bugs to <linux-bcachefs@vger.kernel.org>";
+
+	/* puts() supplies the final newline */
+	puts(help);
+}
+
+/*
+ * Long-only options for "bcachefs migrate"; the values ('e', 'p') are the
+ * case labels handled in cmd_migrate()'s option switch.
+ */
+static const struct option migrate_opts[] = {
+ { "encrypted", no_argument, NULL, 'e' },
+ { "no_passphrase", no_argument, NULL, 'p' },
+ { NULL }
+};
+
+/*
+ * Create a bcachefs filesystem on the block device backing the mounted
+ * filesystem at @fs_path and copy that filesystem's contents into it.
+ *
+ * Steps:
+ *  1. check that @fs_path is a mount point and a directory, and open the
+ *     block device it lives on;
+ *  2. reserve space by fallocate()ing "<fs_path>/bcachefs" (one fifth of the
+ *     device size) and find room for the superblocks inside it;
+ *  3. format, open without starting, mark every bucket outside the reserved
+ *     extents as not-to-be-used, and only then start the filesystem;
+ *  4. copy_fs(), then reopen read-only with nochanges as a check and print
+ *     follow-up instructions.
+ *
+ * @force allows reusing an existing "<fs_path>/bcachefs" file.
+ * Returns 0; every failure is reported through die().
+ */
+static int migrate_fs(const char *fs_path,
+		      struct bch_opt_strs fs_opt_strs,
+		      struct bch_opts fs_opts,
+		      struct format_opts format_opts,
+		      bool force)
+{
+	if (!path_is_fs_root(fs_path))
+		die("%s is not a filesystem root", fs_path);
+
+	int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME);
+	struct stat stat = xfstat(fs_fd);
+
+	if (!S_ISDIR(stat.st_mode))
+		die("%s is not a directory", fs_path);
+
+	struct dev_opts dev = dev_opts_default();
+
+	dev.path = dev_t_to_path(stat.st_dev);
+	dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL);
+
+	int ret = PTR_ERR_OR_ZERO(dev.file);
+	if (ret < 0)
+		die("Error opening device to format %s: %s", dev.path, strerror(-ret));
+	dev.bdev = file_bdev(dev.file);
+
+	opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd));
+
+	char *file_path = mprintf("%s/bcachefs", fs_path);
+	printf("Creating new filesystem on %s in space reserved at %s\n",
+	       dev.path, file_path);
+
+	dev.size	= get_size(dev.bdev->bd_fd);
+	dev.bucket_size = bch2_pick_bucket_size(fs_opts, &dev);
+	dev.nbuckets	= dev.size / dev.bucket_size;
+
+	bch2_check_bucket_size(fs_opts, &dev);
+
+	u64 bcachefs_inum;
+	ranges extents = reserve_new_fs_space(file_path,
+				fs_opts.block_size >> 9,
+				get_size(dev.bdev->bd_fd) / 5,
+				&bcachefs_inum, stat.st_dev, force);
+	/* only needed for the message above and the reservation */
+	free(file_path);
+
+	find_superblock_space(extents, format_opts, &dev);
+
+	struct bch_sb *sb = bch2_format(fs_opt_strs,
+					fs_opts, format_opts, &dev, 1);
+	u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]);
+
+	if (format_opts.passphrase)
+		bch2_add_key(sb, "user", "user", format_opts.passphrase);
+
+	free(sb);
+
+	struct bch_opts opts = bch2_opts_empty();
+	struct bch_fs *c = NULL;
+	char *path[1] = { dev.path };
+
+	opt_set(opts, sb,	sb_offset);
+	opt_set(opts, nostart,	true);
+	opt_set(opts, noexcl,	true);
+
+	c = bch2_fs_open(path, 1, opts);
+	if (IS_ERR(c))
+		die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
+
+	ret = bch2_buckets_nouse_alloc(c);
+	if (ret)
+		die("Error allocating buckets_nouse: %s", bch2_err_str(ret));
+
+	/*
+	 * The filesystem was opened with nostart so that the buckets outside
+	 * the reserved file can be fenced off first: it must not be started
+	 * before this call.
+	 */
+	mark_unreserved_space(c, extents);
+
+	ret = bch2_fs_start(c);
+	if (ret)
+		die("Error starting new filesystem: %s", bch2_err_str(ret));
+
+	struct copy_fs_state s = {
+		.bcachefs_inum	= bcachefs_inum,
+		.dev		= stat.st_dev,
+		.extents	= extents,
+		.type		= BCH_MIGRATE_migrate,
+	};
+
+	copy_fs(c, fs_fd, fs_path, &s);
+
+	bch2_fs_stop(c);
+
+	/* reopen started, read-only and with nochanges, as a consistency check */
+	printf("Migrate complete, running fsck:\n");
+	opt_set(opts, nostart,	false);
+	opt_set(opts, nochanges, true);
+	opt_set(opts, read_only, true);
+
+	c = bch2_fs_open(path, 1, opts);
+	if (IS_ERR(c))
+		die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
+
+	bch2_fs_stop(c);
+	printf("fsck complete\n");
+
+	printf("To mount the new filesystem, run\n"
+	       " mount -t bcachefs -o sb=%llu %s dir\n"
+	       "\n"
+	       "After verifying that the new filesystem is correct, to create a\n"
+	       "superblock at the default offset and finish the migration run\n"
+	       " bcachefs migrate-superblock -d %s -o %llu\n"
+	       "\n"
+	       "The new filesystem will have a file at /old_migrated_filesystem\n"
+	       "referencing all disk space that might be used by the existing\n"
+	       "filesystem. That file can be deleted once the old filesystem is\n"
+	       "no longer needed (and should be deleted prior to running\n"
+	       "bcachefs migrate-superblock)\n",
+	       sb_offset, dev.path, dev.path, sb_offset);
+	return 0;
+}
+
+/*
+ * bcachefs migrate: parse the format options and the migrate-specific
+ * options, prompt for a passphrase when encryption is requested without
+ * --no_passphrase, and run migrate_fs().
+ *
+ * Returns the value of migrate_fs().
+ */
+int cmd_migrate(int argc, char *argv[])
+{
+	struct format_opts format_opts = format_opts_default();
+	char *fs_path = NULL;
+	bool skip_passphrase = false;
+	bool force = false;
+
+	/* filesystem options are pulled out of argv before getopt sees it */
+	struct bch_opt_strs fs_opt_strs =
+		bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT);
+	struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs);
+
+	for (;;) {
+		int ch = getopt_long(argc, argv, "f:Fh", migrate_opts, NULL);
+		if (ch == -1)
+			break;
+
+		switch (ch) {
+		case 'f':
+			fs_path = optarg;
+			break;
+		case 'e':
+			format_opts.encrypted = true;
+			break;
+		case 'p':
+			skip_passphrase = true;
+			break;
+		case 'F':
+			force = true;
+			break;
+		case 'h':
+			migrate_usage();
+			exit(EXIT_SUCCESS);
+		}
+	}
+
+	if (!fs_path)
+		die("Please specify a filesystem to migrate");
+
+	if (format_opts.encrypted && !skip_passphrase)
+		format_opts.passphrase = read_passphrase_twice("Enter passphrase: ");
+
+	int ret = migrate_fs(fs_path, fs_opt_strs, fs_opts, format_opts, force);
+
+	bch2_opt_strs_free(&fs_opt_strs);
+	return ret;
+}
+
+/* Print the help text for "bcachefs migrate-superblock" on stdout. */
+static void migrate_superblock_usage(void)
+{
+	static const char help[] =
+		"bcachefs migrate-superblock - create default superblock after migrating\n"
+		"Usage: bcachefs migrate-superblock [OPTION]...\n"
+		"\n"
+		"Options:\n"
+		" -d device Device to create superblock for\n"
+		" -o offset Offset of existing superblock\n"
+		" -h Display this help and exit\n"
+		"Report bugs to <linux-bcachefs@vger.kernel.org>";
+
+	/* puts() supplies the final newline */
+	puts(help);
+}
+
+/*
+ * bcachefs migrate-superblock: read the superblock found at the given
+ * offset of a device, put BCH_SB_SECTOR at the front of its layout and write
+ * the superblock out again.
+ *
+ * Requires -d device and a non-zero -o offset.  Dies if the layout is full
+ * or already lists BCH_SB_SECTOR.  Returns 0.
+ */
+int cmd_migrate_superblock(int argc, char *argv[])
+{
+	char *dev = NULL;
+	u64 offset = 0;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "d:o:h")) != -1)
+		switch (opt) {
+		case 'd':
+			dev = optarg;
+			break;
+		case 'o':
+			if (kstrtou64(optarg, 10, &offset))
+				die("Invalid offset");
+			break;
+		case 'h':
+			migrate_superblock_usage();
+			exit(EXIT_SUCCESS);
+		}
+
+	if (!dev)
+		die("Please specify a device");
+
+	if (!offset)
+		die("Please specify offset of existing superblock");
+
+	int fd = xopen(dev, O_RDWR);
+	struct bch_sb *sb = __bch2_super_read(fd, offset);
+
+	unsigned nr = sb->layout.nr_superblocks;
+
+	if (nr >= ARRAY_SIZE(sb->layout.sb_offset))
+		die("Can't add superblock: no space left in superblock layout");
+
+	for (unsigned idx = 0; idx < nr; idx++) {
+		if (le64_to_cpu(sb->layout.sb_offset[idx]) == BCH_SB_SECTOR)
+			die("Superblock layout already has default superblock");
+	}
+
+	/* shift the existing offsets up by one to free slot 0 */
+	memmove(&sb->layout.sb_offset[1],
+		&sb->layout.sb_offset[0],
+		nr * sizeof(u64));
+	sb->layout.nr_superblocks++;
+
+	sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
+
+	bch2_super_write(fd, sb);
+	close(fd);
+
+	return 0;
+}
diff --git a/c_src/cmd_option.c b/c_src/cmd_option.c
new file mode 100644
index 00000000..21048d7d
--- /dev/null
+++ b/c_src/cmd_option.c
@@ -0,0 +1,168 @@
+/*
+ * Authors: Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * GPLv2
+ */
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "libbcachefs.h"
+#include "libbcachefs/errcode.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/super-io.h"
+
+/*
+ * Print the help text for "bcachefs set-fs-option", including the option
+ * list produced by bch2_opts_usage(OPT_MOUNT), then exit successfully.
+ */
+static void set_option_usage(void)
+{
+	fputs("bcachefs set-fs-option \n"
+	      "Usage: bcachefs set-fs-option [OPTION].. device\n"
+	      "\n"
+	      "Options:\n"
+	      "\n", stdout);
+	bch2_opts_usage(OPT_MOUNT);
+	fputs(" -h, --help display this help and exit\n"
+	      "Report bugs to <linux-bcachefs@vger.kernel.org>"
+	      "\n", stdout);
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Return the dev_idx of the member device of @c whose name equals @dev, or
+ * -1 if there is none.  The member list is walked under rcu_read_lock().
+ */
+static int name_to_dev_idx(struct bch_fs *c, const char *dev)
+{
+	int idx = -1;
+
+	rcu_read_lock();
+	for_each_member_device_rcu(c, ca, NULL) {
+		if (strcmp(ca->name, dev))
+			continue;
+
+		idx = ca->dev_idx;
+		break;
+	}
+	rcu_read_unlock();
+
+	return idx;
+}
+
+/*
+ * bcachefs set-fs-option: apply the options given on the command line to a
+ * filesystem, identified by one or more of its devices.
+ *
+ * If none of the devices is mounted, the filesystem is opened (without being
+ * started) and the options are written to the superblock: OPT_FS options
+ * once, OPT_DEVICE options once per listed device.  If a device is mounted,
+ * the option strings are written through the mounted filesystem's sysfs
+ * directory instead ("options/<name>", "dev-<idx>/<name>").
+ *
+ * Returns the last bch2_opt_check_may_set() result in the offline case and 0
+ * in the online case.
+ */
+int cmd_set_option(int argc, char *argv[])
+{
+	struct bch_opt_strs new_opt_strs = bch2_cmdline_opts_get(&argc, argv, OPT_MOUNT|OPT_DEVICE);
+	struct bch_opts new_opts = bch2_parse_opts(new_opt_strs);
+	unsigned i;
+	int opt, ret = 0;
+
+	while ((opt = getopt(argc, argv, "h")) != -1)
+		switch (opt) {
+		case 'h':
+			set_option_usage();
+			break;
+		}
+	args_shift(optind);
+
+	if (!argc) {
+		fprintf(stderr, "Please supply device(s)\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/* on exit from this loop, i indexes the first mounted device (if any) */
+	bool online = false;
+	for (i = 0; i < argc; i++)
+		if (dev_mounted(argv[i])) {
+			online = true;
+			break;
+		}
+
+	if (!online) {
+		struct bch_opts open_opts = bch2_opts_empty();
+		opt_set(open_opts, nostart, true);
+
+		struct bch_fs *c = bch2_fs_open(argv, argc, open_opts);
+		if (IS_ERR(c)) {
+			fprintf(stderr, "error opening %s: %s\n", argv[0], bch2_err_str(PTR_ERR(c)));
+			exit(EXIT_FAILURE);
+		}
+
+		for (i = 0; i < bch2_opts_nr; i++) {
+			const struct bch_option *opt = bch2_opt_table + i;
+
+			u64 v = bch2_opt_get_by_id(&new_opts, i);
+
+			if (!bch2_opt_defined_by_id(&new_opts, i))
+				continue;
+
+			ret = bch2_opt_check_may_set(c, i, v);
+			if (ret < 0) {
+				fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret);
+				continue;
+			}
+
+			if (!(opt->flags & (OPT_FS|OPT_DEVICE)))
+				fprintf(stderr, "Can't set option %s\n", opt->attr.name);
+
+			if (opt->flags & OPT_FS) {
+				bch2_opt_set_sb(c, NULL, opt, v);
+			}
+
+			if (opt->flags & OPT_DEVICE) {
+				for (unsigned dev = 0; dev < argc; dev++) {
+					int dev_idx = name_to_dev_idx(c, argv[dev]);
+					if (dev_idx < 0) {
+						/* report the device, not argv[option index] */
+						fprintf(stderr, "Couldn't look up device %s\n", argv[dev]);
+						continue;
+					}
+
+					bch2_opt_set_sb(c, c->devs[dev_idx], opt, v);
+				}
+			}
+		}
+
+		bch2_fs_stop(c);
+		return ret;
+	} else {
+		unsigned dev_idx;
+		/* argv[i] is the mounted device found above */
+		struct bchfs_handle fs = bchu_fs_open_by_dev(argv[i], &dev_idx);
+
+		/* every listed device must belong to that same filesystem */
+		for (i = 0; i < argc; i++) {
+			struct bchfs_handle fs2 = bchu_fs_open_by_dev(argv[i], &dev_idx);
+			if (memcmp(&fs.uuid, &fs2.uuid, sizeof(fs2.uuid)))
+				die("Filesystem mounted, but not all devices are members");
+			bcache_fs_close(fs2);
+		}
+
+		for (i = 0; i < bch2_opts_nr; i++) {
+			if (!new_opt_strs.by_id[i])
+				continue;
+
+			const struct bch_option *opt = bch2_opt_table + i;
+
+			if (!(opt->flags & (OPT_FS|OPT_DEVICE)))
+				fprintf(stderr, "Can't set option %s\n", opt->attr.name);
+
+			if (opt->flags & OPT_FS) {
+				char *path = mprintf("options/%s", opt->attr.name);
+
+				write_file_str(fs.sysfs_fd, path, new_opt_strs.by_id[i]);
+				free(path);
+			}
+
+			if (opt->flags & OPT_DEVICE) {
+				for (unsigned dev = 0; dev < argc; dev++) {
+					/*
+					 * Look up the index of this device;
+					 * i is the option index here and must
+					 * not be used to index argv.
+					 */
+					struct bchfs_handle fs2 = bchu_fs_open_by_dev(argv[dev], &dev_idx);
+					bcache_fs_close(fs2);
+
+					char *path = mprintf("dev-%u/%s", dev_idx, opt->attr.name);
+					write_file_str(fs.sysfs_fd, path, new_opt_strs.by_id[i]);
+					free(path);
+				}
+			}
+		}
+
+		bcache_fs_close(fs);
+	}
+	return 0;
+}
diff --git a/c_src/cmd_run.c b/c_src/cmd_run.c
new file mode 100644
index 00000000..1bf84e5c
--- /dev/null
+++ b/c_src/cmd_run.c
@@ -0,0 +1,33 @@
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "cmds.h"
+#include "libbcachefs.h"
+
+#if 0
+/* Placeholder: does nothing and returns 0.  Compiled out by the enclosing #if 0. */
+int cmd_run(int argc, char *argv[])
+{
+ return 0;
+}
+
+/*
+ * Issue BCH_IOCTL_STOP on the filesystem named by argv[1].
+ * Compiled out by the enclosing #if 0.
+ */
+int cmd_stop(int argc, char *argv[])
+{
+ /* exactly one argument after the command name */
+ if (argc != 2)
+ die("Please supply a filesystem");
+
+ struct bchfs_handle fs = bcache_fs_open(argv[1]);
+ xioctl(fs.ioctl_fd, BCH_IOCTL_STOP);
+ return 0;
+}
+#endif
diff --git a/c_src/cmd_version.c b/c_src/cmd_version.c
new file mode 100644
index 00000000..5fe30e5e
--- /dev/null
+++ b/c_src/cmd_version.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+
+#include "cmds.h"
+
+/* bcachefs version: print VERSION_STRING followed by a newline; always returns 0. */
+int cmd_version(int argc, char *argv[])
+{
+	fputs(VERSION_STRING "\n", stdout);
+	return 0;
+}
diff --git a/c_src/cmds.h b/c_src/cmds.h
new file mode 100644
index 00000000..64267dc4
--- /dev/null
+++ b/c_src/cmds.h
@@ -0,0 +1,63 @@
+/*
+ * Author: Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * GPLv2
+ */
+
+#ifndef _CMDS_H
+#define _CMDS_H
+
+#include "tools-util.h"
+
+int cmd_format(int argc, char *argv[]);
+int cmd_show_super(int argc, char *argv[]);
+int cmd_reset_counters(int argc, char *argv[]);
+int cmd_set_option(int argc, char *argv[]);
+
+int cmd_fs_usage(int argc, char *argv[]);
+
+int device_usage(void);
+int cmd_device_add(int argc, char *argv[]);
+int cmd_device_remove(int argc, char *argv[]);
+int cmd_device_online(int argc, char *argv[]);
+int cmd_device_offline(int argc, char *argv[]);
+int cmd_device_evacuate(int argc, char *argv[]);
+int cmd_device_set_state(int argc, char *argv[]);
+int cmd_device_resize(int argc, char *argv[]);
+int cmd_device_resize_journal(int argc, char *argv[]);
+
+int data_usage(void);
+int cmd_data_rereplicate(int argc, char *argv[]);
+int cmd_data_job(int argc, char *argv[]);
+
+int cmd_unlock(int argc, char *argv[]);
+int cmd_set_passphrase(int argc, char *argv[]);
+int cmd_remove_passphrase(int argc, char *argv[]);
+
+int cmd_fsck(int argc, char *argv[]);
+
+int cmd_dump(int argc, char *argv[]);
+int cmd_list_journal(int argc, char *argv[]);
+int cmd_kill_btree_node(int argc, char *argv[]);
+
+int cmd_migrate(int argc, char *argv[]);
+int cmd_migrate_superblock(int argc, char *argv[]);
+
+int cmd_version(int argc, char *argv[]);
+
+int cmd_setattr(int argc, char *argv[]);
+
+int subvolume_usage(void);
+int cmd_subvolume_create(int argc, char *argv[]);
+int cmd_subvolume_delete(int argc, char *argv[]);
+int cmd_subvolume_snapshot(int argc, char *argv[]);
+
+int cmd_fusemount(int argc, char *argv[]);
+
+void bcachefs_usage(void);
+int device_cmds(int argc, char *argv[]);
+int fs_cmds(int argc, char *argv[]);
+int data_cmds(int argc, char *argv[]);
+int subvolume_cmds(int argc, char *argv[]);
+
+#endif /* _CMDS_H */
diff --git a/c_src/config.h b/c_src/config.h
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/c_src/config.h
diff --git a/c_src/crypto.c b/c_src/crypto.c
new file mode 100644
index 00000000..32671bd8
--- /dev/null
+++ b/c_src/crypto.c
@@ -0,0 +1,201 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <termios.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <keyutils.h>
+#include <linux/random.h>
+#include <sodium/crypto_pwhash_scryptsalsa208sha256.h>
+#include <uuid/uuid.h>
+
+#include "libbcachefs/checksum.h"
+#include "crypto.h"
+
+/*
+ * Read one line from stdin and return it (heap allocated by getline(), caller
+ * frees) with a trailing newline stripped.
+ *
+ * When stdin is a terminal, @prompt is written to stderr and echo is switched
+ * off while the line is typed; otherwise the line is read as-is and @prompt is
+ * not shown.
+ */
+char *read_passphrase(const char *prompt)
+{
+	char *line = NULL;
+	size_t cap = 0;
+	ssize_t nread;
+
+	if (!isatty(STDIN_FILENO)) {
+		nread = getline(&line, &cap, stdin);
+	} else {
+		struct termios saved, noecho;
+
+		fputs(prompt, stderr);
+		fflush(stderr);
+
+		if (tcgetattr(STDIN_FILENO, &saved))
+			die("error getting terminal attrs");
+
+		noecho = saved;
+		noecho.c_lflag &= ~ECHO;
+		if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &noecho))
+			die("error setting terminal attrs");
+
+		nread = getline(&line, &cap, stdin);
+
+		/* Put the terminal back before reporting anything: */
+		tcsetattr(STDIN_FILENO, TCSAFLUSH, &saved);
+		fputc('\n', stderr);
+	}
+
+	if (nread < 0)
+		die("error reading passphrase");
+
+	if (nread > 0 && line[nread - 1] == '\n')
+		line[nread - 1] = '\0';
+
+	return line;
+}
+
+/*
+ * Read a passphrase and, when stdin is a terminal, ask for it a second time
+ * and die unless both entries match. Returns the first entry; the second one
+ * is wiped and freed.
+ */
+char *read_passphrase_twice(const char *prompt)
+{
+	char *first = read_passphrase(prompt);
+	char *second;
+
+	/* No confirmation when input is not interactive: */
+	if (!isatty(STDIN_FILENO))
+		return first;
+
+	second = read_passphrase("Enter same passphrase again: ");
+
+	if (strcmp(first, second) != 0) {
+		memzero_explicit(first, strlen(first));
+		memzero_explicit(second, strlen(second));
+		die("Passphrases do not match");
+	}
+
+	memzero_explicit(second, strlen(second));
+	free(second);
+
+	return first;
+}
+
+/*
+ * Turn @passphrase into a key using the KDF type and parameters stored in
+ * @crypt. Only scrypt is handled; any other KDF type is fatal.
+ */
+struct bch_key derive_passphrase(struct bch_sb_field_crypt *crypt,
+				 const char *passphrase)
+{
+	/* sizeof(salt) below includes the terminating NUL */
+	const unsigned char salt[] = "bcache";
+	struct bch_key key;
+	int ret;
+
+	if (BCH_CRYPT_KDF_TYPE(crypt) == BCH_KDF_SCRYPT) {
+		/* N, r and p are stored as shift counts */
+		ret = crypto_pwhash_scryptsalsa208sha256_ll(
+			(void *) passphrase, strlen(passphrase),
+			salt, sizeof(salt),
+			1ULL << BCH_KDF_SCRYPT_N(crypt),
+			1ULL << BCH_KDF_SCRYPT_R(crypt),
+			1ULL << BCH_KDF_SCRYPT_P(crypt),
+			(void *) &key, sizeof(key));
+		if (ret)
+			die("scrypt error: %i", ret);
+	} else {
+		die("unknown kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt));
+	}
+
+	return key;
+}
+
+/*
+ * True when @sb has a crypt field and bch2_key_is_encrypted() reports true for
+ * the key stored in it.
+ */
+bool bch2_sb_is_encrypted(struct bch_sb *sb)
+{
+	struct bch_sb_field_crypt *field = bch2_sb_field_get(sb, crypt);
+
+	if (!field)
+		return false;
+
+	return bch2_key_is_encrypted(&field->key);
+}
+
+/*
+ * Verify @passphrase against the key stored in @sb's crypt field.
+ *
+ * On return *@passphrase_key holds the key derived from the passphrase and
+ * *@sb_key holds the superblock key after it has been run through
+ * bch2_chacha_encrypt_key() with that derived key. Dies when the superblock
+ * has no crypt field, when its key is not encrypted, or when the result still
+ * looks encrypted (wrong passphrase).
+ */
+void bch2_passphrase_check(struct bch_sb *sb, const char *passphrase,
+			   struct bch_key *passphrase_key,
+			   struct bch_encrypted_key *sb_key)
+{
+	struct bch_sb_field_crypt *field = bch2_sb_field_get(sb, crypt);
+
+	if (!field)
+		die("filesystem is not encrypted");
+
+	*sb_key = field->key;
+	if (!bch2_key_is_encrypted(sb_key))
+		die("filesystem does not have encryption key");
+
+	*passphrase_key = derive_passphrase(field, passphrase);
+
+	/* Apply the cipher to our copy, then see whether the result is valid: */
+	if (bch2_chacha_encrypt_key(passphrase_key, __bch2_sb_key_nonce(sb),
+				    sb_key, sizeof(*sb_key)))
+		die("error encrypting key");
+
+	if (bch2_key_is_encrypted(sb_key))
+		die("incorrect passphrase");
+}
+
+/*
+ * Check @passphrase against @sb and add the derived key to a kernel keyring.
+ *
+ * @type        key type handed to add_key()
+ * @keyring_str one of "session", "user" or "user_session"; anything else is
+ *              fatal
+ *
+ * The key is described as "bcachefs:<user uuid of sb>". Key material held in
+ * locals is wiped before returning.
+ */
+void bch2_add_key(struct bch_sb *sb,
+		  const char *type,
+		  const char *keyring_str,
+		  const char *passphrase)
+{
+	static const struct {
+		const char	*name;
+		int		id;
+	} keyrings[] = {
+		{ "session",		KEY_SPEC_SESSION_KEYRING },
+		{ "user",		KEY_SPEC_USER_KEYRING },
+		{ "user_session",	KEY_SPEC_USER_SESSION_KEYRING },
+	};
+	struct bch_key passphrase_key;
+	struct bch_encrypted_key sb_key;
+	int keyring = 0;
+	bool known = false;
+
+	for (size_t k = 0; k < sizeof(keyrings) / sizeof(keyrings[0]); k++) {
+		if (!strcmp(keyring_str, keyrings[k].name)) {
+			keyring = keyrings[k].id;
+			known = true;
+			break;
+		}
+	}
+
+	if (!known)
+		die("unknown keyring %s", keyring_str);
+
+	bch2_passphrase_check(sb, passphrase, &passphrase_key, &sb_key);
+
+	char uuid_str[40];
+	uuid_unparse_lower(sb->user_uuid.b, uuid_str);
+
+	char *desc = mprintf("bcachefs:%s", uuid_str);
+
+	if (add_key(type, desc,
+		    &passphrase_key, sizeof(passphrase_key),
+		    keyring) < 0)
+		die("add_key error: %m");
+
+	memzero_explicit(desc, strlen(desc));
+	free(desc);
+	memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+	memzero_explicit(&sb_key, sizeof(sb_key));
+}
+
+/*
+ * Fill in @crypt with a freshly generated random key.
+ *
+ * With a non-NULL @passphrase the scrypt parameters are recorded in @crypt and
+ * the key is encrypted with the passphrase-derived key; with a NULL
+ * @passphrase the key is left as generated.
+ */
+void bch_sb_crypt_init(struct bch_sb *sb,
+		       struct bch_sb_field_crypt *crypt,
+		       const char *passphrase)
+{
+	struct bch_key derived;
+
+	crypt->key.magic = BCH_KEY_MAGIC;
+	get_random_bytes(&crypt->key.key, sizeof(crypt->key.key));
+
+	if (!passphrase)
+		return;
+
+	/* KDF parameters are stored as log2 values */
+	SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT);
+	SET_BCH_KDF_SCRYPT_N(crypt, ilog2(16384));
+	SET_BCH_KDF_SCRYPT_R(crypt, ilog2(8));
+	SET_BCH_KDF_SCRYPT_P(crypt, ilog2(16));
+
+	derived = derive_passphrase(crypt, passphrase);
+
+	assert(!bch2_key_is_encrypted(&crypt->key));
+
+	if (bch2_chacha_encrypt_key(&derived, __bch2_sb_key_nonce(sb),
+				    &crypt->key, sizeof(crypt->key)))
+		die("error encrypting key");
+
+	assert(bch2_key_is_encrypted(&crypt->key));
+
+	memzero_explicit(&derived, sizeof(derived));
+}
diff --git a/c_src/crypto.h b/c_src/crypto.h
new file mode 100644
index 00000000..baea6d86
--- /dev/null
+++ b/c_src/crypto.h
@@ -0,0 +1,22 @@
+#ifndef _CRYPTO_H
+#define _CRYPTO_H
+
+#include "tools-util.h"
+
+struct bch_sb;
+struct bch_sb_field_crypt;
+struct bch_key;
+struct bch_encrypted_key;
+
+char *read_passphrase(const char *);
+char *read_passphrase_twice(const char *);
+
+struct bch_key derive_passphrase(struct bch_sb_field_crypt *, const char *);
+bool bch2_sb_is_encrypted(struct bch_sb *);
+void bch2_passphrase_check(struct bch_sb *, const char *,
+ struct bch_key *, struct bch_encrypted_key *);
+void bch2_add_key(struct bch_sb *, const char *, const char *, const char *);
+void bch_sb_crypt_init(struct bch_sb *sb, struct bch_sb_field_crypt *,
+ const char *);
+
+#endif /* _CRYPTO_H */
diff --git a/c_src/libbcachefs.c b/c_src/libbcachefs.c
new file mode 100644
index 00000000..75cab72c
--- /dev/null
+++ b/c_src/libbcachefs.c
@@ -0,0 +1,754 @@
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <uuid/uuid.h>
+
+#include "libbcachefs.h"
+#include "crypto.h"
+#include "libbcachefs/bcachefs_format.h"
+#include "libbcachefs/btree_cache.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/checksum.h"
+#include "libbcachefs/disk_groups.h"
+#include "libbcachefs/journal_seq_blacklist.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/replicas.h"
+#include "libbcachefs/super-io.h"
+#include "tools-util.h"
+
+#define NSEC_PER_SEC 1000000000L
+
+/*
+ * Initialise superblock layout @l with two superblocks placed back to back
+ * starting at sector @sb_start, each @sb_size sectors apart.
+ *
+ * Positions other than BCH_SB_SECTOR are rounded up to a multiple of the block
+ * size (@block_size is in bytes). Dies if the superblocks would extend past
+ * @sb_end.
+ */
+static void init_layout(struct bch_sb_layout *l,
+			unsigned block_size,
+			unsigned sb_size,
+			u64 sb_start, u64 sb_end)
+{
+	u64 next = sb_start;
+	unsigned idx = 0;
+
+	memset(l, 0, sizeof(*l));
+
+	l->magic		= BCHFS_MAGIC;
+	l->layout_type		= 0;
+	l->nr_superblocks	= 2;
+	l->sb_max_size_bits	= ilog2(sb_size);
+
+	/* Lay out the superblocks one after another in the allowed range: */
+	while (idx < l->nr_superblocks) {
+		if (next != BCH_SB_SECTOR)
+			next = round_up(next, block_size >> 9);
+
+		l->sb_offset[idx++] = cpu_to_le64(next);
+		next += sb_size;
+	}
+
+	if (next > sb_end)
+		die("insufficient space for superblocks: start %llu end %llu > %llu size %u",
+		    sb_start, next, sb_end, sb_size);
+}
+
+/*
+ * Smallest device size on which a filesystem with the given bucket size can
+ * be created: BCH_MIN_NR_NBUCKETS buckets.
+ */
+static u64 min_size(unsigned bucket_size)
+{
+	return bucket_size * BCH_MIN_NR_NBUCKETS;
+}
+
+/*
+ * Choose a bucket size (bytes) for @dev.
+ *
+ * Starts from the largest of the block size, the btree node size (when set)
+ * and 128k. If the device is too small for that many-buckets minimum, the size
+ * is halved until it fits; otherwise it is scaled up with device size and
+ * capped at 1 MB. Dies when the device cannot hold the minimum number of
+ * block-sized buckets.
+ */
+u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+{
+	u64 picked;
+
+	if (dev->size < min_size(opts.block_size))
+		die("cannot format %s, too small (%llu bytes, min %llu)",
+		    dev->path, dev->size, min_size(opts.block_size));
+
+	/* Never smaller than a block ... */
+	picked = opts.block_size;
+
+	/* ... nor smaller than a btree node: */
+	if (opt_defined(opts, btree_node_size))
+		picked = max_t(unsigned, picked, opts.btree_node_size);
+
+	/* Aim for 128k or more when the device allows it: */
+	picked = max(picked, 128ULL << 10);
+
+	if (dev->size < min_size(picked)) {
+		/* Small device: shrink until enough buckets fit */
+		do {
+			picked /= 2;
+		} while (dev->size < min_size(picked));
+
+		return picked;
+	}
+
+	unsigned scale = max(1,
+			     ilog2(dev->size / min_size(picked)) / 4);
+
+	scale = rounddown_pow_of_two(scale);
+
+	/* max bucket size 1 mb */
+	return min(picked * scale, 1ULL << 20);
+}
+
+/*
+ * Validate @dev's bucket size and bucket count against the filesystem
+ * options; any violation is fatal.
+ */
+void bch2_check_bucket_size(struct bch_opts opts, struct dev_opts *dev)
+{
+	/* Largest size whose sector count still fits in 16 bits */
+	const u32 max_bucket_bytes = (u32) U16_MAX << 9;
+
+	if (dev->bucket_size < opts.block_size)
+		die("Bucket size (%llu) cannot be smaller than block size (%u)",
+		    dev->bucket_size, opts.block_size);
+
+	if (opt_defined(opts, btree_node_size) &&
+	    dev->bucket_size < opts.btree_node_size)
+		die("Bucket size (%llu) cannot be smaller than btree node size (%u)",
+		    dev->bucket_size, opts.btree_node_size);
+
+	if (dev->nbuckets < BCH_MIN_NR_NBUCKETS)
+		die("Not enough buckets: %llu, need %u (bucket size %llu)",
+		    dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->bucket_size);
+
+	if (dev->bucket_size > max_bucket_bytes)
+		die("Bucket size (%llu) too big (max %u)",
+		    dev->bucket_size, max_bucket_bytes);
+}
+
+/*
+ * Resolve target string @s to a target id.
+ *
+ * A NULL string yields 0. Otherwise @s is matched first against the device
+ * paths in @devs, then against the disk paths recorded in @sb; a string that
+ * matches neither is fatal.
+ */
+static unsigned parse_target(struct bch_sb_handle *sb,
+			     struct dev_opts *devs, size_t nr_devs,
+			     const char *s)
+{
+	if (!s)
+		return 0;
+
+	for (size_t n = 0; n < nr_devs; n++)
+		if (strcmp(s, devs[n].path) == 0)
+			return dev_to_target(n);
+
+	int group = bch2_disk_path_find(sb, s);
+	if (group >= 0)
+		return group_to_target(group);
+
+	die("Invalid target %s", s);
+	return 0;
+}
+
+/*
+ * Create a new filesystem on @devs: build a superblock in memory and write it
+ * to every device.
+ *
+ * @fs_opt_strs	option strings; only the *_target strings are read here
+ * @fs_opts	parsed filesystem options; block size and btree node size are
+ *		filled in when not already defined
+ * @opts	format-wide settings (label, uuid, version, encryption, ...)
+ * @devs	devices to format; size, bucket_size, nbuckets, sb_offset and
+ *		sb_end are filled in as needed
+ * @nr_devs	number of entries in @devs
+ *
+ * Each device's file descriptor is closed once its superblocks are written.
+ * Returns the in-memory superblock as last written (dev_idx and layout are
+ * those of the final device). All errors are fatal.
+ */
+struct bch_sb *bch2_format(struct bch_opt_strs	fs_opt_strs,
+			   struct bch_opts	fs_opts,
+			   struct format_opts	opts,
+			   struct dev_opts	*devs,
+			   size_t		nr_devs)
+{
+	struct bch_sb_handle sb = { NULL };
+	struct dev_opts *i;
+	unsigned max_dev_block_size = 0;
+	unsigned opt_id;
+	u64 min_bucket_size = U64_MAX;
+
+	for (i = devs; i < devs + nr_devs; i++)
+		max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd));
+
+	/* calculate block size: */
+	if (!opt_defined(fs_opts, block_size)) {
+		opt_set(fs_opts, block_size, max_dev_block_size);
+	} else if (fs_opts.block_size < max_dev_block_size)
+		die("blocksize too small: %u, must be greater than device blocksize %u",
+		    fs_opts.block_size, max_dev_block_size);
+
+	/* get device size, if it wasn't specified: */
+	for (i = devs; i < devs + nr_devs; i++)
+		if (!i->size)
+			i->size = get_size(i->bdev->bd_fd);
+
+	/* calculate bucket sizes: */
+	for (i = devs; i < devs + nr_devs; i++)
+		min_bucket_size = min(min_bucket_size,
+			i->bucket_size ?: bch2_pick_bucket_size(fs_opts, i));
+
+	/* Devices without an explicit bucket size all get the smallest one: */
+	for (i = devs; i < devs + nr_devs; i++)
+		if (!i->bucket_size)
+			i->bucket_size = min_bucket_size;
+
+	for (i = devs; i < devs + nr_devs; i++) {
+		i->nbuckets = i->size / i->bucket_size;
+		bch2_check_bucket_size(fs_opts, i);
+	}
+
+	/* calculate btree node size: */
+	if (!opt_defined(fs_opts, btree_node_size)) {
+		/* 256k default btree node size */
+		opt_set(fs_opts, btree_node_size, 256 << 10);
+
+		for (i = devs; i < devs + nr_devs; i++)
+			fs_opts.btree_node_size =
+				min_t(unsigned, fs_opts.btree_node_size,
+				      i->bucket_size);
+	}
+
+	if (uuid_is_null(opts.uuid.b))
+		uuid_generate(opts.uuid.b);
+
+	if (bch2_sb_realloc(&sb, 0))
+		die("insufficient memory");
+
+	/* opts.version is in CPU byte order; the superblock is little endian */
+	sb.sb->version		= cpu_to_le16(opts.version);
+	sb.sb->version_min	= cpu_to_le16(opts.version);
+	sb.sb->magic		= BCHFS_MAGIC;
+	sb.sb->user_uuid	= opts.uuid;
+	sb.sb->nr_devices	= nr_devs;
+
+	if (opts.version == bcachefs_metadata_version_current)
+		sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
+
+	uuid_generate(sb.sb->uuid.b);
+
+	if (opts.label)
+		memcpy(sb.sb->label,
+		       opts.label,
+		       min(strlen(opts.label), sizeof(sb.sb->label)));
+
+	/* Store every option: the given value if defined, else the default */
+	for (opt_id = 0;
+	     opt_id < bch2_opts_nr;
+	     opt_id++) {
+		u64 v;
+
+		v = bch2_opt_defined_by_id(&fs_opts, opt_id)
+			? bch2_opt_get_by_id(&fs_opts, opt_id)
+			: bch2_opt_get_by_id(&bch2_opts_default, opt_id);
+
+		__bch2_opt_set_sb(sb.sb, -1, &bch2_opt_table[opt_id], v);
+	}
+
+	struct timespec now;
+	if (clock_gettime(CLOCK_REALTIME, &now))
+		die("error getting current time: %m");
+
+	sb.sb->time_base_lo	= cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
+	sb.sb->time_precision	= cpu_to_le32(1);
+
+	/* Member info: */
+	struct bch_sb_field_members_v2 *mi =
+		bch2_sb_field_resize(&sb, members_v2,
+			(sizeof(*mi) + sizeof(struct bch_member) *
+			 nr_devs) / sizeof(u64));
+	mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
+	for (i = devs; i < devs + nr_devs; i++) {
+		struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs));
+
+		uuid_generate(m->uuid.b);
+		m->nbuckets	= cpu_to_le64(i->nbuckets);
+		m->first_bucket	= 0;
+		/* stored in 512-byte sectors */
+		m->bucket_size	= cpu_to_le16(i->bucket_size >> 9);
+
+		SET_BCH_MEMBER_DISCARD(m,	i->discard);
+		SET_BCH_MEMBER_DATA_ALLOWED(m,	i->data_allowed);
+		SET_BCH_MEMBER_DURABILITY(m,	i->durability + 1);
+	}
+
+	/* Disk labels: */
+	for (i = devs; i < devs + nr_devs; i++) {
+		struct bch_member *m;
+		int idx;
+
+		if (!i->label)
+			continue;
+
+		idx = bch2_disk_path_find_or_create(&sb, i->label);
+		if (idx < 0)
+			die("error creating disk path: %s", strerror(-idx));
+
+		/*
+		 * Recompute mi and m after each sb modification: its location
+		 * in memory may have changed due to reallocation.
+		 */
+		m = bch2_members_v2_get_mut(sb.sb, (i - devs));
+		SET_BCH_MEMBER_GROUP(m,	idx + 1);
+	}
+
+	SET_BCH_SB_FOREGROUND_TARGET(sb.sb,
+		parse_target(&sb, devs, nr_devs, fs_opt_strs.foreground_target));
+	SET_BCH_SB_BACKGROUND_TARGET(sb.sb,
+		parse_target(&sb, devs, nr_devs, fs_opt_strs.background_target));
+	SET_BCH_SB_PROMOTE_TARGET(sb.sb,
+		parse_target(&sb, devs, nr_devs, fs_opt_strs.promote_target));
+	SET_BCH_SB_METADATA_TARGET(sb.sb,
+		parse_target(&sb, devs, nr_devs, fs_opt_strs.metadata_target));
+
+	/* Crypt: */
+	if (opts.encrypted) {
+		struct bch_sb_field_crypt *crypt =
+			bch2_sb_field_resize(&sb, crypt, sizeof(*crypt) / sizeof(u64));
+
+		bch_sb_crypt_init(sb.sb, crypt, opts.passphrase);
+		SET_BCH_SB_ENCRYPTION_TYPE(sb.sb, 1);
+	}
+
+	bch2_sb_members_cpy_v2_v1(&sb);
+
+	/* Write the superblock to each device, customised per device: */
+	for (i = devs; i < devs + nr_devs; i++) {
+		u64 size_sectors = i->size >> 9;
+
+		sb.sb->dev_idx = i - devs;
+
+		if (!i->sb_offset) {
+			i->sb_offset	= BCH_SB_SECTOR;
+			i->sb_end	= size_sectors;
+		}
+
+		init_layout(&sb.sb->layout, fs_opts.block_size,
+			    opts.superblock_size,
+			    i->sb_offset, i->sb_end);
+
+		/*
+		 * Also create a backup superblock at the end of the disk:
+		 *
+		 * If we're not creating a superblock at the default offset, it
+		 * means we're being run from the migrate tool and we could be
+		 * overwriting existing data if we write to the end of the disk:
+		 */
+		if (i->sb_offset == BCH_SB_SECTOR) {
+			struct bch_sb_layout *l = &sb.sb->layout;
+			u64 backup_sb = size_sectors - (1 << l->sb_max_size_bits);
+
+			backup_sb = rounddown(backup_sb, i->bucket_size >> 9);
+			l->sb_offset[l->nr_superblocks++] = cpu_to_le64(backup_sb);
+		}
+
+		if (i->sb_offset == BCH_SB_SECTOR) {
+			/* Zero start of disk */
+			static const char zeroes[BCH_SB_SECTOR << 9];
+
+			xpwrite(i->bdev->bd_fd, zeroes, BCH_SB_SECTOR << 9, 0,
+				"zeroing start of disk");
+		}
+
+		bch2_super_write(i->bdev->bd_fd, sb.sb);
+		close(i->bdev->bd_fd);
+	}
+
+	return sb.sb;
+}
+
+/*
+ * Write @sb to every location listed in its layout on @fd, then fsync.
+ *
+ * For the copy at BCH_SB_SECTOR, the layout is additionally stored at the end
+ * of the first 4096 bytes of the device (the "backup layout"). sb->offset and
+ * sb->csum are updated for each copy, so @sb is left describing the last one.
+ */
+void bch2_super_write(int fd, struct bch_sb *sb)
+{
+	struct nonce nonce = { 0 };
+	unsigned bs = get_blocksize(fd);
+
+	unsigned i;
+	for (i = 0; i < sb->layout.nr_superblocks; i++) {
+		sb->offset = sb->layout.sb_offset[i];
+
+		/* sb->offset is little endian on disk; compare in CPU order */
+		if (le64_to_cpu(sb->offset) == BCH_SB_SECTOR) {
+			/* Write backup layout */
+
+			BUG_ON(bs > 4096);
+
+			char *buf = aligned_alloc(bs, bs);
+			if (!buf)
+				die("insufficient memory");
+
+			/* Read-modify-write the last block below 4096: */
+			xpread(fd, buf, bs, 4096 - bs);
+			memcpy(buf + bs - sizeof(sb->layout),
+			       &sb->layout,
+			       sizeof(sb->layout));
+			xpwrite(fd, buf, bs, 4096 - bs,
+				"backup layout");
+			free(buf);
+
+		}
+
+		sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb), nonce, sb);
+		xpwrite(fd, sb, round_up(vstruct_bytes(sb), bs),
+			le64_to_cpu(sb->offset) << 9,
+			"superblock");
+	}
+
+	fsync(fd);
+}
+
+/*
+ * Read the superblock found at @sector of @fd.
+ *
+ * The fixed-size header is read first to validate the magic and learn the
+ * full size; the whole superblock is then read into a malloc()ed buffer that
+ * the caller must free. Dies if the magic does not match or memory runs out.
+ */
+struct bch_sb *__bch2_super_read(int fd, u64 sector)
+{
+	struct bch_sb sb, *ret;
+
+	xpread(fd, &sb, sizeof(sb), sector << 9);
+
+	if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)) &&
+	    memcmp(&sb.magic, &BCHFS_MAGIC, sizeof(sb.magic)))
+		die("not a bcachefs superblock");
+
+	size_t bytes = vstruct_bytes(&sb);
+
+	ret = malloc(bytes);
+	if (!ret)
+		die("insufficient memory");
+
+	xpread(fd, ret, bytes, sector << 9);
+
+	return ret;
+}
+
+/* ioctl interface: */
+
+/* Global control device: */
+/* Open the global control node read-write and return its file descriptor. */
+int bcachectl_open(void)
+{
+	int ctl_fd = xopen("/dev/bcachefs-ctl", O_RDWR);
+
+	return ctl_fd;
+}
+
+/* Filesystem handles (ioctl, sysfs dir): */
+
+#define SYSFS_BASE "/sys/fs/bcachefs/"
+
+/* Release both file descriptors held by a filesystem handle. */
+void bcache_fs_close(struct bchfs_handle fs)
+{
+	int ioctl_fd = fs.ioctl_fd;
+	int sysfs_fd = fs.sysfs_fd;
+
+	close(ioctl_fd);
+	close(sysfs_fd);
+}
+
+/*
+ * Open a handle to a running filesystem.
+ *
+ * @path is either a UUID string, looked up under SYSFS_BASE, or a filesystem
+ * path that is opened and queried with BCH_IOCTL_QUERY_UUID. Either way the
+ * returned handle carries the UUID, an ioctl fd and a sysfs directory fd.
+ * Failures are fatal.
+ */
+struct bchfs_handle bcache_fs_open(const char *path)
+{
+	struct bchfs_handle handle;
+
+	if (uuid_parse(path, handle.uuid.b)) {
+		/* Not a UUID, so it's a path: */
+		struct bch_ioctl_query_uuid query;
+		char uuid_str[40];
+		char *sysfs_path;
+
+		handle.ioctl_fd = open(path, O_RDONLY);
+		if (handle.ioctl_fd < 0)
+			die("Error opening filesystem at %s: %m", path);
+
+		if (ioctl(handle.ioctl_fd, BCH_IOCTL_QUERY_UUID, &query) < 0)
+			die("error opening %s: not a bcachefs filesystem", path);
+
+		handle.uuid = query.uuid;
+		uuid_unparse(query.uuid.b, uuid_str);
+
+		sysfs_path = mprintf(SYSFS_BASE "%s", uuid_str);
+		handle.sysfs_fd = xopen(sysfs_path, O_RDONLY);
+		free(sysfs_path);
+	} else {
+		/* A UUID: locate the filesystem through sysfs */
+		char *sysfs_path = mprintf(SYSFS_BASE "%s", path);
+
+		handle.sysfs_fd = xopen(sysfs_path, O_RDONLY);
+
+		/* The "minor" attribute names the per-filesystem ctl node */
+		char *minor_str = read_file_str(handle.sysfs_fd, "minor");
+		char *ctl_path = mprintf("/dev/bcachefs%s-ctl", minor_str);
+
+		handle.ioctl_fd = xopen(ctl_path, O_RDWR);
+
+		free(sysfs_path);
+		free(minor_str);
+		free(ctl_path);
+	}
+
+	return handle;
+}
+
+/*
+ * Given a path to a block device, open the filesystem it belongs to; also
+ * return the device's idx in *@idx.
+ *
+ * For a block device node the sysfs "bcachefs" link of that device is tried
+ * first: its target ends in ".../<uuid>/dev-<idx>". If the link cannot be
+ * read, or @path is not a block device, the superblock is read from @path
+ * instead. Failures are fatal.
+ */
+struct bchfs_handle bchu_fs_open_by_dev(const char *path, int *idx)
+{
+	struct bch_opts opts = bch2_opts_empty();
+	char buf[1024], *uuid_str;
+
+	struct stat stat = xstat(path);
+
+	if (S_ISBLK(stat.st_mode)) {
+		/*
+		 * st_rdev identifies the device the node refers to; st_dev
+		 * would be the filesystem the node itself lives on.
+		 */
+		char *sysfs = mprintf("/sys/dev/block/%u:%u/bcachefs",
+				      major(stat.st_rdev),
+				      minor(stat.st_rdev));
+
+		/* readlink() does not NUL-terminate: leave room and do it */
+		ssize_t len = readlink(sysfs, buf, sizeof(buf) - 1);
+		free(sysfs);
+
+		if (len <= 0)
+			goto read_super;
+		buf[len] = '\0';
+
+		char *p = strrchr(buf, '/');
+		unsigned dev_idx;
+
+		if (!p || sscanf(p + 1, "dev-%u", &dev_idx) != 1)
+			die("error parsing sysfs");
+		*idx = dev_idx;
+
+		/* Cut off "/dev-N"; the last remaining component is the UUID */
+		*p = '\0';
+		p = strrchr(buf, '/');
+		uuid_str = p ? p + 1 : buf;
+	} else {
+read_super:
+		opt_set(opts, noexcl,	true);
+		opt_set(opts, nochanges, true);
+
+		struct bch_sb_handle sb;
+		int ret = bch2_read_super(path, &opts, &sb);
+		if (ret)
+			die("Error opening %s: %s", path, strerror(-ret));
+
+		*idx = sb.sb->dev_idx;
+		uuid_str = buf;
+		uuid_unparse(sb.sb->user_uuid.b, uuid_str);
+
+		bch2_free_super(&sb);
+	}
+
+	return bcache_fs_open(uuid_str);
+}
+
+/*
+ * Return the index of the device at @dev_path within @fs, or -1 when that
+ * device belongs to a filesystem with a different UUID.
+ */
+int bchu_dev_path_to_idx(struct bchfs_handle fs, const char *dev_path)
+{
+	int dev_idx;
+	struct bchfs_handle owner = bchu_fs_open_by_dev(dev_path, &dev_idx);
+	bool same_fs = !memcmp(&fs.uuid, &owner.uuid, sizeof(fs.uuid));
+
+	bcache_fs_close(owner);
+
+	return same_fs ? dev_idx : -1;
+}
+
+/*
+ * Start a data job on @fs with BCH_IOCTL_DATA and report its progress on
+ * stdout, rewriting one status line per event, until an event with data_type
+ * U8_MAX arrives. Always returns 0; read errors are fatal.
+ */
+int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd)
+{
+	int progress_fd = xioctl(fs.ioctl_fd, BCH_IOCTL_DATA, &cmd);
+
+	for (;;) {
+		struct bch_ioctl_data_event ev;
+
+		if (read(progress_fd, &ev, sizeof(ev)) != sizeof(ev))
+			die("error reading from progress fd %m");
+
+		/* Only events of type 0 are reported */
+		if (ev.type)
+			continue;
+
+		if (ev.p.data_type == U8_MAX)
+			break;
+
+		/* Erase the current terminal line and return to column 0 */
+		printf("\33[2K\r");
+
+		printf("%llu%% complete: current position %s",
+		       ev.p.sectors_total
+		       ? ev.p.sectors_done * 100 / ev.p.sectors_total
+		       : 0,
+		       bch2_data_type_str(ev.p.data_type));
+
+		if (ev.p.data_type == BCH_DATA_btree ||
+		    ev.p.data_type == BCH_DATA_user)
+			printf(" %s:%llu:%llu",
+			       bch2_btree_id_str(ev.p.btree_id),
+			       ev.p.pos.inode,
+			       ev.p.pos.offset);
+
+		fflush(stdout);
+		sleep(1);
+	}
+	printf("\nDone\n");
+
+	close(progress_fd);
+	return 0;
+}
+
+/* option parsing */
+
+/* Free every option string held in @opts and reset each slot to NULL. */
+void bch2_opt_strs_free(struct bch_opt_strs *opts)
+{
+	char **slot = opts->by_id;
+	char **end = opts->by_id + bch2_opts_nr;
+
+	for (; slot < end; slot++) {
+		free(*slot);
+		*slot = NULL;
+	}
+}
+
+/*
+ * Pull "--name[=value]" / "--name value" options out of the command line.
+ *
+ * Every argument naming a known option whose flags intersect @opt_types is
+ * removed from @argv (with *@argc adjusted) and its value recorded, as a
+ * strdup()ed string, in the returned table; everything else is left in place.
+ * A boolean option given without a value is recorded as "1". When an option
+ * is repeated the last occurrence wins.
+ *
+ * Release the result with bch2_opt_strs_free().
+ */
+struct bch_opt_strs bch2_cmdline_opts_get(int *argc, char *argv[],
+					  unsigned opt_types)
+{
+	struct bch_opt_strs opts;
+	int i = 1;
+
+	memset(&opts, 0, sizeof(opts));
+
+	while (i < *argc) {
+		char *optstr = strcmp_prefix(argv[i], "--");
+		char *valstr = NULL, *p;
+		int optid, nr_args = 1;
+
+		if (!optstr) {
+			i++;
+			continue;
+		}
+
+		/* Work on a copy so "name=value" can be split in place */
+		optstr = strdup(optstr);
+
+		p = optstr;
+		while (isalpha((unsigned char) *p) || *p == '_')
+			p++;
+
+		if (*p == '=') {
+			*p = '\0';
+			valstr = p + 1;
+		}
+
+		optid = bch2_opt_lookup(optstr);
+		if (optid < 0 ||
+		    !(bch2_opt_table[optid].flags & opt_types)) {
+			i++;
+			goto next;
+		}
+
+		/*
+		 * Take the value from the following argument, but only when
+		 * there is one: consuming a second slot at the end of argv
+		 * would make the memmove() length below negative.
+		 */
+		if (!valstr &&
+		    bch2_opt_table[optid].type != BCH_OPT_BOOL &&
+		    i + 1 < *argc) {
+			nr_args = 2;
+			valstr = argv[i + 1];
+		}
+
+		if (!valstr)
+			valstr = "1";
+
+		/* Drop the value of an earlier occurrence, if any */
+		free(opts.by_id[optid]);
+		opts.by_id[optid] = strdup(valstr);
+
+		/* Close the gap; i now indexes the next unexamined argument */
+		*argc -= nr_args;
+		memmove(&argv[i],
+			&argv[i + nr_args],
+			sizeof(char *) * (*argc - i));
+		argv[*argc] = NULL;
+next:
+		free(optstr);
+	}
+
+	return opts;
+}
+
+/*
+ * Parse every option string present in @strs into a struct bch_opts.
+ *
+ * A parse error is fatal, except for -BCH_ERR_option_needs_open_fs, which is
+ * tolerated: the option is still marked as set.
+ */
+struct bch_opts bch2_parse_opts(struct bch_opt_strs strs)
+{
+	struct bch_opts opts = bch2_opts_empty();
+	struct printbuf err = PRINTBUF;
+	unsigned i;
+	int ret;
+
+	for (i = 0; i < bch2_opts_nr; i++) {
+		/*
+		 * Start from a known value: in the tolerated error case the
+		 * parser may not have stored anything (NOTE(review): confirm
+		 * against bch2_opt_parse), and we must not pass on an
+		 * uninitialised or stale value.
+		 */
+		u64 v = 0;
+
+		if (!strs.by_id[i])
+			continue;
+
+		ret = bch2_opt_parse(NULL,
+				     &bch2_opt_table[i],
+				     strs.by_id[i], &v, &err);
+		if (ret < 0 && ret != -BCH_ERR_option_needs_open_fs)
+			die("Invalid option %s", err.buf);
+
+		bch2_opt_set_by_id(&opts, i, v);
+	}
+
+	printbuf_exit(&err);
+	return opts;
+}
+
+/* End the current output line and reset the column counter @c. */
+#define newline(c)		\
+	do {			\
+		printf("\n");	\
+		c = 0;		\
+	} while(0)
+
+/*
+ * Print a usage line for every option whose flags intersect @opt_types:
+ * "--name", the accepted values, and the help text aligned at a fixed column.
+ */
+void bch2_opts_usage(unsigned opt_types)
+{
+	const unsigned helpcol = 30;
+	unsigned col = 0;
+
+	for (unsigned id = 0; id < bch2_opts_nr; id++) {
+		const struct bch_option *opt = bch2_opt_table + id;
+		unsigned choice;
+
+		if (!(opt->flags & opt_types))
+			continue;
+
+		col += printf("      --%s", opt->attr.name);
+
+		switch (opt->type) {
+		case BCH_OPT_BOOL:
+			/* flags take no value */
+			break;
+		case BCH_OPT_STR:
+			col += printf("=(");
+			for (choice = 0; opt->choices[choice]; choice++) {
+				if (choice)
+					col += printf("|");
+				col += printf("%s", opt->choices[choice]);
+			}
+			col += printf(")");
+			break;
+		default:
+			col += printf("=%s", opt->hint);
+			break;
+		}
+
+		if (!opt->help) {
+			newline(col);
+			continue;
+		}
+
+		/* Option text ran into the help column: start a fresh line */
+		if (col >= helpcol)
+			newline(col);
+
+		/* Emit the help text one '\n'-separated line at a time */
+		for (const char *line = opt->help;;) {
+			const char *eol = strchrnul(line, '\n');
+
+			for (; col < helpcol; col++)
+				putchar(' ');
+
+			printf("%.*s", (int) (eol - line), line);
+			newline(col);
+
+			if (!*eol)
+				break;
+			line = eol + 1;
+		}
+	}
+}
+
+/*
+ * List the member devices of @fs by scanning its sysfs directory for
+ * "dev-<idx>" entries.
+ *
+ * For each one the index, the basename of its "block" link (left NULL when
+ * the link cannot be read), its label and its durability are collected. The
+ * strings in the returned array are heap allocated.
+ *
+ * The directory is scanned through a duplicate of fs.sysfs_fd so that
+ * closedir() does not close the caller's descriptor.
+ */
+dev_names bchu_fs_get_devices(struct bchfs_handle fs)
+{
+	int dir_fd = dup(fs.sysfs_fd);
+	if (dir_fd < 0)
+		die("error duplicating sysfs fd: %m");
+
+	DIR *dir = fdopendir(dir_fd);
+	if (!dir)
+		die("error opening sysfs directory: %m");
+
+	/* The duplicate shares its position with the original descriptor */
+	rewinddir(dir);
+
+	struct dirent *d;
+	dev_names devs;
+
+	darray_init(&devs);
+
+	while ((errno = 0), (d = readdir(dir))) {
+		struct dev_name n = { 0, NULL, NULL };
+
+		if (sscanf(d->d_name, "dev-%u", &n.idx) != 1)
+			continue;
+
+		char *block_attr = mprintf("dev-%u/block", n.idx);
+
+		/* readlinkat() does not NUL-terminate: keep one byte spare */
+		char sysfs_block_buf[4096];
+		ssize_t r = readlinkat(fs.sysfs_fd, block_attr,
+				       sysfs_block_buf, sizeof(sysfs_block_buf) - 1);
+		if (r > 0) {
+			sysfs_block_buf[r] = '\0';
+			n.dev = strdup(basename(sysfs_block_buf));
+		}
+
+		free(block_attr);
+
+		char *label_attr = mprintf("dev-%u/label", n.idx);
+		n.label = read_file_str(fs.sysfs_fd, label_attr);
+		free(label_attr);
+
+		char *durability_attr = mprintf("dev-%u/durability", n.idx);
+		n.durability = read_file_u64(fs.sysfs_fd, durability_attr);
+		free(durability_attr);
+
+		darray_push(&devs, n);
+	}
+
+	closedir(dir);
+
+	return devs;
+}
diff --git a/c_src/libbcachefs.h b/c_src/libbcachefs.h
new file mode 100644
index 00000000..fc6eb8bf
--- /dev/null
+++ b/c_src/libbcachefs.h
@@ -0,0 +1,300 @@
+#ifndef _LIBBCACHE_H
+#define _LIBBCACHE_H
+
+#include <linux/uuid.h>
+#include <stdbool.h>
+
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/bcachefs_format.h"
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/inode.h"
+#include "libbcachefs/opts.h"
+#include "libbcachefs/vstructs.h"
+#include "tools-util.h"
+
+/* option parsing */
+
+#define SUPERBLOCK_SIZE_DEFAULT 2048 /* 1 MB */
+
+struct bch_opt_strs { /* one string per option; a slot is NULL when the option was not given */
+union {
+ char *by_id[bch2_opts_nr]; /* the slots, indexed by option id */
+struct { /* the same slots, one member per entry of BCH_OPTS() */
+#define x(_name, ...) char *_name;
+ BCH_OPTS()
+#undef x
+};
+};
+};
+
+void bch2_opt_strs_free(struct bch_opt_strs *);
+struct bch_opt_strs bch2_cmdline_opts_get(int *, char *[], unsigned);
+struct bch_opts bch2_parse_opts(struct bch_opt_strs);
+void bch2_opts_usage(unsigned);
+
+struct format_opts { /* filesystem-wide settings consumed by bch2_format() */
+ char *label; /* copied into the superblock label when non-NULL */
+ __uuid_t uuid; /* becomes the user UUID; generated when left null */
+ unsigned version; /* metadata version written to the superblock */
+ unsigned superblock_size; /* in 512-byte sectors, see SUPERBLOCK_SIZE_DEFAULT */
+ bool encrypted; /* add a crypt field to the superblock */
+ char *passphrase; /* used to encrypt the key; may be NULL */
+ char *source; /* not read anywhere in this file's visible part */
+};
+
+/*
+ * Default format options: the metadata version advertised by the loaded
+ * kernel module when its sysfs parameter is readable, else the version this
+ * tool was built with; the default superblock size; everything else zero.
+ */
+static inline struct format_opts format_opts_default()
+{
+	struct format_opts defaults = {
+		.superblock_size = SUPERBLOCK_SIZE_DEFAULT,
+	};
+
+	if (access(   "/sys/module/bcachefs/parameters/version", R_OK) == 0)
+		defaults.version =
+			read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version");
+	else
+		defaults.version = bcachefs_metadata_version_current;
+
+	return defaults;
+}
+
+struct dev_opts { /* per-device settings and state used while formatting */
+ struct file *file;
+ struct block_device *bdev; /* bdev->bd_fd is the fd used for all device I/O */
+ char *path; /* also matched against target strings */
+ u64 size; /* bytes; 0 means "query the device" */
+ u64 bucket_size; /* bytes; 0 means "pick one" */
+ const char *label; /* disk label/group path, may be NULL */
+ unsigned data_allowed;
+ unsigned durability;
+ bool discard;
+
+ u64 nbuckets; /* computed as size / bucket_size */
+
+ u64 sb_offset; /* first superblock, in sectors; 0 means BCH_SB_SECTOR */
+ u64 sb_end; /* end of the range available to superblocks, in sectors */
+};
+
+/*
+ * Default device options: every data type bit from bit 2 upwards allowed,
+ * durability 1, all other fields zero.
+ */
+static inline struct dev_opts dev_opts_default()
+{
+	struct dev_opts defaults = {
+		.durability	= 1,
+		.data_allowed	= ~0U << 2,
+	};
+
+	return defaults;
+}
+
+u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *);
+void bch2_check_bucket_size(struct bch_opts, struct dev_opts *);
+
+struct bch_sb *bch2_format(struct bch_opt_strs,
+ struct bch_opts,
+ struct format_opts, struct dev_opts *, size_t);
+
+void bch2_super_write(int, struct bch_sb *);
+struct bch_sb *__bch2_super_read(int, u64);
+
+/* ioctl interface: */
+
+int bcachectl_open(void);
+
+struct bchfs_handle { /* handle to a running filesystem, see bcache_fs_open() */
+ __uuid_t uuid; /* filesystem UUID */
+ int ioctl_fd; /* fd that BCH_IOCTL_* requests are issued on */
+ int sysfs_fd; /* fd of the filesystem's directory under /sys/fs/bcachefs/ */
+};
+
+void bcache_fs_close(struct bchfs_handle);
+struct bchfs_handle bcache_fs_open(const char *);
+struct bchfs_handle bchu_fs_open_by_dev(const char *, int *);
+int bchu_dev_path_to_idx(struct bchfs_handle, const char *);
+
+/* Add the device at path @dev to @fs (BCH_IOCTL_DISK_ADD). */
+static inline void bchu_disk_add(struct bchfs_handle fs, char *dev)
+{
+	struct bch_ioctl_disk arg = {
+		.dev = (unsigned long) dev,
+	};
+
+	xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ADD, &arg);
+}
+
+/*
+ * Remove member @dev_idx from @fs (BCH_IOCTL_DISK_REMOVE). The device is
+ * named by index, so BCH_BY_INDEX is always added to @flags.
+ */
+static inline void bchu_disk_remove(struct bchfs_handle fs, unsigned dev_idx,
+				    unsigned flags)
+{
+	struct bch_ioctl_disk arg = {
+		.dev	= dev_idx,
+		.flags	= flags|BCH_BY_INDEX,
+	};
+
+	xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_REMOVE, &arg);
+}
+
+/* Bring the device at path @dev online in @fs (BCH_IOCTL_DISK_ONLINE). */
+static inline void bchu_disk_online(struct bchfs_handle fs, char *dev)
+{
+	struct bch_ioctl_disk arg = {
+		.dev = (unsigned long) dev,
+	};
+
+	xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ONLINE, &arg);
+}
+
+/*
+ * Take member @dev_idx of @fs offline (BCH_IOCTL_DISK_OFFLINE). The device is
+ * named by index, so BCH_BY_INDEX is always added to @flags.
+ */
+static inline void bchu_disk_offline(struct bchfs_handle fs, unsigned dev_idx,
+				     unsigned flags)
+{
+	struct bch_ioctl_disk arg = {
+		.dev	= dev_idx,
+		.flags	= flags|BCH_BY_INDEX,
+	};
+
+	xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_OFFLINE, &arg);
+}
+
+/*
+ * Change the state of member @dev of @fs to @new_state
+ * (BCH_IOCTL_DISK_SET_STATE). The device is named by index, so BCH_BY_INDEX is
+ * always added to @flags.
+ */
+static inline void bchu_disk_set_state(struct bchfs_handle fs, unsigned dev,
+				       unsigned new_state, unsigned flags)
+{
+	struct bch_ioctl_disk_set_state arg = {
+		.dev		= dev,
+		.new_state	= new_state,
+		.flags		= flags|BCH_BY_INDEX,
+	};
+
+	xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_SET_STATE, &arg);
+}
+
+/*
+ * Query filesystem usage with BCH_IOCTL_FS_USAGE.
+ *
+ * The space for replica entries starts at 4096 bytes and is doubled for as
+ * long as the kernel answers ERANGE; any other error is fatal. The caller
+ * owns the returned buffer.
+ */
+static inline struct bch_ioctl_fs_usage *bchu_fs_usage(struct bchfs_handle fs)
+{
+	struct bch_ioctl_fs_usage *usage = NULL;
+
+	for (size_t entries_bytes = 4096;; entries_bytes *= 2) {
+		usage = xrealloc(usage, sizeof(*usage) + entries_bytes);
+		usage->replica_entries_bytes = entries_bytes;
+
+		if (ioctl(fs.ioctl_fd, BCH_IOCTL_FS_USAGE, usage) == 0)
+			return usage;
+
+		if (errno != ERANGE)
+			die("BCH_IOCTL_USAGE error: %m");
+	}
+}
+
+/*
+ * Query accounting information with BCH_IOCTL_QUERY_ACCOUNTING.
+ *
+ * @typemask selects the accounting types to return. The buffer starts with
+ * room for 128 u64s and is doubled for as long as the kernel answers ERANGE.
+ *
+ * Returns a buffer owned by the caller, or NULL (with errno left at ENOTTY)
+ * when the kernel does not implement the ioctl. Any other error is fatal.
+ */
+static inline struct bch_ioctl_query_accounting *bchu_fs_accounting(struct bchfs_handle fs,
+								     unsigned typemask)
+{
+	unsigned accounting_u64s = 128;
+	struct bch_ioctl_query_accounting *ret = NULL;
+
+	while (1) {
+		ret = xrealloc(ret, sizeof(*ret) + accounting_u64s * sizeof(u64));
+
+		/* Only the header needs clearing before it is filled in */
+		memset(ret, 0, sizeof(*ret));
+
+		ret->accounting_u64s = accounting_u64s;
+		ret->accounting_types_mask = typemask;
+
+		if (!ioctl(fs.ioctl_fd, BCH_IOCTL_QUERY_ACCOUNTING, ret))
+			return ret;
+
+		if (errno == ENOTTY) {
+			/* Not supported: don't leak the request buffer */
+			free(ret);
+			errno = ENOTTY;
+			return NULL;
+		}
+
+		if (errno == ERANGE) {
+			accounting_u64s *= 2;
+			continue;
+		}
+
+		die("BCH_IOCTL_QUERY_ACCOUNTING error: %m");
+	}
+}
+
+/*
+ * Query usage of member @idx of @fs.
+ *
+ * BCH_IOCTL_DEV_USAGE_V2 is tried first; when it fails the older
+ * BCH_IOCTL_DEV_USAGE is issued and its result converted into the v2 layout.
+ * The caller owns the returned buffer.
+ */
+static inline struct bch_ioctl_dev_usage_v2 *bchu_dev_usage(struct bchfs_handle fs,
+							     unsigned idx)
+{
+	struct bch_ioctl_dev_usage_v2 *usage =
+		xcalloc(sizeof(*usage) + sizeof(usage->d[0]) * BCH_DATA_NR, 1);
+
+	usage->dev		= idx;
+	usage->flags		= BCH_BY_INDEX;
+	usage->nr_data_types	= BCH_DATA_NR;
+
+	if (ioctl(fs.ioctl_fd, BCH_IOCTL_DEV_USAGE_V2, usage) == 0)
+		return usage;
+
+	/* Fall back to the v1 request and copy its fields across */
+	struct bch_ioctl_dev_usage legacy = { .dev = idx, .flags = BCH_BY_INDEX};
+	xioctl(fs.ioctl_fd, BCH_IOCTL_DEV_USAGE, &legacy);
+
+	usage->state		= legacy.state;
+	usage->nr_data_types	= ARRAY_SIZE(legacy.d);
+	usage->bucket_size	= legacy.bucket_size;
+	usage->nr_buckets	= legacy.nr_buckets;
+
+	for (unsigned type = 0; type < ARRAY_SIZE(legacy.d); type++)
+		usage->d[type] = legacy.d[type];
+
+	return usage;
+}
+
+/*
+ * Read a superblock of @fs through BCH_IOCTL_READ_SUPER.
+ *
+ * With @idx == -1 no device is specified; otherwise the superblock of member
+ * @idx is requested. The buffer starts at 4096 bytes and is doubled for as
+ * long as the kernel answers ERANGE; any other error is fatal. The caller
+ * owns the returned buffer.
+ */
+static inline struct bch_sb *bchu_read_super(struct bchfs_handle fs, unsigned idx)
+{
+	struct bch_sb *buf = NULL;
+
+	for (size_t bytes = 4096;; bytes *= 2) {
+		buf = xrealloc(buf, bytes);
+
+		struct bch_ioctl_read_super arg = {
+			.sb	= (unsigned long) buf,
+			.size	= bytes,
+		};
+
+		if (idx != -1) {
+			arg.flags |= BCH_READ_DEV|BCH_BY_INDEX;
+			arg.dev = idx;
+		}
+
+		if (ioctl(fs.ioctl_fd, BCH_IOCTL_READ_SUPER, &arg) == 0)
+			return buf;
+
+		if (errno != ERANGE)
+			die("BCH_IOCTL_READ_SUPER error: %m");
+	}
+}
+
+/* Return the member index of device number @dev in @fs (BCH_IOCTL_DISK_GET_IDX). */
+static inline unsigned bchu_disk_get_idx(struct bchfs_handle fs, dev_t dev)
+{
+	struct bch_ioctl_disk_get_idx arg = {
+		.dev = dev,
+	};
+
+	return xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_GET_IDX, &arg);
+}
+
+/* Resize member @idx of @fs to @nbuckets buckets (BCH_IOCTL_DISK_RESIZE). */
+static inline void bchu_disk_resize(struct bchfs_handle fs,
+				    unsigned idx,
+				    u64 nbuckets)
+{
+	struct bch_ioctl_disk_resize arg = {
+		.nbuckets	= nbuckets,
+		.dev		= idx,
+		.flags		= BCH_BY_INDEX,
+	};
+
+	xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_RESIZE, &arg);
+}
+
+/*
+ * Resize the journal on member @idx of @fs to @nbuckets buckets
+ * (BCH_IOCTL_DISK_RESIZE_JOURNAL).
+ */
+static inline void bchu_disk_resize_journal(struct bchfs_handle fs,
+					    unsigned idx,
+					    u64 nbuckets)
+{
+	struct bch_ioctl_disk_resize arg = {
+		.nbuckets	= nbuckets,
+		.dev		= idx,
+		.flags		= BCH_BY_INDEX,
+	};
+
+	xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_RESIZE_JOURNAL, &arg);
+}
+
+int bchu_data(struct bchfs_handle, struct bch_ioctl_data);
+
+struct dev_name { /* one member device as listed by bchu_fs_get_devices() */
+ unsigned idx; /* N from the sysfs entry "dev-N" */
+ char *dev; /* basename of the dev-N/block link target, or NULL */
+ char *label; /* contents of dev-N/label */
+ uuid_t uuid; /* not filled in by bchu_fs_get_devices() */
+ unsigned durability; /* contents of dev-N/durability */
+};
+typedef DARRAY(struct dev_name) dev_names; /* growable array of struct dev_name */
+
+dev_names bchu_fs_get_devices(struct bchfs_handle);
+
+#endif /* _LIBBCACHE_H */
diff --git a/c_src/posix_to_bcachefs.c b/c_src/posix_to_bcachefs.c
new file mode 100644
index 00000000..d4701263
--- /dev/null
+++ b/c_src/posix_to_bcachefs.c
@@ -0,0 +1,461 @@
+#include <dirent.h>
+#include <sys/xattr.h>
+#include <linux/xattr.h>
+
+#include "posix_to_bcachefs.h"
+#include "libbcachefs/alloc_foreground.h"
+#include "libbcachefs/buckets.h"
+#include "libbcachefs/fs-common.h"
+#include "libbcachefs/io_write.h"
+#include "libbcachefs/str_hash.h"
+#include "libbcachefs/xattr.h"
+
+/*
+ * Pack @inode and insert it into the inodes btree, with the key's snapshot
+ * field forced to U32_MAX. Dies if the insert fails.
+ */
+void update_inode(struct bch_fs *c,
+		  struct bch_inode_unpacked *inode)
+{
+	struct bkey_inode_buf packed;
+
+	bch2_inode_pack(&packed, inode);
+	packed.inode.k.p.snapshot = U32_MAX;
+
+	int ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
+				    NULL, 0, BTREE_ITER_cached);
+	if (!ret)
+		return;
+
+	die("error updating inode: %s", bch2_err_str(ret));
+}
+
+/*
+ * Create a hardlink called @name in directory @parent that points at the
+ * existing inode @inum (both looked up in subvolume 1). Dies on failure.
+ *
+ * @mode is accepted but not used by this function.
+ */
+void create_link(struct bch_fs *c,
+		 struct bch_inode_unpacked *parent,
+		 const char *name, u64 inum, mode_t mode)
+{
+	struct qstr qstr = QSTR(name);
+	/* Outputs of bch2_link_trans(); not consumed here */
+	struct bch_inode_unpacked parent_u;
+	struct bch_inode_unpacked inode;
+	int ret;
+
+	ret = bch2_trans_commit_do(c, NULL, NULL, 0,
+		bch2_link_trans(trans,
+				(subvol_inum) { 1, parent->bi_inum }, &parent_u,
+				(subvol_inum) { 1, inum }, &inode, &qstr));
+	if (ret)
+		die("error creating hardlink: %s", bch2_err_str(ret));
+}
+
+/*
+ * Create a new inode called @name under directory @parent (subvolume 1)
+ * with the given ownership, mode and device number, and return the
+ * unpacked inode that was created. Dies on failure.
+ */
+struct bch_inode_unpacked create_file(struct bch_fs *c,
+				      struct bch_inode_unpacked *parent,
+				      const char *name,
+				      uid_t uid, gid_t gid,
+				      mode_t mode, dev_t rdev)
+{
+	struct bch_inode_unpacked new_inode;
+	struct qstr qstr = QSTR(name);
+	int ret;
+
+	bch2_inode_init_early(c, &new_inode);
+
+	ret = bch2_trans_commit_do(c, NULL, NULL, 0,
+		bch2_create_trans(trans,
+				  (subvol_inum) { 1, parent->bi_inum }, parent,
+				  &new_inode, &qstr,
+				  uid, gid, mode, rdev, NULL, NULL,
+				  (subvol_inum) {}, 0));
+	if (ret)
+		die("error creating %s: %s", name, bch2_err_str(ret));
+
+	return new_inode;
+}
+
+/*
+ * Iterate @handler over the NULL-terminated array @handlers points at.
+ * Does nothing if @handlers is NULL. Note that @handlers itself is advanced
+ * (post-incremented) as the loop runs, so pass a scratch copy of the pointer.
+ */
+#define for_each_xattr_handler(handlers, handler) \
+ if (handlers) \
+ for ((handler) = *(handlers)++; \
+ (handler) != NULL; \
+ (handler) = *(handlers)++)
+
+/*
+ * Find the bch2_xattr_handlers entry responsible for the xattr *@name.
+ *
+ * On success *@name is advanced past the matched prefix and the handler is
+ * returned. Returns ERR_PTR(-EINVAL) when a handler that has a ->prefix
+ * matches but nothing follows the prefix, and ERR_PTR(-EOPNOTSUPP) when no
+ * handler matches at all.
+ */
+static const struct xattr_handler *xattr_resolve_name(char **name)
+{
+ const struct xattr_handler * const *handlers = bch2_xattr_handlers;
+ const struct xattr_handler *handler;
+
+ for_each_xattr_handler(handlers, handler) {
+ char *n;
+
+ /* n = remainder of *name after the handler's prefix, or NULL if no match */
+ n = strcmp_prefix(*name, xattr_prefix(handler));
+ if (n) {
+ /*
+ * True when exactly one of "handler has no ->prefix" and
+ * "remainder is empty" holds:
+ * - no ->prefix but trailing text: not this handler, keep looking
+ * - has ->prefix but empty remainder: invalid name
+ */
+ if (!handler->prefix ^ !*n) {
+ if (*n)
+ continue;
+ return ERR_PTR(-EINVAL);
+ }
+ *name = n;
+ return handler;
+ }
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
+/*
+ * Convert the access, modification and change timestamps of @src and store
+ * them in @dst. Only the in-memory inode is updated.
+ */
+void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst,
+		struct stat *src)
+{
+	dst->bi_ctime	= timespec_to_bch2_time(c, src->st_ctim);
+	dst->bi_mtime	= timespec_to_bch2_time(c, src->st_mtim);
+	dst->bi_atime	= timespec_to_bch2_time(c, src->st_atim);
+}
+
+/*
+ * Copy the extended attributes of the file at path @src (symlinks are not
+ * followed) onto inode @dst.
+ *
+ * Attributes whose name cannot be resolved to a bcachefs xattr handler are
+ * skipped; failures to list, read or store an attribute are fatal.
+ */
+void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
+		 char *src)
+{
+	struct bch_hash_info hash_info = bch2_hash_info_init(c, dst);
+	char attrs[XATTR_LIST_MAX];
+
+	ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs));
+	if (attrs_size < 0)
+		die("listxattr error: %m");
+
+	/* attrs[] holds attrs_size bytes of back-to-back NUL-terminated names */
+	char *end = attrs + attrs_size;
+	char *attr = attrs;
+
+	while (attr < end) {
+		/* Computed up front: xattr_resolve_name() moves attr */
+		char *next = attr + strlen(attr) + 1;
+		char val[XATTR_SIZE_MAX];
+
+		/* The value is fetched with the full, unstripped name */
+		ssize_t val_size = lgetxattr(src, attr, val, sizeof(val));
+		if (val_size < 0)
+			die("error getting xattr val: %m");
+
+		const struct xattr_handler *h = xattr_resolve_name(&attr);
+		if (!IS_ERR(h)) {
+			int ret = bch2_trans_commit_do(c, NULL, NULL, 0,
+					bch2_xattr_set(trans,
+						       (subvol_inum) { 1, dst->bi_inum },
+						       dst, &hash_info, attr,
+						       val, val_size, h->flags, 0));
+			if (ret < 0)
+				die("error creating xattr: %s", bch2_err_str(ret));
+		}
+
+		attr = next;
+	}
+}
+
+/* Size in bytes of the staging buffer below; also the largest single write_data() call. */
+#define WRITE_DATA_BUF (1 << 20)
+
+/* Page-aligned staging buffer shared by copy_data() and copy_link(). */
+static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE);
+
+/*
+ * Write @len bytes from @buf into inode @dst_inode at byte offset
+ * @dst_offset.
+ *
+ * Both @dst_offset and @len must be multiples of the filesystem block size
+ * and @len may not exceed WRITE_DATA_BUF (enforced with BUG_ON). On success
+ * dst_inode->bi_sectors is increased by the number of 512-byte sectors
+ * written; reservation or write failures are fatal.
+ *
+ * Note: the @buf parameter shadows the file-scope buffer of the same name.
+ */
+static void write_data(struct bch_fs *c,
+ struct bch_inode_unpacked *dst_inode,
+ u64 dst_offset, void *buf, size_t len)
+{
+ struct bch_write_op op;
+ struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE];
+
+ BUG_ON(dst_offset & (block_bytes(c) - 1));
+ BUG_ON(len & (block_bytes(c) - 1));
+ BUG_ON(len > WRITE_DATA_BUF);
+
+ /* Map the caller's buffer into the write op's bio */
+ bio_init(&op.wbio.bio, NULL, bv, ARRAY_SIZE(bv), 0);
+ bch2_bio_map(&op.wbio.bio, buf, len);
+
+ bch2_write_op_init(&op, c, bch2_opts_to_inode_opts(c->opts));
+ op.write_point = writepoint_hashed(0);
+ op.nr_replicas = 1;
+ op.subvol = 1;
+ /* Extent positions are in 512-byte sectors, hence the >> 9 */
+ op.pos = SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX);
+ op.flags |= BCH_WRITE_SYNC;
+
+ /* NOTE(review): reserves c->opts.data_replicas while op.nr_replicas is 1 - confirm intended */
+ int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
+ c->opts.data_replicas, 0);
+ if (ret)
+ die("error reserving space in new filesystem: %s", bch2_err_str(ret));
+
+ closure_call(&op.cl, bch2_write, NULL, NULL);
+
+ BUG_ON(!(op.flags & BCH_WRITE_SUBMITTED));
+ dst_inode->bi_sectors += len >> 9;
+
+ if (op.error)
+ die("write error: %s", bch2_err_str(op.error));
+}
+
+/*
+ * Copy the byte range [@start, @end) of @src_fd into @dst_inode at the same
+ * offsets, going through the file-scope staging buffer in chunks of at most
+ * sizeof(buf) bytes. Each chunk is zero-padded up to the filesystem block
+ * size before it is written.
+ */
+void copy_data(struct bch_fs *c,
+	       struct bch_inode_unpacked *dst_inode,
+	       int src_fd, u64 start, u64 end)
+{
+	u64 pos = start;
+
+	while (pos < end) {
+		unsigned len = min_t(u64, end - pos, sizeof(buf));
+		unsigned pad = round_up(len, block_bytes(c)) - len;
+
+		xpread(src_fd, buf, len, pos);
+		memset(buf + len, 0, pad);
+		write_data(c, dst_inode, pos, buf, len + pad);
+
+		/* Advance by the payload only; padding is not source data */
+		pos += len;
+	}
+}
+
+/*
+ * Make @dst reference data that already sits on device 0, without copying:
+ * for the byte range of @length bytes at file offset @logical, insert
+ * extent keys pointing at device byte offset @physical.
+ *
+ * All three byte values must be block aligned and the physical range must
+ * lie inside the device (enforced with BUG_ON). The range is split so that
+ * no extent crosses a bucket boundary. dst->bi_sectors is increased by the
+ * number of sectors linked; reservation or insert failures are fatal.
+ */
+static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
+ u64 logical, u64 physical, u64 length)
+{
+ struct bch_dev *ca = c->devs[0];
+
+ BUG_ON(logical & (block_bytes(c) - 1));
+ BUG_ON(physical & (block_bytes(c) - 1));
+ BUG_ON(length & (block_bytes(c) - 1));
+
+ /* From here on everything is in 512-byte sectors */
+ logical >>= 9;
+ physical >>= 9;
+ length >>= 9;
+
+ BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets));
+
+ while (length) {
+ struct bkey_i_extent *e;
+ BKEY_PADDED_ONSTACK(k, BKEY_EXTENT_VAL_U64s_MAX) k;
+ u64 b = sector_to_bucket(ca, physical);
+ struct disk_reservation res;
+ unsigned sectors;
+ int ret;
+
+ /* Sectors left in the current bucket, capped at what remains to link */
+ sectors = min(ca->mi.bucket_size -
+ (physical & (ca->mi.bucket_size - 1)),
+ length);
+
+ e = bkey_extent_init(&k.k);
+ e->k.p.inode = dst->bi_inum;
+ /* The key position is the END of the extent (start + size) */
+ e->k.p.offset = logical + sectors;
+ e->k.p.snapshot = U32_MAX;
+ e->k.size = sectors;
+ bch2_bkey_append_ptr(&e->k_i, (struct bch_extent_ptr) {
+ .offset = physical,
+ .dev = 0,
+ .gen = *bucket_gen(ca, b),
+ });
+
+ ret = bch2_disk_reservation_get(c, &res, sectors, 1,
+ BCH_DISK_RESERVATION_NOFAIL);
+ if (ret)
+ die("error reserving space in new filesystem: %s",
+ bch2_err_str(ret));
+
+ ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0, 0);
+ if (ret)
+ die("btree insert error %s", bch2_err_str(ret));
+
+ bch2_disk_reservation_put(c, &res);
+
+ dst->bi_sectors += sectors;
+ logical += sectors;
+ physical += sectors;
+ length -= sectors;
+ }
+}
+
+/*
+ * Store the target of the symlink at path @src as the data of inode @dst:
+ * the target is read into the staging buffer, zero-padded up to the block
+ * size and written at offset 0. Dies if readlink() fails.
+ */
+void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst,
+	       char *src)
+{
+	ssize_t ret = readlink(src, buf, sizeof(buf));
+	if (ret < 0)
+		die("readlink error: %m");
+
+	ssize_t padded = round_up(ret, block_bytes(c));
+
+	memset(buf + ret, 0, padded - ret);
+	write_data(c, dst, 0, buf, padded);
+}
+
+/*
+ * Transfer the contents of the regular file open as @src_fd (size
+ * @src_size, path @src_path for messages) into inode @dst.
+ *
+ * Each fiemap extent is either copied through copy_data() or, when
+ * migrating in place, linked with link_data() and recorded in s->extents.
+ * Extents that are not block aligned are fatal.
+ */
+static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst,
+		      int src_fd, u64 src_size,
+		      char *src_path, struct copy_fs_state *s)
+{
+	struct fiemap_iter iter;
+	struct fiemap_extent e;
+	u64 src_max = roundup(src_size, block_bytes(c));
+
+	/* First pass: if any extent is not yet mapped, fsync so it gets allocated */
+	fiemap_for_each(src_fd, iter, e) {
+		if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) {
+			fsync(src_fd);
+			break;
+		}
+	}
+	fiemap_iter_exit(&iter);
+
+	/* Second pass: copy or link every extent */
+	fiemap_for_each(src_fd, iter, e) {
+		/* Clamp the extent to the block-rounded file size */
+		e.fe_length = min(e.fe_length, src_max - e.fe_logical);
+
+		if ((e.fe_logical & (block_bytes(c) - 1)) ||
+		    (e.fe_length & (block_bytes(c) - 1)))
+			die("Unaligned extent in %s - can't handle", src_path);
+
+		bool unlinkable = e.fe_flags & (FIEMAP_EXTENT_UNKNOWN|
+						FIEMAP_EXTENT_ENCODED|
+						FIEMAP_EXTENT_NOT_ALIGNED|
+						FIEMAP_EXTENT_DATA_INLINE);
+
+		/*
+		 * Data is copied when we are not migrating, when the extent
+		 * cannot be referenced in place, or when it lies below 1 MB
+		 * so it doesn't conflict with bcachefs's potentially larger
+		 * superblock:
+		 */
+		if (BCH_MIGRATE_copy == s->type ||
+		    unlinkable ||
+		    e.fe_physical < 1 << 20) {
+			copy_data(c, dst, src_fd, e.fe_logical,
+				  e.fe_logical + min(src_size - e.fe_logical,
+						     e.fe_length));
+			continue;
+		}
+
+		if ((e.fe_physical & (block_bytes(c) - 1)))
+			die("Unaligned extent in %s - can't handle", src_path);
+
+		range_add(&s->extents, e.fe_physical, e.fe_length);
+		link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length);
+	}
+	fiemap_iter_exit(&iter);
+}
+
+/*
+ * Recursively copy the directory open as @src_fd (path @src_path, used for
+ * messages) into the bcachefs directory inode @dst.
+ *
+ * For each entry other than ".", ".." and "lost+found" a matching inode is
+ * created, its xattrs, contents and timestamps are copied, and the inode is
+ * written out. Regular files seen before (same st_ino) become hardlinks.
+ * When migrating, the bcachefs image file itself (s->bcachefs_inum) is
+ * skipped and entries on a different st_dev are fatal.
+ *
+ * @src_fd stays owned by the caller. The directory stream is opened on a
+ * dup() of it, because fdopendir() takes over the descriptor it is given
+ * and closedir() closes it; iterating on @src_fd directly made callers that
+ * close() their fd afterwards close it a second time.
+ */
+static void copy_dir(struct copy_fs_state *s,
+		     struct bch_fs *c,
+		     struct bch_inode_unpacked *dst,
+		     int src_fd, const char *src_path)
+{
+	int dir_fd = dup(src_fd);
+	if (dir_fd < 0)
+		die("dup error: %m");
+
+	DIR *dir = fdopendir(dir_fd);
+	if (!dir)
+		die("fdopendir error: %m");
+
+	struct dirent *d;
+
+	/* errno is cleared before every readdir() so EOF and error can be told apart */
+	while ((errno = 0), (d = readdir(dir))) {
+		struct bch_inode_unpacked inode;
+		int fd;
+
+		/* Relative paths below (xopen, xattr calls) resolve against this directory */
+		if (fchdir(src_fd))
+			die("chdir error: %m");
+
+		struct stat stat =
+			xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW);
+
+		if (!strcmp(d->d_name, ".") ||
+		    !strcmp(d->d_name, "..") ||
+		    !strcmp(d->d_name, "lost+found"))
+			continue;
+
+		if (BCH_MIGRATE_migrate == s->type && stat.st_ino == s->bcachefs_inum)
+			continue;
+
+		char *child_path = mprintf("%s/%s", src_path, d->d_name);
+
+		if (s->type == BCH_MIGRATE_migrate && stat.st_dev != s->dev)
+			die("%s does not have correct st_dev!", child_path);
+
+		/* Hardlink table: source st_ino -> inode number already created for it */
+		u64 *dst_inum = S_ISREG(stat.st_mode)
+			? genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL)
+			: NULL;
+
+		if (dst_inum && *dst_inum) {
+			create_link(c, dst, d->d_name, *dst_inum, S_IFREG);
+			goto next;
+		}
+
+		inode = create_file(c, dst, d->d_name,
+				    stat.st_uid, stat.st_gid,
+				    stat.st_mode, stat.st_rdev);
+
+		if (dst_inum)
+			*dst_inum = inode.bi_inum;
+
+		/* copy xattrs */
+		copy_xattrs(c, &inode, d->d_name);
+
+		switch (mode_to_type(stat.st_mode)) {
+		case DT_DIR:
+			fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+			copy_dir(s, c, &inode, fd, child_path);
+			close(fd);
+			break;
+		case DT_REG:
+			inode.bi_size = stat.st_size;
+
+			fd = xopen(d->d_name, O_RDONLY|O_NOATIME);
+			copy_file(c, &inode, fd, stat.st_size,
+				  child_path, s);
+			close(fd);
+			break;
+		case DT_LNK:
+			inode.bi_size = stat.st_size;
+
+			copy_link(c, &inode, d->d_name);
+			break;
+		case DT_FIFO:
+		case DT_CHR:
+		case DT_BLK:
+		case DT_SOCK:
+		case DT_WHT:
+			/* nothing else to copy for these: */
+			break;
+		default:
+			BUG();
+		}
+
+		copy_times(c, &inode, &stat);
+		update_inode(c, &inode);
+next:
+		free(child_path);
+	}
+
+	if (errno)
+		die("readdir error: %m");
+	/* Closes dir_fd only; src_fd is left for the caller */
+	closedir(dir);
+}
+
+/*
+ * Create the file "old_migrated_filesystem" (mode 0400) in @root_inode,
+ * sized to cover all of device 0, and link into it every device range that
+ * is NOT listed in @extents - i.e. the holes between the ranges collected
+ * while linking migrated files. @extents is sorted and merged in place.
+ */
+static void reserve_old_fs_space(struct bch_fs *c,
+				 struct bch_inode_unpacked *root_inode,
+				 ranges *extents)
+{
+	struct bch_dev *ca = c->devs[0];
+	struct hole_iter iter;
+	struct range i;
+
+	struct bch_inode_unpacked dst =
+		create_file(c, root_inode, "old_migrated_filesystem",
+			    0, 0, S_IFREG|0400, 0);
+
+	/* Device size in bytes: sector count << 9 */
+	dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9;
+
+	ranges_sort_merge(extents);
+
+	for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) {
+		/* Logical offset == physical offset: the file mirrors the device */
+		link_data(c, &dst, i.start, i.start, i.end - i.start);
+	}
+
+	update_inode(c, &dst);
+}
+
+/*
+ * Copy (or, for BCH_MIGRATE_migrate, migrate in place) the directory tree
+ * open as @src_fd into the root directory of filesystem @c.
+ *
+ * The source is synced first, the root inode receives the source
+ * directory's timestamps and xattrs, then the tree is walked by copy_dir().
+ * When migrating, the space not claimed by linked extents is reserved via
+ * reserve_old_fs_space() and s->extents is released afterwards.
+ * s->hardlinks is always freed. Changes the process's working directory.
+ */
+void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
+	     struct copy_fs_state *s)
+{
+	struct bch_inode_unpacked root_inode;
+
+	syncfs(src_fd);
+
+	int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
+					  &root_inode);
+	if (ret)
+		die("error looking up root directory: %s", bch2_err_str(ret));
+
+	if (fchdir(src_fd))
+		die("chdir error: %m");
+
+	struct stat stat = xfstat(src_fd);
+
+	copy_times(c, &root_inode, &stat);
+	copy_xattrs(c, &root_inode, ".");
+
+	/* now, copy: */
+	copy_dir(s, c, &root_inode, src_fd, src_path);
+
+	if (BCH_MIGRATE_migrate == s->type)
+		reserve_old_fs_space(c, &root_inode, &s->extents);
+
+	update_inode(c, &root_inode);
+
+	if (BCH_MIGRATE_migrate == s->type)
+		darray_exit(&s->extents);
+
+	genradix_free(&s->hardlinks);
+}
diff --git a/c_src/posix_to_bcachefs.h b/c_src/posix_to_bcachefs.h
new file mode 100644
index 00000000..facb75ed
--- /dev/null
+++ b/c_src/posix_to_bcachefs.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _POSIX_TO_BCACHEFS_H
+#define _POSIX_TO_BCACHEFS_H
+
+/*
+ * This header exports the functionality needed for copying data from existing
+ * posix compliant filesystems to bcachefs. There are two use cases:
+ * 1. Creating a new bcachefs filesystem using `bcachefs format`, we can
+ * specify a source directory tree which will be copied over to the new
+ * bcachefs filesystem.
+ * 2. Migrating an existing filesystem in place, with `bcachefs migrate`.
+ * This will allocate space for the bcachefs metadata, but the actual data
+ * represented by the extents will not be duplicated. The bcachefs metadata
+ * will simply point to the existing extents.
+ *
+ * To avoid code duplication, `copy_fs` deals with both cases. See the function
+ * documentation for more details.
+ */
+
+#include "libbcachefs.h"
+
+/* Selects which of the two copy_fs() use cases is being performed. */
+enum bch_migrate_type {
+ BCH_MIGRATE_copy, /* copy a directory tree into a new filesystem */
+ BCH_MIGRATE_migrate /* migrate an existing filesystem in place */
+};
+
+/*
+ * The migrate action uses all the fields in this struct.
+ * The copy action only uses the `hardlinks` field. Since `hardlinks` is
+ * initialized with zeroes, an empty `copy_fs_state` struct can be passed.
+ */
+struct copy_fs_state {
+ /* migrate only: st_ino of the source entry to skip during the walk */
+ u64 bcachefs_inum;
+ /* migrate only: st_dev every copied entry is required to have */
+ dev_t dev;
+
+ /* source st_ino -> bcachefs inode number, used to recreate hardlinks */
+ GENRADIX(u64) hardlinks;
+ /* migrate only: physical byte ranges that were linked instead of copied */
+ ranges extents;
+ enum bch_migrate_type type;
+};
+
+/*
+ * The `copy_fs` function is used for both copying a directory tree to a new
+ * bcachefs filesystem and migrating an existing one, depending on the value
+ * from the `type` field in `copy_fs_state` struct.
+ *
+ * In case of copy, an empty `copy_fs_state` structure is passed to `copy_fs`
+ * (only the `hardlinks` field is used, and that is initialized with zeroes).
+ *
+ * In the migrate case, all the fields from `copy_fs_state` need to be
+ * initialized (`hardlinks` is initialized with zeroes).
+ */
+void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
+ struct copy_fs_state *s);
+#endif /* _POSIX_TO_BCACHEFS_H */
diff --git a/c_src/qcow2.c b/c_src/qcow2.c
new file mode 100644
index 00000000..30a6e056
--- /dev/null
+++ b/c_src/qcow2.c
@@ -0,0 +1,134 @@
+
+#include <errno.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "qcow2.h"
+#include "tools-util.h"
+
+/* Header magic: the bytes 'Q' 'F' 'I' 0xfb, most significant first */
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define QCOW_VERSION 2
+/*
+ * Top bit of an L1/L2 table entry. Built from an unsigned constant:
+ * shifting a signed 1LL into bit 63 is undefined behaviour.
+ */
+#define QCOW_OFLAG_COPIED (1ULL << 63)
+
+/*
+ * Image header as written at offset 0 of the output file. Fields are stored
+ * big-endian. qcow2_write_image() fills in magic, version, block_bits,
+ * size, l1_size and l1_table_offset and leaves everything else zero.
+ */
+struct qcow2_hdr {
+ u32 magic;
+ u32 version;
+
+ u64 backing_file_offset;
+ u32 backing_file_size;
+
+ u32 block_bits; /* log2 of the block size */
+ u64 size; /* size of the source image in bytes */
+ u32 crypt_method;
+
+ u32 l1_size; /* number of L1 table entries */
+ u64 l1_table_offset; /* file offset of the L1 table */
+
+ u64 refcount_table_offset;
+ u32 refcount_table_blocks;
+
+ u32 nb_snapshots;
+ u64 snapshots_offset;
+};
+
+/* In-memory state while an image is being written. */
+struct qcow2_image {
+ int fd; /* output file */
+ u32 block_size; /* bytes per block */
+ u64 *l1_table; /* whole L1 table, entries stored big-endian */
+ u64 l1_offset; /* not referenced by the code in this file */
+ u32 l1_index; /* L1 slot the buffered L2 table belongs to; -1 = none */
+ u64 *l2_table; /* the one L2 table currently being filled */
+ u64 offset; /* next unused byte offset in the output file */
+};
+
+/*
+ * If an L2 table is currently buffered, write it at the next free offset,
+ * record that offset in its L1 slot, then clear the buffer and mark it as
+ * unused. Does nothing when no table is buffered.
+ */
+static void flush_l2(struct qcow2_image *img)
+{
+	if (img->l1_index == -1)
+		return;
+
+	img->l1_table[img->l1_index] =
+		cpu_to_be64(img->offset|QCOW_OFLAG_COPIED);
+	xpwrite(img->fd, img->l2_table, img->block_size, img->offset,
+		"qcow2 l2 table");
+	img->offset += img->block_size;
+
+	memset(img->l2_table, 0, img->block_size);
+	img->l1_index = -1;
+}
+
+/*
+ * Record that source block @src_blk is stored at file offset @dst_offset.
+ * If the block belongs to a different L2 table than the buffered one, the
+ * buffered table is flushed first.
+ */
+static void add_l2(struct qcow2_image *img, u64 src_blk, u64 dst_offset)
+{
+	unsigned entries = img->block_size / sizeof(u64);
+	u64 l1_slot = src_blk / entries;
+	u64 l2_slot = src_blk & (entries - 1);
+
+	if (img->l1_index != l1_slot) {
+		flush_l2(img);
+		img->l1_index = l1_slot;
+	}
+
+	img->l2_table[l2_slot] = cpu_to_be64(dst_offset|QCOW_OFLAG_COPIED);
+}
+
+/*
+ * Write a sparse image of @infd to @outfd that contains only the byte
+ * ranges listed in @data.
+ *
+ * @data is rounded out to @block_size and sort-merged in place.
+ * @block_size must be a power of two. Output layout: header block, then
+ * data blocks interleaved with L2 tables as they fill up, then the L1
+ * table; the header is written last.
+ */
+void qcow2_write_image(int infd, int outfd, ranges *data,
+		       unsigned block_size)
+{
+	u64 image_size = get_size(infd);
+	unsigned l2_size = block_size / sizeof(u64);
+	unsigned l1_size = DIV_ROUND_UP(image_size, (u64) block_size * l2_size);
+	struct qcow2_hdr hdr = { 0 };
+	struct qcow2_image img = {
+		.fd		= outfd,
+		.block_size	= block_size,
+		.l2_table	= xcalloc(l2_size, sizeof(u64)),
+		.l1_table	= xcalloc(l1_size, sizeof(u64)),
+		.l1_index	= -1,
+		/* Data starts right after the header block */
+		.offset		= round_up(sizeof(hdr), block_size),
+	};
+	char *buf = xmalloc(block_size);
+
+	assert(is_power_of_2(block_size));
+
+	ranges_roundup(data, block_size);
+	ranges_sort_merge(data);
+
+	/* Write data: */
+	darray_for_each(*data, r) {
+		u64 src_offset = r->start;
+
+		while (src_offset < r->end) {
+			u64 blk_offset = img.offset;
+
+			img.offset += img.block_size;
+
+			xpread(infd, buf, block_size, src_offset);
+			xpwrite(outfd, buf, block_size, blk_offset,
+				"qcow2 data");
+
+			add_l2(&img, src_offset / block_size, blk_offset);
+			src_offset += block_size;
+		}
+	}
+
+	flush_l2(&img);
+
+	/* Write L1 table: */
+	u64 l1_offset = img.offset;
+
+	img.offset += round_up(l1_size * sizeof(u64), block_size);
+	xpwrite(img.fd, img.l1_table, l1_size * sizeof(u64), l1_offset,
+		"qcow2 l1 table");
+
+	/* Write header: */
+	hdr.magic		= cpu_to_be32(QCOW_MAGIC);
+	hdr.version		= cpu_to_be32(QCOW_VERSION);
+	hdr.block_bits		= cpu_to_be32(ilog2(block_size));
+	hdr.size		= cpu_to_be64(image_size);
+	hdr.l1_size		= cpu_to_be32(l1_size);
+	hdr.l1_table_offset	= cpu_to_be64(l1_offset);
+
+	memset(buf, 0, block_size);
+	memcpy(buf, &hdr, sizeof(hdr));
+	xpwrite(img.fd, buf, block_size, 0,
+		"qcow2 header");
+
+	free(img.l2_table);
+	free(img.l1_table);
+	free(buf);
+}
diff --git a/c_src/qcow2.h b/c_src/qcow2.h
new file mode 100644
index 00000000..0943d55c
--- /dev/null
+++ b/c_src/qcow2.h
@@ -0,0 +1,9 @@
+#ifndef _QCOW2_H
+#define _QCOW2_H
+
+#include <linux/types.h>
+#include "tools-util.h"
+
+/*
+ * Arguments, in order: input fd, output fd, byte ranges of the input to
+ * include (modified in place), block size in bytes (power of two).
+ */
+void qcow2_write_image(int, int, ranges *, unsigned);
+
+#endif /* _QCOW2_H */
diff --git a/c_src/tools-util.c b/c_src/tools-util.c
new file mode 100644
index 00000000..3a76a02e
--- /dev/null
+++ b/c_src/tools-util.c
@@ -0,0 +1,741 @@
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/fs.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <blkid.h>
+#include <uuid/uuid.h>
+
+#include "libbcachefs.h"
+#include "libbcachefs/bcachefs_ioctl.h"
+#include "linux/sort.h"
+#include "tools-util.h"
+#include "libbcachefs/util.h"
+
+/*
+ * Print a printf-style message plus a newline to stderr and terminate
+ * immediately with EXIT_FAILURE. Uses _exit(), so atexit handlers and stdio
+ * flushing are skipped. Never returns.
+ */
+void die(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	fputc('\n', stderr);
+	_exit(EXIT_FAILURE);
+}
+
+/*
+ * printf into a freshly allocated string. Dies if the allocation fails.
+ * The caller must free() the result.
+ */
+char *mprintf(const char *fmt, ...)
+{
+	char *out;
+	va_list ap;
+
+	va_start(ap, fmt);
+	int len = vasprintf(&out, fmt, ap);
+	va_end(ap);
+
+	if (len < 0)
+		die("insufficient memory");
+
+	return out;
+}
+
+/*
+ * Read exactly @count bytes from @fd at @offset into @buf, retrying after
+ * short reads. Dies on a read error or if end of file is reached first.
+ */
+void xpread(int fd, void *buf, size_t count, off_t offset)
+{
+	char *dst = buf;
+
+	while (count) {
+		ssize_t r = pread(fd, dst, count, offset);
+
+		if (r < 0)
+			die("read error: %m");
+		if (!r)
+			die("pread error: unexpected eof");
+
+		/*
+		 * Advance the destination together with the file offset;
+		 * otherwise the data following a short read would overwrite
+		 * the start of the buffer.
+		 */
+		dst	+= r;
+		count	-= r;
+		offset	+= r;
+	}
+}
+
+/*
+ * Write @count bytes from @buf to @fd at @offset with a single pwrite().
+ * Anything other than a complete write is fatal; @msg names what was being
+ * written in the error message.
+ */
+void xpwrite(int fd, const void *buf, size_t count, off_t offset, const char *msg)
+{
+	ssize_t r = pwrite(fd, buf, count, offset);
+
+	if (r == count)
+		return;
+
+	die("error writing %s (ret %zi err %m)", msg, r);
+}
+
+/* fstatat() that dies on failure and returns the stat buffer by value. */
+struct stat xfstatat(int dirfd, const char *path, int flags)
+{
+	struct stat st;
+
+	if (fstatat(dirfd, path, &st, flags) != 0)
+		die("stat error: %m");
+
+	return st;
+}
+
+/* fstat() that dies on failure and returns the stat buffer by value. */
+struct stat xfstat(int fd)
+{
+	struct stat st;
+
+	if (fstat(fd, &st) != 0)
+		die("stat error: %m");
+
+	return st;
+}
+
+/* stat() that dies (naming @path) on failure and returns the buffer by value. */
+struct stat xstat(const char *path)
+{
+	struct stat st;
+
+	if (stat(path, &st) != 0)
+		die("stat error statting %s: %m", path);
+
+	return st;
+}
+
+/* File parsing (i.e. sysfs) */
+
+/*
+ * Write the string @str (without its terminating NUL) to the file @path,
+ * opened write-only relative to @dirfd. Dies if the file cannot be opened
+ * or the string is not written completely.
+ */
+void write_file_str(int dirfd, const char *path, const char *str)
+{
+	int fd = xopenat(dirfd, path, O_WRONLY);
+	ssize_t wrote, len = strlen(str);
+
+	wrote = write(fd, str, len);
+	if (wrote != len)
+		/* This is a write failure; it used to be reported as "read error" */
+		die("write error: %m");
+	close(fd);
+}
+
+/*
+ * Read the file @path (relative to @dirfd) with a single read() of up to
+ * st_size bytes and return it as a malloc'd string with one trailing
+ * newline removed. Returns NULL when the resulting string is empty; dies on
+ * open or read errors. The caller frees the result.
+ */
+char *read_file_str(int dirfd, const char *path)
+{
+	int fd = xopenat(dirfd, path, O_RDONLY);
+	ssize_t size = xfstat(fd).st_size;
+	char *str = xmalloc(size + 1);
+
+	ssize_t got = read(fd, str, size);
+	if (got < 0)
+		die("read error: %m");
+
+	str[got] = '\0';
+	if (got > 0 && str[got - 1] == '\n')
+		str[got - 1] = '\0';
+
+	if (str[0] == '\0') {
+		free(str);
+		str = NULL;
+	}
+
+	close(fd);
+	return str;
+}
+
+/*
+ * Read the file @path (relative to @dirfd) and parse its contents with
+ * bch2_strtou64_h(). Dies if the file is empty or does not parse.
+ */
+u64 read_file_u64(int dirfd, const char *path)
+{
+	char *buf = read_file_str(dirfd, path);
+	u64 v;
+
+	/*
+	 * read_file_str() returns NULL for an empty file; don't hand that to
+	 * the parser or to a %s conversion.
+	 */
+	if (!buf || bch2_strtou64_h(buf, &v))
+		die("read_file_u64: error parsing %s (got %s)", path,
+		    buf ? buf : "");
+	free(buf);
+	return v;
+}
+
+/* String list options: */
+
+/*
+ * Look @opt up in the NULL-terminated string array @list and return its
+ * index. Dies with "Bad <msg> <opt>" if it is not found.
+ */
+ssize_t read_string_list_or_die(const char *opt, const char * const list[],
+				const char *msg)
+{
+	ssize_t idx = match_string(list, -1, opt);
+
+	if (idx >= 0)
+		return idx;
+
+	die("Bad %s %s", msg, opt);
+}
+
+/*
+ * Returns the size in bytes of the file or block device open as @fd:
+ * BLKGETSIZE64 for block devices, st_size for everything else.
+ */
+u64 get_size(int fd)
+{
+	struct stat st = xfstat(fd);
+
+	if (S_ISBLK(st.st_mode)) {
+		u64 bytes;
+
+		xioctl(fd, BLKGETSIZE64, &bytes);
+		return bytes;
+	}
+
+	return st.st_size;
+}
+
+/*
+ * Returns the block size in bytes for @fd: the BLKPBSZGET value for block
+ * devices, st_blksize for everything else.
+ */
+unsigned get_blocksize(int fd)
+{
+	struct stat st = xfstat(fd);
+
+	if (S_ISBLK(st.st_mode)) {
+		unsigned bytes;
+
+		xioctl(fd, BLKPBSZGET, &bytes);
+		return bytes;
+	}
+
+	return st.st_blksize;
+}
+
+/*
+ * Open a block device, do magic blkid stuff to probe for existing filesystems:
+ *
+ * Opens dev->path exclusively for read/write and stores the result in
+ * dev->file / dev->bdev. If blkid detects an existing filesystem it is
+ * reported; unless @force is set the user must confirm (the process exits
+ * on "no"), after which the detected signatures are wiped.
+ *
+ * Also refuses to run with libblkid older than 2.40.1 unless @force is set.
+ * All failures are fatal; on return the value is 0.
+ */
+int open_for_format(struct dev_opts *dev, bool force)
+{
+ int blkid_version_code = blkid_get_library_version(NULL, NULL);
+ if (blkid_version_code < 2401) {
+ if (force) {
+ fprintf(
+ stderr,
+ "Continuing with out of date libblkid %s because --force was passed.\n",
+ BLKID_VERSION);
+ } else {
+ // Reference for picking 2.40.1:
+ // https://mirrors.edge.kernel.org/pub/linux/utils/util-linux/v2.40/v2.40.1-ReleaseNotes
+ // https://github.com/util-linux/util-linux/issues/3103
+ die(
+ "Refusing to format when using libblkid %s\n"
+ "libblkid >= 2.40.1 is required to check for existing filesystems\n"
+ "Earlier versions may not recognize some bcachefs filesystems.\n", BLKID_VERSION);
+ }
+ }
+
+ blkid_probe pr;
+ const char *fs_type = NULL, *fs_label = NULL;
+ size_t fs_type_len, fs_label_len;
+
+ dev->file = bdev_file_open_by_path(dev->path,
+ BLK_OPEN_READ|BLK_OPEN_WRITE|BLK_OPEN_EXCL|BLK_OPEN_BUFFERED,
+ dev, NULL);
+ /* ret is 0 or a negative errno; negative values die just below */
+ int ret = PTR_ERR_OR_ZERO(dev->file);
+ if (ret < 0)
+ die("Error opening device to format %s: %s", dev->path, strerror(-ret));
+ dev->bdev = file_bdev(dev->file);
+
+ /* Probe the freshly opened device for partition tables and superblocks */
+ if (!(pr = blkid_new_probe()))
+ die("blkid error 1");
+ if (blkid_probe_set_device(pr, dev->bdev->bd_fd, 0, 0))
+ die("blkid error 2");
+ if (blkid_probe_enable_partitions(pr, true) ||
+ blkid_probe_enable_superblocks(pr, true) ||
+ blkid_probe_set_superblocks_flags(pr,
+ BLKID_SUBLKS_LABEL|BLKID_SUBLKS_TYPE|BLKID_SUBLKS_MAGIC))
+ die("blkid error 3");
+ if (blkid_do_fullprobe(pr) < 0)
+ die("blkid error 4");
+
+ /* Lookup results are not checked: fs_type/fs_label stay NULL when absent */
+ blkid_probe_lookup_value(pr, "TYPE", &fs_type, &fs_type_len);
+ blkid_probe_lookup_value(pr, "LABEL", &fs_label, &fs_label_len);
+
+ if (fs_type) {
+ if (fs_label)
+ printf("%s contains a %s filesystem labelled '%s'\n",
+ dev->path, fs_type, fs_label);
+ else
+ printf("%s contains a %s filesystem\n",
+ dev->path, fs_type);
+ if (!force) {
+ fputs("Proceed anyway?", stdout);
+ if (!ask_yn())
+ exit(EXIT_FAILURE);
+ }
+ /* Wipe every signature blkid can find on the device */
+ while (blkid_do_probe(pr) == 0) {
+ if (blkid_do_wipe(pr, 0))
+ die("Failed to wipe preexisting metadata.");
+ }
+ }
+
+ blkid_free_probe(pr);
+ return ret;
+}
+
+/*
+ * Print " (y,n) " and read one line from stdin. Returns true only if the
+ * line starts with 'y' or 'Y'. Dies if stdin cannot be read.
+ */
+bool ask_yn(void)
+{
+	const char *short_yes = "yY";
+	char *buf = NULL;
+	size_t buflen = 0;
+	bool ret;
+
+	fputs(" (y,n) ", stdout);
+	fflush(stdout);
+
+	if (getline(&buf, &buflen, stdin) < 0)
+		die("error reading from standard input");
+
+	/*
+	 * strchr() also matches the terminating NUL of short_yes, so a line
+	 * starting with a NUL byte must be rejected explicitly.
+	 */
+	ret = buf[0] != '\0' && strchr(short_yes, buf[0]) != NULL;
+	free(buf);
+	return ret;
+}
+
+/* sort() comparator: orders struct range by ascending ->start. */
+static int range_cmp(const void *_l, const void *_r)
+{
+	const struct range *l = _l;
+	const struct range *r = _r;
+
+	/* Yields -1, 0 or 1 without risking overflow from a subtraction */
+	return (l->start > r->start) - (l->start < r->start);
+}
+
+/*
+ * Sort @r by start offset and coalesce ranges that overlap or touch, so
+ * that the result is sorted and non-overlapping. @r's storage is replaced
+ * by a newly built array.
+ */
+void ranges_sort_merge(ranges *r)
+{
+	ranges merged = { 0 };
+
+	sort(r->data, r->nr, sizeof(r->data[0]), range_cmp, NULL);
+
+	/* Merge contiguous ranges: */
+	darray_for_each(*r, i) {
+		struct range *last = merged.nr
+			? &merged.data[merged.nr - 1]
+			: NULL;
+
+		if (!last || last->end < i->start) {
+			darray_push(&merged, *i);
+			continue;
+		}
+
+		last->end = max(last->end, i->end);
+	}
+
+	darray_exit(r);
+	*r = merged;
+}
+
+/*
+ * Grow every range outwards to @block_size boundaries: start is rounded
+ * down, end is rounded up.
+ */
+void ranges_roundup(ranges *r, unsigned block_size)
+{
+	darray_for_each(*r, it) {
+		it->end		= round_up(it->end, block_size);
+		it->start	= round_down(it->start, block_size);
+	}
+}
+
+/*
+ * Shrink every range inwards to @block_size boundaries: start is rounded
+ * up, end is rounded down but never below the new start (a range that
+ * shrinks to nothing becomes empty rather than inverted).
+ */
+void ranges_rounddown(ranges *r, unsigned block_size)
+{
+	darray_for_each(*r, it) {
+		it->start = round_up(it->start, block_size);
+		it->end = round_down(it->end, block_size);
+
+		if (it->end < it->start)
+			it->end = it->start;
+	}
+}
+
+/*
+ * Return the next extent of the file being iterated, issuing another
+ * FS_IOC_FIEMAP ioctl when the current batch has been consumed.
+ * An extent with fe_length == 0 signals the end of the iteration.
+ */
+struct fiemap_extent fiemap_iter_next(struct fiemap_iter *iter)
+{
+	BUG_ON(iter->idx > iter->f->fm_mapped_extents);
+
+	/* Batch used up: fetch the next one, starting at fm_start */
+	if (iter->idx == iter->f->fm_mapped_extents) {
+		xioctl(iter->fd, FS_IOC_FIEMAP, iter->f);
+
+		if (!iter->f->fm_mapped_extents)
+			return (struct fiemap_extent) { .fe_length = 0 };
+
+		iter->idx = 0;
+	}
+
+	struct fiemap_extent e = iter->f->fm_extents[iter->idx++];
+
+	BUG_ON(!e.fe_length);
+
+	/* The next ioctl resumes right after the extent just returned */
+	iter->f->fm_start = e.fe_logical + e.fe_length;
+
+	return e;
+}
+
+/*
+ * If @a starts with @a_prefix, return a pointer to the first character of
+ * @a after the prefix; otherwise return NULL. An empty prefix always
+ * matches and yields @a itself.
+ */
+char *strcmp_prefix(char *a, const char *a_prefix)
+{
+	for (; *a_prefix; a++, a_prefix++)
+		if (*a != *a_prefix)
+			return NULL;
+
+	return a;
+}
+
+/* crc32c */
+
+/*
+ * Portable table-driven implementation: folds the @size bytes at @buf into
+ * the running checksum @crc one byte at a time and returns the updated
+ * value. No initial or final inversion is applied here.
+ */
+static u32 crc32c_default(u32 crc, const void *buf, size_t size)
+{
+ /* 256-entry lookup table, indexed by (low byte of crc) XOR (input byte) */
+ static const u32 crc32c_tab[] = {
+ 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
+ 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
+ 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
+ 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
+ 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
+ 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
+ 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
+ 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
+ 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
+ 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
+ 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
+ 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
+ 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
+ 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
+ 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
+ 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
+ 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
+ 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
+ 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
+ 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
+ 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
+ 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
+ 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
+ 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
+ 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
+ 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
+ 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
+ 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
+ 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
+ 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
+ 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
+ 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
+ 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
+ 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
+ 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
+ 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
+ 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
+ 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
+ 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
+ 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
+ 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
+ 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
+ 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
+ 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
+ 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
+ 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
+ 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
+ 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
+ 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
+ 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
+ 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
+ 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
+ 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
+ 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
+ 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
+ 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
+ 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
+ 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
+ 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
+ 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
+ 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
+ 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
+ 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
+ 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
+ };
+ const u8 *p = buf;
+
+ /* One table lookup per input byte, consuming the checksum low byte first */
+ while (size--)
+ crc = crc32c_tab[(crc ^ *p++) & 0xFFL] ^ (crc >> 8);
+
+ return crc;
+}
+
+#include <linux/compiler.h>
+
+#ifdef __x86_64__
+
+/*
+ * Extra byte spliced into the hand-encoded instruction used by the
+ * word-at-a-time loop of crc32c_sse42().
+ * NOTE(review): this tests CONFIG_X86_64 although the enclosing guard is
+ * __x86_64__. If CONFIG_X86_64 is not defined by the build, REX_PRE is
+ * empty while that loop still advances sizeof(long) bytes per step -
+ * confirm the macro is always defined on x86-64 builds.
+ */
+#ifdef CONFIG_X86_64
+#define REX_PRE "0x48, "
+#else
+#define REX_PRE
+#endif
+
+/*
+ * Hardware implementation, selected by resolve_crc32c() when the CPU
+ * reports SSE4.2. Same contract as crc32c_default(): folds @size bytes at
+ * @buf into @crc and returns the result.
+ *
+ * The instructions are emitted as raw bytes; the checksum lives in the
+ * register bound by the "S" constraint and the data in the one bound by
+ * "c".
+ */
+static u32 crc32c_sse42(u32 crc, const void *buf, size_t size)
+{
+ /* Bulk of the buffer: one unsigned long per step */
+ while (size >= sizeof(long)) {
+ const unsigned long *d = buf;
+
+ __asm__ __volatile__(
+ ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
+ :"=S"(crc)
+ :"0"(crc), "c"(*d)
+ );
+ buf += sizeof(long);
+ size -= sizeof(long);
+ }
+
+ /* Remaining tail: one byte per step */
+ while (size) {
+ const u8 *d = buf;
+
+ __asm__ __volatile__(
+ ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
+ :"=S"(crc)
+ :"0"(crc), "c"(*d)
+ );
+ buf += 1;
+ size -= 1;
+ }
+
+ return crc;
+}
+
+#endif
+
+/*
+ * Pick the crc32c implementation for this machine: the SSE4.2 version on
+ * x86-64 CPUs that support it, the table-driven one otherwise. Returns the
+ * chosen function as a void pointer.
+ */
+static void *resolve_crc32c(void)
+{
+	void *impl = crc32c_default;
+
+#ifdef __x86_64__
+	if (__builtin_cpu_supports("sse4.2"))
+		impl = crc32c_sse42;
+#endif
+	return impl;
+}
+
+/*
+ * ifunc is buggy and I don't know what breaks it (LTO?)
+ */
+#ifdef HAVE_WORKING_IFUNC
+
+/*
+ * ifunc resolver for crc32c(): initialises the CPU feature data that
+ * __builtin_cpu_supports() relies on, then returns the implementation
+ * chosen by resolve_crc32c().
+ */
+static void *ifunc_resolve_crc32c(void)
+{
+	__builtin_cpu_init();
+
+	/* Call the resolver (the original was missing "()" and the ';') */
+	return resolve_crc32c();
+}
+
+u32 crc32c(u32, const void *, size_t)
+ __attribute__((ifunc("ifunc_resolve_crc32c")));
+
+#else
+
+/*
+ * Public crc32c entry point for builds without ifunc: the implementation is
+ * resolved on the first call, cached in a function-local static, and every
+ * call is forwarded to it.
+ */
+u32 crc32c(u32 crc, const void *buf, size_t size)
+{
+	static u32 (*impl)(u32, const void *, size_t);
+
+	if (unlikely(impl == NULL))
+		impl = resolve_crc32c();
+
+	return impl(crc, buf, size);
+}
+
+#endif /* HAVE_WORKING_IFUNC */
+
+/*
+ * Look up the kernel name of block device @dev by scanning
+ * /proc/partitions for a line with matching major and minor numbers.
+ *
+ * Returns a malloc'd name the caller must free, or NULL if the device is
+ * not listed. Dies if /proc/partitions cannot be opened or memory runs out.
+ */
+char *dev_to_name(dev_t dev)
+{
+	char *line = NULL, *name = NULL;
+	size_t n = 0;
+
+	FILE *f = fopen("/proc/partitions", "r");
+	if (!f)
+		die("error opening /proc/partitions: %m");
+
+	while (getline(&line, &n, f) != -1) {
+		unsigned ma, mi;
+		u64 sectors;
+
+		/*
+		 * Keep name at least as large as the line buffer so the %s
+		 * conversion below cannot overflow it. xrealloc() dies on
+		 * failure instead of handing sscanf() a NULL pointer.
+		 */
+		name = xrealloc(name, n + 1);
+
+		if (sscanf(line, " %u %u %llu %s", &ma, &mi, &sectors, name) == 4 &&
+		    ma == major(dev) && mi == minor(dev))
+			goto found;
+	}
+
+	free(name);
+	name = NULL;
+found:
+	fclose(f);
+	free(line);
+	return name;
+}
+
+/*
+ * Return "/dev/<name>" for block device @dev as a malloc'd string, or NULL
+ * if dev_to_name() cannot find the device. The caller frees the result.
+ */
+char *dev_to_path(dev_t dev)
+{
+	char *path = NULL;
+	char *name = dev_to_name(dev);
+
+	if (name) {
+		path = mprintf("/dev/%s", name);
+		free(name);
+	}
+
+	return path;
+}
+
+/*
+ * Find the /proc/mounts entry that refers to @dev.
+ *
+ * The source field of each entry is split on ':' and every component is
+ * compared with @dev: by st_rdev when @dev is a block device, by
+ * st_dev/st_ino otherwise. Returns the matching entry or NULL.
+ *
+ * The returned pointer refers to storage owned by getmntent(); it is not
+ * to be freed, and its mnt_fsname has been cut at the matching component.
+ */
+struct mntent *dev_to_mount(char *dev)
+{
+	struct mntent *mnt, *ret = NULL;
+	FILE *f = setmntent("/proc/mounts", "r");
+	if (!f)
+		die("error opening /proc/mounts: %m");
+
+	struct stat d1 = xstat(dev);
+
+	while ((mnt = getmntent(f))) {
+		char *d, *p = mnt->mnt_fsname;
+
+		while ((d = strsep(&p, ":"))) {
+			struct stat d2;
+
+			/* Components that are not paths are simply skipped */
+			if (stat(d, &d2))
+				continue;
+
+			if (S_ISBLK(d1.st_mode) != S_ISBLK(d2.st_mode))
+				continue;
+
+			if (S_ISBLK(d1.st_mode)) {
+				if (d1.st_rdev != d2.st_rdev)
+					continue;
+			} else {
+				if (d1.st_dev != d2.st_dev ||
+				    d1.st_ino != d2.st_ino)
+					continue;
+			}
+
+			ret = mnt;
+			goto found;
+		}
+	}
+found:
+	/* Streams opened with setmntent() are closed with endmntent() */
+	endmntent(f);
+	return ret;
+}
+
+/*
+ * Report the mount state of @dev: 0 if it is not mounted, 1 if its mount
+ * entry carries the "ro" option, 2 otherwise.
+ */
+int dev_mounted(char *dev)
+{
+	struct mntent *mnt = dev_to_mount(dev);
+
+	if (!mnt)
+		return 0;
+
+	return hasmntopt(mnt, "ro") ? 1 : 2;
+}
+
+/* Build "/sys/dev/block/<major>:<minor>" for @dev; the caller frees the result. */
+static char *dev_to_sysfs_path(dev_t dev)
+{
+	unsigned ma = major(dev);
+	unsigned mi = minor(dev);
+
+	return mprintf("/sys/dev/block/%u:%u", ma, mi);
+}
+
+/*
+ * Return a malloc'd model string for the device open as @fd.
+ *
+ * Regular files yield "(reg file)". For block devices the sysfs
+ * "device/model" attribute is read, first from the device's own sysfs
+ * directory and then, in case @fd is a partition, from the parent
+ * directory of the sysfs symlink target; if neither is readable the result
+ * is "(unknown device)". The result may be NULL when the attribute is
+ * empty.
+ */
+char *fd_to_dev_model(int fd)
+{
+	struct stat stat = xfstat(fd);
+
+	if (!S_ISBLK(stat.st_mode))
+		return strdup("(reg file)");
+
+	char *sysfs_path = dev_to_sysfs_path(stat.st_rdev);
+	char *model_path = mprintf("%s/device/model", sysfs_path);
+
+	if (access(model_path, R_OK)) {
+		free(model_path);
+
+		/* partition? try parent */
+
+		char buf[1024];
+		/* readlink() does not NUL-terminate: reserve a byte and do it here */
+		ssize_t len = readlink(sysfs_path, buf, sizeof(buf) - 1);
+		if (len < 0)
+			die("readlink error on %s: %m", sysfs_path);
+		buf[len] = '\0';
+
+		free(sysfs_path);
+		sysfs_path = strdup(buf);
+		if (!sysfs_path)
+			die("insufficient memory");
+
+		/*
+		 * Strip the last path component to get the parent device.
+		 * NOTE(review): the link target is used as-is; if it is a
+		 * relative path it resolves against the current working
+		 * directory - confirm this is intended.
+		 */
+		char *slash = strrchr(sysfs_path, '/');
+		if (slash)
+			*slash = 0;
+
+		model_path = mprintf("%s/device/model", sysfs_path);
+		if (access(model_path, R_OK)) {
+			/* Nothing readable: release both paths before giving up */
+			free(model_path);
+			free(sysfs_path);
+			return strdup("(unknown device)");
+		}
+	}
+
+	char *model = read_file_str(AT_FDCWD, model_path);
+
+	free(model_path);
+	free(sysfs_path);
+	return model;
+}
+
+/*
+ * Like kstrtoull(), but the literal strings "U64_MAX" and "U32_MAX" are
+ * also accepted and map to the corresponding constants. Returns 0 on
+ * success, otherwise whatever kstrtoull() returns.
+ */
+static int kstrtoull_symbolic(const char *s, unsigned int base, unsigned long long *res)
+{
+	if (strcmp(s, "U64_MAX") == 0)
+		*res = U64_MAX;
+	else if (strcmp(s, "U32_MAX") == 0)
+		*res = U32_MAX;
+	else
+		return kstrtoull(s, base, res);
+
+	return 0;
+}
+
+/*
+ * Parse @s like kstrtoull_symbolic() and store the value in *@res.
+ * Returns a negative parser error unchanged, -ERANGE if the value does not
+ * fit in an unsigned int, 0 on success.
+ */
+static int kstrtouint_symbolic(const char *s, unsigned int base, unsigned *res)
+{
+	unsigned long long wide;
+	int rv = kstrtoull_symbolic(s, base, &wide);
+
+	if (rv < 0)
+		return rv;
+
+	if (wide > UINT_MAX)
+		return -ERANGE;
+
+	*res = wide;
+	return 0;
+}
+
+/*
+ * Parse "inode:offset[:snapshot]" into a struct bpos. Each field is a
+ * base-10 number or one of the symbolic names understood by
+ * kstrtoull_symbolic(); a missing snapshot defaults to 0.
+ *
+ * @buf is modified in place (separators are overwritten). Malformed input
+ * is fatal.
+ */
+struct bpos bpos_parse(char *buf)
+{
+	/* Pristine copy for the error message: strsep() chops up buf */
+	char *orig = strdup(buf);
+	char *rest = buf;
+
+	char *inode_s	 = strsep(&rest, ":");
+	char *offset_s	 = strsep(&rest, ":");
+	char *snapshot_s = strsep(&rest, ":");
+
+	/* Need at least two fields, and nothing after the third */
+	if (!inode_s || !offset_s || rest)
+		die("invalid bpos %s", orig);
+	free(orig);
+
+	u64 inode_v = 0, offset_v = 0;
+	u32 snapshot_v = 0;
+
+	if (kstrtoull_symbolic(inode_s, 10, &inode_v))
+		die("invalid bpos.inode %s", inode_s);
+
+	if (kstrtoull_symbolic(offset_s, 10, &offset_v))
+		die("invalid bpos.offset %s", offset_s);
+
+	if (snapshot_s &&
+	    kstrtouint_symbolic(snapshot_s, 10, &snapshot_v))
+		die("invalid bpos.snapshot %s", snapshot_s);
+
+	return (struct bpos) {
+		.inode		= inode_v,
+		.offset		= offset_v,
+		.snapshot	= snapshot_v,
+	};
+}
+
+/*
+ * Parse "btree:inode:offset[:snapshot]": the first field is looked up in
+ * __bch2_btree_ids, the remainder goes to bpos_parse(). @buf is modified
+ * in place; malformed input is fatal.
+ */
+struct bbpos bbpos_parse(char *buf)
+{
+	struct bbpos ret;
+	char *rest = buf;
+	char *btree_s = strsep(&rest, ":");
+
+	if (!btree_s)
+		die("invalid bbpos %s", buf);
+
+	ret.btree = read_string_list_or_die(btree_s, __bch2_btree_ids, "btree id");
+
+	/* rest is NULL when the input had no ':' at all */
+	if (!rest)
+		die("invalid bbpos %s", buf);
+
+	ret.pos = bpos_parse(rest);
+	return ret;
+}
+
+/*
+ * Parse "start[-end]", each side in bbpos_parse() syntax. When no end is
+ * given the range's end equals its start. @buf is modified in place.
+ */
+struct bbpos_range bbpos_range_parse(char *buf)
+{
+	char *rest = buf;
+	char *start_str = strsep(&rest, "-");
+	char *end_str = strsep(&rest, "-");
+
+	struct bbpos_range range;
+
+	range.start = bbpos_parse(start_str);
+	range.end = end_str ? bbpos_parse(end_str) : range.start;
+
+	return range;
+}
+
+/*
+ * Build the list of device paths from the command line: a single argument
+ * is handed to bch2_split_devs(), otherwise each argument is duplicated
+ * into the list as-is.
+ */
+darray_str get_or_split_cmdline_devs(int argc, char *argv[])
+{
+	darray_str ret = {};
+
+	if (argc == 1) {
+		bch2_split_devs(argv[0], &ret);
+		return ret;
+	}
+
+	for (unsigned i = 0; i < argc; i++)
+		darray_push(&ret, strdup(argv[i]));
+
+	return ret;
+}
diff --git a/c_src/tools-util.h b/c_src/tools-util.h
new file mode 100644
index 00000000..572aca05
--- /dev/null
+++ b/c_src/tools-util.h
@@ -0,0 +1,214 @@
+#ifndef _TOOLS_UTIL_H
+#define _TOOLS_UTIL_H
+
+#include <errno.h>
+#include <mntent.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/bug.h>
+#include <linux/byteorder.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
+#include "libbcachefs/bcachefs.h"
+#include "libbcachefs/bbpos.h"
+#include "libbcachefs/darray.h"
+
+/* Shorthand for the compiler attribute marking a function that never returns. */
+#define noreturn __attribute__((noreturn))
+
+/* Takes a printf-style format (checked by the compiler); declared noreturn. */
+void die(const char *, ...)
+ __attribute__ ((format (printf, 1, 2))) noreturn;
+/*
+ * Takes a printf-style format (checked by the compiler) and returns a char *.
+ * NOTE(review): presumably a newly allocated string the caller must free -
+ * confirm against the definition.
+ */
+char *mprintf(const char *, ...)
+ __attribute__ ((format (printf, 1, 2)));
+/*
+ * "x" wrappers around pread/pwrite/fstatat/fstat/stat.
+ * NOTE(review): presumably these die() on failure like the x* helpers defined
+ * in this header - their definitions are not visible here.
+ */
+void xpread(int, void *, size_t, off_t);
+void xpwrite(int, const void *, size_t, off_t, const char *);
+struct stat xfstatat(int, const char *, int);
+struct stat xfstat(int);
+struct stat xstat(const char *);
+
+/*
+ * Allocate @size bytes of zero-filled memory.
+ * Calls die() instead of returning NULL when the allocation fails.
+ */
+static inline void *xmalloc(size_t size)
+{
+	/* calloc() hands back memory that is already zeroed. */
+	void *mem = calloc(1, size);
+
+	if (mem == NULL)
+		die("insufficient memory");
+
+	return mem;
+}
+
+/*
+ * calloc() wrapper: returns zero-filled storage for @count objects of
+ * @size bytes each, or calls die() if the allocation fails.
+ */
+static inline void *xcalloc(size_t count, size_t size)
+{
+	void *mem = calloc(count, size);
+
+	if (mem == NULL)
+		die("insufficient memory");
+	return mem;
+}
+
+/*
+ * realloc() wrapper: resizes @p to @size bytes and returns the (possibly
+ * moved) allocation, or calls die() when realloc() returns NULL.
+ *
+ * Note that a NULL return is always treated as failure, including on
+ * implementations where realloc(p, 0) returns NULL.
+ */
+static inline void *xrealloc(void *p, size_t size)
+{
+	void *resized = realloc(p, size);
+
+	if (resized == NULL)
+		die("insufficient memory");
+	return resized;
+}
+
+/*
+ * openat() that cannot fail: evaluates to the new file descriptor, or calls
+ * die() with the path and the errno text (%m) when openat() returns < 0.
+ *
+ * The variadic part carries the open flags and, optionally, the mode.
+ * @_path is evaluated exactly once, so an argument with side effects is
+ * safe, and the temporaries use xopenat-specific names so they cannot
+ * capture a caller variable named _fd.
+ */
+#define xopenat(_dirfd, _path, ...)					\
+({									\
+	const char *_xopenat_path = (_path);				\
+	int _xopenat_fd = openat((_dirfd), _xopenat_path, __VA_ARGS__);	\
+	if (_xopenat_fd < 0)						\
+		die("Error opening %s: %m", _xopenat_path);		\
+	_xopenat_fd;							\
+})
+
+/* xopenat() with AT_FDCWD as the directory file descriptor. */
+#define xopen(...)	xopenat(AT_FDCWD, __VA_ARGS__)
+
+#define xioctl(_fd, _nr, ...) \
+({ \
+ int _ret = ioctl((_fd), (_nr), ##__VA_ARGS__); \
+ if (_ret < 0) \
+ die(#_nr " ioctl error: %m"); \
+ _ret; \
+})
+
+/*
+ * Small file helpers taking an int and a path string.
+ * NOTE(review): the int is presumably a directory fd the path is resolved
+ * against - confirm against the definitions.
+ */
+void write_file_str(int, const char *, const char *);
+char *read_file_str(int, const char *);
+u64 read_file_u64(int, const char *);
+
+/*
+ * Takes a string, a table of strings and a third string.
+ * NOTE(review): presumably looks the first string up in the table, returns
+ * its index and dies using the third string as a label otherwise - confirm.
+ */
+ssize_t read_string_list_or_die(const char *, const char * const[],
+ const char *);
+
+/* NOTE(review): presumably size / block size of the device behind an fd - confirm. */
+u64 get_size(int);
+unsigned get_blocksize(int);
+/* Forward declaration: only a pointer to it is needed by the prototype below. */
+struct dev_opts;
+int open_for_format(struct dev_opts *, bool);
+
+/* NOTE(review): presumably prompts for a yes/no answer on the terminal - confirm. */
+bool ask_yn(void);
+
+/*
+ * A span of u64 offsets. range_add() builds one with end = offset + size,
+ * so end is the first offset past the span.
+ */
+struct range {
+ u64 start;
+ u64 end;
+};
+
+/* Growable array (darray) of struct range. */
+typedef DARRAY(struct range) ranges;
+
+/*
+ * Append the span starting at @offset and covering @size units to @data.
+ * The stored end is offset + size.
+ */
+static inline void range_add(ranges *data, u64 offset, u64 size)
+{
+	struct range span = {
+		.start	= offset,
+		.end	= offset + size,
+	};
+
+	darray_push(data, span);
+}
+
+/*
+ * In-place transformations of a ranges array; definitions are elsewhere.
+ * NOTE(review): judging by the names these sort and coalesce the entries,
+ * and round entries up/down to a multiple of the unsigned argument - confirm.
+ */
+void ranges_sort_merge(ranges *);
+void ranges_roundup(ranges *, unsigned);
+void ranges_rounddown(ranges *, unsigned);
+
+/* Cursor used by hole_iter_next() / for_each_hole() to walk the gaps between ranges. */
+struct hole_iter {
+ /* The ranges being walked; for_each_hole() assigns this by value. */
+ ranges r;
+ /* Index of the range that ends the next hole; incremented on every step. */
+ size_t idx;
+ /* End of the final hole, used once idx has reached r.nr. */
+ u64 end;
+};
+
+/*
+ * Return the gap that precedes range number iter->idx and advance the cursor.
+ *
+ * The gap starts where the previous range ends (or at 0 for the first one)
+ * and stops where the current range starts (or at iter->end once every range
+ * has been consumed). The gap may be empty; BUG_ON() fires if its start lies
+ * beyond its end.
+ */
+static inline struct range hole_iter_next(struct hole_iter *iter)
+{
+	struct range hole;
+
+	if (iter->idx)
+		hole.start = iter->r.data[iter->idx - 1].end;
+	else
+		hole.start = 0;
+
+	if (iter->idx < iter->r.nr)
+		hole.end = iter->r.data[iter->idx].start;
+	else
+		hole.end = iter->end;
+
+	BUG_ON(hole.start > hole.end);
+
+	iter->idx++;
+	return hole;
+}
+
+/*
+ * Iterate over the gaps around the entries of @_ranges, up to @_end.
+ *
+ * @_iter is a struct hole_iter lvalue used as the cursor and @_i receives
+ * each gap as a struct range. The body runs r.nr + 1 times: once for the
+ * gap before every range and once for the gap between the last range and
+ * @_end. The ", true" keeps the loop condition true after the assignment.
+ */
+#define for_each_hole(_iter, _ranges, _end, _i) \
+ for (_iter = (struct hole_iter) { .r = _ranges, .end = _end }; \
+ (_iter.idx <= _iter.r.nr && \
+ (_i = hole_iter_next(&_iter), true));)
+
+#include <linux/fiemap.h>
+
+/* State for walking a file's extents with fiemap_iter_next() / fiemap_for_each(). */
+struct fiemap_iter {
+ /* Request buffer allocated by fiemap_iter_init() and freed by fiemap_iter_exit(). */
+ struct fiemap *f;
+ /* NOTE(review): presumably the next extent to return from f - maintained by fiemap_iter_next(). */
+ unsigned idx;
+ /* File descriptor passed to fiemap_iter_init(). */
+ int fd;
+};
+
+/*
+ * Prepare @iter for walking the extents of @fd.
+ *
+ * Allocates a zeroed fiemap request with room for a fixed number of extent
+ * records and sets it up to cover the whole file (fm_start stays 0,
+ * fm_length is FIEMAP_MAX_OFFSET). Release it with fiemap_iter_exit().
+ */
+static inline void fiemap_iter_init(struct fiemap_iter *iter, int fd)
+{
+	/* Number of extent records the request buffer has room for. */
+	enum { NR_EXTENTS = 1024 };
+
+	struct fiemap *req = xmalloc(sizeof(struct fiemap) +
+				     sizeof(struct fiemap_extent) * NR_EXTENTS);
+
+	req->fm_extent_count	= NR_EXTENTS;
+	req->fm_length		= FIEMAP_MAX_OFFSET;
+
+	memset(iter, 0, sizeof(*iter));
+	iter->f		= req;
+	iter->fd	= fd;
+}
+
+/*
+ * Release the request buffer owned by @iter and reset every field to zero,
+ * leaving no dangling pointer behind.
+ */
+static inline void fiemap_iter_exit(struct fiemap_iter *iter)
+{
+	struct fiemap *req = iter->f;
+
+	memset(iter, 0, sizeof(*iter));
+	free(req);
+}
+
+/* Returns the next extent for the iterator; defined outside this header. */
+struct fiemap_extent fiemap_iter_next(struct fiemap_iter *);
+
+/*
+ * Loop over the extents of @fd: initializes @iter, then assigns each result
+ * of fiemap_iter_next() to @extent until one with fe_length == 0 comes back.
+ * The macro itself never calls fiemap_iter_exit().
+ */
+#define fiemap_for_each(fd, iter, extent) \
+ for (fiemap_iter_init(&iter, fd); \
+ (extent = fiemap_iter_next(&iter)).fe_length;)
+
+/*
+ * NOTE(review): presumably returns the part of the first string that follows
+ * the given prefix, or NULL when it does not match - confirm.
+ */
+char *strcmp_prefix(char *, const char *);
+
+/* Avoid conflicts with libblkid's crc32 function in static builds */
+/* Every later use of the name crc32c, including the prototype below, becomes bch_crc32c. */
+#define crc32c bch_crc32c
+u32 crc32c(u32, const void *, size_t);
+
+/*
+ * Device lookup helpers; definitions are elsewhere.
+ * NOTE(review): ownership of the returned strings / mntent is not visible
+ * from this header - check the definitions before freeing or caching them.
+ */
+char *dev_to_name(dev_t);
+char *dev_to_path(dev_t);
+struct mntent *dev_to_mount(char *);
+int dev_mounted(char *);
+char *fd_to_dev_model(int);
+
+/*
+ * Drop the first @_nr arguments, or all of them if fewer remain.
+ *
+ * Operates on the variables named argc and argv in the calling scope, so it
+ * can only be used where both are visible and modifiable.
+ */
+#define args_shift(_nr) \
+do { \
+ unsigned _n = min((_nr), argc); \
+ argc -= _n; \
+ argv += _n; \
+} while (0)
+
+/*
+ * Evaluates to the first remaining argument and shifts it off, or to NULL
+ * when argc is 0 (or argv[0] is itself NULL), in which case nothing is
+ * shifted. Uses the caller's argc and argv, like args_shift().
+ */
+#define arg_pop()							\
+({									\
+	char *_popped = NULL;						\
+	if (argc)							\
+		_popped = argv[0];					\
+	if (_popped)							\
+		args_shift(1);						\
+	_popped;							\
+})
+
+/*
+ * Position parsers. Each takes a non-const string because parsing splits it
+ * in place with strsep(); malformed input is fatal (die()).
+ * bpos_parse() accepts "inode:offset[:snapshot]"; bbpos_parse() expects a
+ * btree name and ':' in front of that.
+ */
+struct bpos bpos_parse(char *);
+struct bbpos bbpos_parse(char *);
+
+/* A pair of btree positions; bbpos_range_parse() sets end = start when only one is given. */
+struct bbpos_range {
+ struct bbpos start;
+ struct bbpos end;
+};
+
+/* Parses "start[-end]", each half in bbpos_parse() syntax. */
+struct bbpos_range bbpos_range_parse(char *);
+
+/*
+ * Returns the device strings for the remaining command line: a single
+ * argument goes through bch2_split_devs(), several arguments are copied
+ * one entry each.
+ */
+darray_str get_or_split_cmdline_devs(int argc, char *argv[]);
+
+#endif /* _TOOLS_UTIL_H */