diff options
Diffstat (limited to 'c_src')
-rw-r--r-- | c_src/bcachefs.c | 164 | ||||
-rw-r--r-- | c_src/cmd_assemble.c | 48 | ||||
-rw-r--r-- | c_src/cmd_attr.c | 119 | ||||
-rw-r--r-- | c_src/cmd_counters.c | 51 | ||||
-rw-r--r-- | c_src/cmd_data.c | 127 | ||||
-rw-r--r-- | c_src/cmd_device.c | 647 | ||||
-rw-r--r-- | c_src/cmd_dump.c | 182 | ||||
-rw-r--r-- | c_src/cmd_format.c | 435 | ||||
-rw-r--r-- | c_src/cmd_fs.c | 544 | ||||
-rw-r--r-- | c_src/cmd_fsck.c | 348 | ||||
-rw-r--r-- | c_src/cmd_fusemount.c | 1314 | ||||
-rw-r--r-- | c_src/cmd_key.c | 161 | ||||
-rw-r--r-- | c_src/cmd_kill_btree_node.c | 140 | ||||
-rw-r--r-- | c_src/cmd_list_journal.c | 306 | ||||
-rw-r--r-- | c_src/cmd_migrate.c | 426 | ||||
-rw-r--r-- | c_src/cmd_option.c | 168 | ||||
-rw-r--r-- | c_src/cmd_run.c | 33 | ||||
-rw-r--r-- | c_src/cmd_version.c | 9 | ||||
-rw-r--r-- | c_src/cmds.h | 63 | ||||
-rw-r--r-- | c_src/config.h | 0 | ||||
-rw-r--r-- | c_src/crypto.c | 201 | ||||
-rw-r--r-- | c_src/crypto.h | 22 | ||||
-rw-r--r-- | c_src/libbcachefs.c | 754 | ||||
-rw-r--r-- | c_src/libbcachefs.h | 300 | ||||
-rw-r--r-- | c_src/posix_to_bcachefs.c | 461 | ||||
-rw-r--r-- | c_src/posix_to_bcachefs.h | 54 | ||||
-rw-r--r-- | c_src/qcow2.c | 134 | ||||
-rw-r--r-- | c_src/qcow2.h | 9 | ||||
-rw-r--r-- | c_src/tools-util.c | 741 | ||||
-rw-r--r-- | c_src/tools-util.h | 214 |
30 files changed, 8175 insertions, 0 deletions
diff --git a/c_src/bcachefs.c b/c_src/bcachefs.c new file mode 100644 index 00000000..77bf6215 --- /dev/null +++ b/c_src/bcachefs.c @@ -0,0 +1,164 @@ +/* + * Authors: Kent Overstreet <kent.overstreet@gmail.com> + * Gabriel de Perthuis <g2p.code@gmail.com> + * Jacob Malevich <jam@datera.io> + * + * GPLv2 + */ + +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <inttypes.h> +#include <limits.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdbool.h> +#include <stdint.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <raid/raid.h> + +#include "cmds.h" + +void bcachefs_usage(void) +{ + puts("bcachefs - tool for managing bcachefs filesystems\n" + "usage: bcachefs <command> [<args>]\n" + "\n" + "Superblock commands:\n" + " format Format a new filesystem\n" + " show-super Dump superblock information to stdout\n" + " set-fs-option Set a filesystem option\n" + " reset-counters Reset all counters on an unmounted device\n" + "\n" + "Mount:\n" + " mount Mount a filesystem\n" + "\n" + "Repair:\n" + " fsck Check an existing filesystem for errors\n" + "\n" +#if 0 + "Startup/shutdown, assembly of multi device filesystems:\n" + " assemble Assemble an existing multi device filesystem\n" + " incremental Incrementally assemble an existing multi device filesystem\n" + " run Start a partially assembled filesystem\n" + " stop Stop a running filesystem\n" + "\n" +#endif + "Commands for managing a running filesystem:\n" + " fs usage Show disk usage\n" + "\n" + "Commands for managing devices within a running filesystem:\n" + " device add Add a new device to an existing filesystem\n" + " device remove Remove a device from an existing filesystem\n" + " device online Re-add an existing member to a filesystem\n" + " device offline Take a device offline, without removing it\n" + " device evacuate Migrate data off of a specific device\n" + " device set-state Mark a device as failed\n" + " 
device resize Resize filesystem on a device\n" + " device resize-journal Resize journal on a device\n" + "\n" + "Commands for managing subvolumes and snapshots:\n" + " subvolume create Create a new subvolume\n" + " subvolume delete Delete an existing subvolume\n" + " subvolume snapshot Create a snapshot\n" + "\n" + "Commands for managing filesystem data:\n" + " data rereplicate Rereplicate degraded data\n" + " data job Kick off low level data jobs\n" + "\n" + "Encryption:\n" + " unlock Unlock an encrypted filesystem prior to running/mounting\n" + " set-passphrase Change passphrase on an existing (unmounted) filesystem\n" + " remove-passphrase Remove passphrase on an existing (unmounted) filesystem\n" + "\n" + "Migrate:\n" + " migrate Migrate an existing filesystem to bcachefs, in place\n" + " migrate-superblock Add default superblock, after bcachefs migrate\n" + "\n" + "Commands for operating on files in a bcachefs filesystem:\n" + " set-file-option Set various attributes on files or directories\n" + "\n" + "Debug:\n" + "These commands work on offline, unmounted filesystems\n" + " dump Dump filesystem metadata to a qcow2 image\n" + " list List filesystem metadata in textual form\n" + " list_journal List contents of journal\n" + "\n" + "FUSE:\n" + " fusemount Mount a filesystem via FUSE\n" + "\n" + "Miscellaneous:\n" + " completions Generate shell completions\n" + " version Display the version of the invoked bcachefs tool\n"); +} + +static char *pop_cmd(int *argc, char *argv[]) +{ + char *cmd = argv[1]; + if (!(*argc < 2)) + memmove(&argv[1], &argv[2], (*argc - 2) * sizeof(argv[0])); + (*argc)--; + argv[*argc] = NULL; + + return cmd; +} + +int fs_cmds(int argc, char *argv[]) +{ + char *cmd = pop_cmd(&argc, argv); + + if (argc < 1) { + bcachefs_usage(); + exit(EXIT_FAILURE); + } + if (!strcmp(cmd, "usage")) + return cmd_fs_usage(argc, argv); + + return 0; +} + +int device_cmds(int argc, char *argv[]) +{ + char *cmd = pop_cmd(&argc, argv); + + if (argc < 1) + return 
device_usage(); + if (!strcmp(cmd, "add")) + return cmd_device_add(argc, argv); + if (!strcmp(cmd, "remove")) + return cmd_device_remove(argc, argv); + if (!strcmp(cmd, "online")) + return cmd_device_online(argc, argv); + if (!strcmp(cmd, "offline")) + return cmd_device_offline(argc, argv); + if (!strcmp(cmd, "evacuate")) + return cmd_device_evacuate(argc, argv); + if (!strcmp(cmd, "set-state")) + return cmd_device_set_state(argc, argv); + if (!strcmp(cmd, "resize")) + return cmd_device_resize(argc, argv); + if (!strcmp(cmd, "resize-journal")) + return cmd_device_resize_journal(argc, argv); + + return 0; +} + +int data_cmds(int argc, char *argv[]) +{ + char *cmd = pop_cmd(&argc, argv); + + if (argc < 1) + return data_usage(); + if (!strcmp(cmd, "rereplicate")) + return cmd_data_rereplicate(argc, argv); + if (!strcmp(cmd, "job")) + return cmd_data_job(argc, argv); + + return 0; +} diff --git a/c_src/cmd_assemble.c b/c_src/cmd_assemble.c new file mode 100644 index 00000000..a997e1e1 --- /dev/null +++ b/c_src/cmd_assemble.c @@ -0,0 +1,48 @@ + +#include <alloca.h> +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "libbcachefs/bcachefs_ioctl.h" +#include "cmds.h" +#include "libbcachefs.h" + +#if 0 +int cmd_assemble(int argc, char *argv[]) +{ + unsigned nr_devs = argc - 1; + + if (argc <= 1) + die("Please supply at least one device"); + + struct bch_ioctl_assemble *assemble = + alloca(sizeof(*assemble) + sizeof(__u64) * nr_devs); + + memset(assemble, 0, sizeof(*assemble)); + assemble->nr_devs = nr_devs; + + unsigned i; + for (i = 0; i < nr_devs; i++) + assemble->devs[i] = (unsigned long) argv[i + 1]; + + xioctl(bcachectl_open(), BCH_IOCTL_ASSEMBLE, assemble); + return 0; +} + +int cmd_incremental(int argc, char *argv[]) +{ + if (argc != 2) + die("Please supply exactly one device"); + + struct bch_ioctl_incremental incremental = { + .dev = (unsigned long) argv[1], + }; + + 
xioctl(bcachectl_open(), BCH_IOCTL_INCREMENTAL, &incremental); + return 0; +} +#endif diff --git a/c_src/cmd_attr.c b/c_src/cmd_attr.c new file mode 100644 index 00000000..1da41265 --- /dev/null +++ b/c_src/cmd_attr.c @@ -0,0 +1,119 @@ +#include <dirent.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/xattr.h> +#include <unistd.h> + +#include "libbcachefs/bcachefs_ioctl.h" + +#include "cmds.h" +#include "libbcachefs.h" + +static void propagate_recurse(int dirfd) +{ + DIR *dir = fdopendir(dirfd); + struct dirent *d; + + if (!dir) { + fprintf(stderr, "fdopendir() error: %m\n"); + return; + } + + while ((errno = 0), (d = readdir(dir))) { + if (!strcmp(d->d_name, ".") || + !strcmp(d->d_name, "..")) + continue; + + int ret = ioctl(dirfd, BCHFS_IOC_REINHERIT_ATTRS, + d->d_name); + if (ret < 0) { + fprintf(stderr, "error propagating attributes to %s: %m\n", + d->d_name); + continue; + } + + if (!ret) /* did no work */ + continue; + + struct stat st = xfstatat(dirfd, d->d_name, + AT_SYMLINK_NOFOLLOW); + if (!S_ISDIR(st.st_mode)) + continue; + + int fd = openat(dirfd, d->d_name, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "error opening %s: %m\n", d->d_name); + continue; + } + propagate_recurse(fd); + close(fd); + } + + if (errno) + die("readdir error: %m"); +} + +static void do_setattr(char *path, struct bch_opt_strs opts) +{ + unsigned i; + + for (i = 0; i < bch2_opts_nr; i++) { + if (!opts.by_id[i]) + continue; + + char *n = mprintf("bcachefs.%s", bch2_opt_table[i].attr.name); + + if (setxattr(path, n, opts.by_id[i], strlen(opts.by_id[i]), 0)) + die("setxattr error: %m"); + + free(n); + } + + struct stat st = xstat(path); + if (!S_ISDIR(st.st_mode)) + return; + + int dirfd = open(path, O_RDONLY); + if (dirfd < 0) + die("error opening %s: %m", path); + + propagate_recurse(dirfd); + close(dirfd); +} + +static void setattr_usage(void) +{ + puts("bcachefs set-file-option - set attributes on files in a bcachefs 
filesystem\n" + "Usage: bcachefs set-file-option [OPTIONS]... <files>\n" + "\n" + "Options:"); + + bch2_opts_usage(OPT_INODE); + puts(" -h Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +int cmd_setattr(int argc, char *argv[]) +{ + struct bch_opt_strs opts = + bch2_cmdline_opts_get(&argc, argv, OPT_INODE); + unsigned i; + + for (i = 1; i < argc; i++) + if (argv[i][0] == '-') { + printf("invalid option %s\n", argv[i]); + setattr_usage(); + exit(EXIT_FAILURE); + } + + if (argc <= 1) + die("Please supply one or more files"); + + for (i = 1; i < argc; i++) + do_setattr(argv[i], opts); + bch2_opt_strs_free(&opts); + + return 0; +} diff --git a/c_src/cmd_counters.c b/c_src/cmd_counters.c new file mode 100644 index 00000000..9adde242 --- /dev/null +++ b/c_src/cmd_counters.c @@ -0,0 +1,51 @@ +#include <getopt.h> + +#include "cmds.h" +#include "libbcachefs.h" +#include "libbcachefs/super-io.h" + +static void reset_counters_usage(void) +{ + puts("bcachefs reset-counters \n" + "Usage: bcachefs reset-counters device\n" + "\n" + "Options:\n" + " -h, --help display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); + exit(EXIT_SUCCESS); +} + +int cmd_reset_counters(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "help", 0, NULL, 'h' }, + { NULL } + }; + int opt; + + while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1) + switch (opt) { + case 'h': + reset_counters_usage(); + break; + } + args_shift(optind); + + char *dev = arg_pop(); + if (!dev) + die("please supply a device"); + if (argc) + die("too many arguments"); + + struct bch_opts opts = bch2_opts_empty(); + struct bch_sb_handle sb; + int ret = bch2_read_super(dev, &opts, &sb); + if (ret) + die("Error opening %s: %s", dev, bch2_err_str(ret)); + + bch2_sb_field_resize(&sb, counters, 0); + + bch2_super_write(sb.bdev->bd_fd, sb.sb); + bch2_free_super(&sb); + return 0; +} diff --git a/c_src/cmd_data.c b/c_src/cmd_data.c 
new file mode 100644 index 00000000..1ef689bc --- /dev/null +++ b/c_src/cmd_data.c @@ -0,0 +1,127 @@ + + +#include <stdio.h> +#include <sys/ioctl.h> + +#include "libbcachefs/bcachefs_ioctl.h" +#include "libbcachefs/btree_cache.h" +#include "libbcachefs/move.h" + +#include "cmds.h" +#include "libbcachefs.h" + +int data_usage(void) +{ + puts("bcachefs data - manage filesystem data\n" + "Usage: bcachefs data <CMD> [OPTIONS]\n" + "\n" + "Commands:\n" + " rereplicate Rereplicate degraded data\n" + " job Kick off low level data jobs\n" + "\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); + return 0; +} + +static void data_rereplicate_usage(void) +{ + puts("bcachefs data rereplicate\n" + "Usage: bcachefs data rereplicate filesystem\n" + "\n" + "Walks existing data in a filesystem, writing additional copies\n" + "of any degraded data\n" + "\n" + "Options:\n" + " -h, --help display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); + exit(EXIT_SUCCESS); +} + +int cmd_data_rereplicate(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "h")) != -1) + switch (opt) { + case 'h': + data_rereplicate_usage(); + } + args_shift(optind); + + char *fs_path = arg_pop(); + if (!fs_path) + die("Please supply a filesystem"); + + if (argc) + die("too many arguments"); + + return bchu_data(bcache_fs_open(fs_path), (struct bch_ioctl_data) { + .op = BCH_DATA_OP_rereplicate, + .start_btree = 0, + .start_pos = POS_MIN, + .end_btree = BTREE_ID_NR, + .end_pos = POS_MAX, + }); +}
+
+/* Print help for "bcachefs data job" and exit successfully. */
+static void data_job_usage(void)
+{
+	/* <job> is mandatory, the filesystem defaults to "." (see cmd_data_job) */
+	puts("bcachefs data job\n"
+	     "Usage: bcachefs data job <job> [filesystem]\n"
+	     "\n"
+	     "Kick off a data job and report progress\n"
+	     "\n"
+	     "job: one of scrub, rereplicate, migrate, rewrite_old_nodes, or drop_extra_replicas\n"
+	     "\n"
+	     "Options:\n"
+	     " -b btree btree to operate on\n"
+	     " -s inode:offset start position\n"
+	     " -e inode:offset end position\n"
+	     " -h, --help display this help and exit\n"
+	     "Report bugs to <linux-bcachefs@vger.kernel.org>");
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * "bcachefs data job": build a struct bch_ioctl_data from the command line
+ * and hand it to bchu_data().
+ *
+ * Defaults cover every btree (0 .. BTREE_ID_NR) from POS_MIN to POS_MAX.
+ *   -b btree         restrict the job to a single btree
+ *   -s inode:offset  start position
+ *   -e inode:offset  end position
+ *   -h               print usage and exit
+ *
+ * Positional arguments: the job type (required) and a filesystem path
+ * (optional, "." when omitted).  Returns whatever bchu_data() returns.
+ */
+int cmd_data_job(int argc, char *argv[])
+{
+	struct bch_ioctl_data op = {
+		.start_btree	= 0,
+		.start_pos	= POS_MIN,
+		.end_btree	= BTREE_ID_NR,
+		.end_pos	= POS_MAX,
+	};
+	int opt;
+
+	/*
+	 * "b:" has to be in the option string, otherwise getopt() rejects
+	 * -b and the 'b' case below can never run.
+	 */
+	while ((opt = getopt(argc, argv, "b:s:e:h")) != -1)
+		switch (opt) {
+		case 'b':
+			op.start_btree = read_string_list_or_die(optarg,
+						__bch2_btree_ids, "btree id");
+			op.end_btree = op.start_btree;
+			break;
+		case 's':
+			op.start_pos	= bpos_parse(optarg);
+			break;
+		case 'e':
+			/*
+			 * This assignment used to sit above the case label,
+			 * after the previous break, so -e was silently ignored.
+			 */
+			op.end_pos	= bpos_parse(optarg);
+			break;
+		case 'h':
+			data_job_usage();
+			break;
+		}
+	args_shift(optind);
+
+	char *job = arg_pop();
+	if (!job)
+		die("please specify which type of job");
+
+	op.op = read_string_list_or_die(job, bch2_data_ops_strs, "bad job type");
+
+	char *fs_path = arg_pop();
+	if (!fs_path)
+		fs_path = ".";
+
+	if (argc)
+		die("too many arguments");
+
+	return bchu_data(bcache_fs_open(fs_path), op);
+}
diff --git a/c_src/cmd_device.c b/c_src/cmd_device.c new file mode 100644 index 00000000..c86fb7f1 --- /dev/null +++ b/c_src/cmd_device.c @@ -0,0 +1,647 @@ +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <libgen.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "libbcachefs/bcachefs.h" +#include "libbcachefs/bcachefs_ioctl.h" +#include "libbcachefs/errcode.h" +#include "libbcachefs/journal.h" +#include "libbcachefs/sb-members.h" +#include "libbcachefs/super-io.h" +#include "cmds.h" +#include "libbcachefs.h" +#include "libbcachefs/opts.h" +#include "tools-util.h" + +int device_usage(void) +{ + puts("bcachefs device - manage devices within a running filesystem\n" + "Usage: bcachefs device <CMD> [OPTION]\n" + "\n" + "Commands:\n" + " add add a new device to an existing filesystem\n" + " remove remove a device from an existing filesystem\n" + " online 
re-add an existing member to a filesystem\n" + " offline take a device offline, without removing it\n" + " evacuate migrate data off a specific device\n" + " set-state mark a device as failed\n" + " resize resize filesystem on a device\n" + " resize-journal resize journal on a device\n" + "\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); + return 0; +} + +static void device_add_usage(void) +{ + puts("bcachefs device add - add a device to an existing filesystem\n" + "Usage: bcachefs device add [OPTION]... filesystem device\n" + "\n" + "Options:\n" + " -S, --fs_size=size Size of filesystem on device\n" + " -B, --bucket=size Bucket size\n" + " -D, --discard Enable discards\n" + " -l, --label=label Disk label\n" + " -f, --force Use device even if it appears to already be formatted\n" + " -h, --help Display this help and exit\n" + "\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +int cmd_device_add(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "fs_size", required_argument, NULL, 'S' }, + { "bucket", required_argument, NULL, 'B' }, + { "discard", no_argument, NULL, 'D' }, + { "label", required_argument, NULL, 'l' }, + { "force", no_argument, NULL, 'f' }, + { "help", no_argument, NULL, 'h' }, + { NULL } + }; + struct format_opts format_opts = format_opts_default(); + struct dev_opts dev_opts = dev_opts_default(); + bool force = false; + int opt; + + while ((opt = getopt_long(argc, argv, "S:B:Dl:fh", + longopts, NULL)) != -1) + switch (opt) { + case 'S': + if (bch2_strtoull_h(optarg, &dev_opts.size)) + die("invalid filesystem size"); + break; + case 'B': + if (bch2_strtoull_h(optarg, &dev_opts.bucket_size)) + die("bad bucket_size %s", optarg); + break; + case 'D': + dev_opts.discard = true; + break; + case 'l': + dev_opts.label = strdup(optarg); + break; + case 'f': + force = true; + break; + case 'h': + device_add_usage(); + exit(EXIT_SUCCESS); + } + args_shift(optind); + + char *fs_path = arg_pop(); + if (!fs_path) + 
die("Please supply a filesystem"); + + dev_opts.path = arg_pop(); + if (!dev_opts.path) + die("Please supply a device"); + + if (argc) + die("too many arguments"); + + struct bchfs_handle fs = bcache_fs_open(fs_path); + + int ret = open_for_format(&dev_opts, force); + if (ret) + die("Error opening %s: %s", dev_opts.path, strerror(-ret)); + + struct bch_opt_strs fs_opt_strs; + memset(&fs_opt_strs, 0, sizeof(fs_opt_strs)); + + struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs); + + opt_set(fs_opts, block_size, + read_file_u64(fs.sysfs_fd, "options/block_size")); + opt_set(fs_opts, btree_node_size, + read_file_u64(fs.sysfs_fd, "options/btree_node_size")); + + struct bch_sb *sb = bch2_format(fs_opt_strs, + fs_opts, + format_opts, + &dev_opts, 1); + free(sb); + bchu_disk_add(fs, dev_opts.path); + return 0; +} + +static void device_remove_usage(void) +{ + puts("bcachefs device_remove - remove a device from a filesystem\n" + "Usage:\n" + " bcachefs device remove <device>|<devid> <path>\n" + "\n" + "Options:\n" + " -f, --force Force removal, even if some data\n" + " couldn't be migrated\n" + " -F, --force-metadata Force removal, even if some metadata\n" + " couldn't be migrated\n" + " -h, --help display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); + exit(EXIT_SUCCESS); +} + +int cmd_device_remove(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "by-id", 0, NULL, 'i' }, + { "force", 0, NULL, 'f' }, + { "force-metadata", 0, NULL, 'F' }, + { "help", 0, NULL, 'h' }, + { NULL } + }; + struct bchfs_handle fs; + bool by_id = false; + int opt, flags = BCH_FORCE_IF_DEGRADED, dev_idx; + + while ((opt = getopt_long(argc, argv, "fh", longopts, NULL)) != -1) + switch (opt) { + case 'f': + flags |= BCH_FORCE_IF_DATA_LOST; + break; + case 'F': + flags |= BCH_FORCE_IF_METADATA_LOST; + break; + case 'h': + device_remove_usage(); + } + args_shift(optind); + + char *dev_str = arg_pop(); + if (!dev_str) + die("Please supply a 
device"); + + char *end; + dev_idx = strtoul(dev_str, &end, 10); + if (*dev_str && !*end) + by_id = true; + + char *fs_path = arg_pop(); + if (fs_path) { + fs = bcache_fs_open(fs_path); + + if (!by_id) { + dev_idx = bchu_dev_path_to_idx(fs, dev_str); + if (dev_idx < 0) + die("%s does not seem to be a member of %s", + dev_str, fs_path); + } + } else if (!by_id) { + fs = bchu_fs_open_by_dev(dev_str, &dev_idx); + } else { + die("Filesystem path required when specifying device by id"); + } + + bchu_disk_remove(fs, dev_idx, flags); + return 0; +} + +static void device_online_usage(void) +{ + puts("bcachefs device online - readd a device to a running filesystem\n" + "Usage: bcachefs device online [OPTION]... device\n" + "\n" + "Options:\n" + " -h, --help Display this help and exit\n" + "\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +int cmd_device_online(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "h")) != -1) + switch (opt) { + case 'h': + device_online_usage(); + exit(EXIT_SUCCESS); + } + args_shift(optind); + + char *dev = arg_pop(); + if (!dev) + die("Please supply a device"); + + if (argc) + die("too many arguments"); + + int dev_idx; + struct bchfs_handle fs = bchu_fs_open_by_dev(dev, &dev_idx); + bchu_disk_online(fs, dev); + return 0; +} + +static void device_offline_usage(void) +{ + puts("bcachefs device offline - take a device offline, without removing it\n" + "Usage: bcachefs device offline [OPTION]... 
device\n" + "\n" + "Options:\n" + " -f, --force Force, if data redundancy will be degraded\n" + " -h, --help Display this help and exit\n" + "\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +int cmd_device_offline(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "force", 0, NULL, 'f' }, + { NULL } + }; + int opt, flags = 0; + + while ((opt = getopt_long(argc, argv, "fh", + longopts, NULL)) != -1) + switch (opt) { + case 'f': + flags |= BCH_FORCE_IF_DEGRADED; + break; + case 'h': + device_offline_usage(); + exit(EXIT_SUCCESS); + } + args_shift(optind); + + char *dev = arg_pop(); + if (!dev) + die("Please supply a device"); + + if (argc) + die("too many arguments"); + + int dev_idx; + struct bchfs_handle fs = bchu_fs_open_by_dev(dev, &dev_idx); + bchu_disk_offline(fs, dev_idx, flags); + return 0; +} + +static void device_evacuate_usage(void) +{ + puts("bcachefs device evacuate - move data off of a given device\n" + "Usage: bcachefs device evacuate [OPTION]... 
device\n" + "\n" + "Options:\n" + " -h, --help Display this help and exit\n" + "\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +int cmd_device_evacuate(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "h")) != -1) + switch (opt) { + case 'h': + device_evacuate_usage(); + exit(EXIT_SUCCESS); + } + args_shift(optind); + + char *dev_path = arg_pop(); + if (!dev_path) + die("Please supply a device"); + + if (argc) + die("too many arguments"); + + int dev_idx; + struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &dev_idx); + + struct bch_ioctl_dev_usage_v2 *u = bchu_dev_usage(fs, dev_idx); + + if (u->state == BCH_MEMBER_STATE_rw) { + printf("Setting %s readonly\n", dev_path); + bchu_disk_set_state(fs, dev_idx, BCH_MEMBER_STATE_ro, 0); + } + + free(u); + + return bchu_data(fs, (struct bch_ioctl_data) { + .op = BCH_DATA_OP_migrate, + .start_btree = 0, + .start_pos = POS_MIN, + .end_btree = BTREE_ID_NR, + .end_pos = POS_MAX, + .migrate.dev = dev_idx, + }); +} + +static void device_set_state_usage(void) +{ + puts("bcachefs device set-state\n" + "Usage: bcachefs device set-state <new-state> <device>|<devid> <path>\n" + "\n" + "<new-state>: one of rw, ro, failed or spare\n" + "<path>: path to mounted filesystem, optional unless specifying device by id\n" + "\n" + "Options:\n" + " -f, --force Force, if data redundancy will be degraded\n" + " --force-if-data-lost Force, if data will be lost\n" + " -o, --offline Set state of an offline device\n" + " -h, --help display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); + exit(EXIT_SUCCESS); +} + +int cmd_device_set_state(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "force", 0, NULL, 'f' }, + { "force-if-data-lost", 0, NULL, 'F' }, + { "offline", 0, NULL, 'o' }, + { "help", 0, NULL, 'h' }, + { NULL } + }; + struct bchfs_handle fs; + bool by_id = false; + int opt, flags = 0, dev_idx; + bool offline = false; + + while ((opt = 
getopt_long(argc, argv, "foh", longopts, NULL)) != -1) + switch (opt) { + case 'f': + flags |= BCH_FORCE_IF_DEGRADED; + break; + case 'F': + flags |= BCH_FORCE_IF_DEGRADED; + flags |= BCH_FORCE_IF_LOST; + break; + case 'o': + offline = true; + break; + case 'h': + device_set_state_usage(); + } + args_shift(optind); + + char *new_state_str = arg_pop(); + if (!new_state_str) + die("Please supply a device state"); + + unsigned new_state = read_string_list_or_die(new_state_str, + bch2_member_states, "device state"); + + char *dev_str = arg_pop(); + if (!dev_str) + die("Please supply a device"); + + char *end; + dev_idx = strtoul(dev_str, &end, 10); + if (*dev_str && !*end) + by_id = true; + + if (offline) { + struct bch_opts opts = bch2_opts_empty(); + struct bch_sb_handle sb = { NULL }; + + if (by_id) + die("Cannot specify offline device by id"); + + int ret = bch2_read_super(dev_str, &opts, &sb); + if (ret) + die("error opening %s: %s", dev_str, bch2_err_str(ret)); + + struct bch_member *m = bch2_members_v2_get_mut(sb.sb, sb.sb->dev_idx); + + SET_BCH_MEMBER_STATE(m, new_state); + + le64_add_cpu(&sb.sb->seq, 1); + + bch2_super_write(sb.bdev->bd_fd, sb.sb); + ret = fsync(sb.bdev->bd_fd); + if (ret) + fprintf(stderr, "error writing superblock: fsync error (%m)"); + bch2_free_super(&sb); + return ret; + } + + char *fs_path = arg_pop(); + if (fs_path) { + fs = bcache_fs_open(fs_path); + + if (!by_id) { + dev_idx = bchu_dev_path_to_idx(fs, dev_str); + if (dev_idx < 0) + die("%s does not seem to be a member of %s", + dev_str, fs_path); + } + } else if (!by_id) { + fs = bchu_fs_open_by_dev(dev_str, &dev_idx); + } else { + die("Filesystem path required when specifying device by id"); + } + + bchu_disk_set_state(fs, dev_idx, new_state, flags); + + return 0; +} + +static void device_resize_usage(void) +{ + puts("bcachefs device resize \n" + "Usage: bcachefs device resize device [ size ]\n" + "\n" + "Options:\n" + " -h, --help display this help and exit\n" + "Report bugs to 
<linux-bcachefs@vger.kernel.org>"); + exit(EXIT_SUCCESS); +} + +int cmd_device_resize(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "help", 0, NULL, 'h' }, + { NULL } + }; + u64 size; + int opt; + + while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1) + switch (opt) { + case 'h': + device_resize_usage(); + } + args_shift(optind); + + char *dev = arg_pop(); + if (!dev) + die("Please supply a device to resize"); + + int dev_fd = xopen(dev, O_RDONLY); + + char *size_arg = arg_pop(); + if (!size_arg) + size = get_size(dev_fd); + else if (bch2_strtoull_h(size_arg, &size)) + die("invalid size"); + + size >>= 9; + + if (argc) + die("Too many arguments"); + + struct stat dev_stat = xfstat(dev_fd); + + struct mntent *mount = dev_to_mount(dev); + if (mount) { + if (!S_ISBLK(dev_stat.st_mode)) + die("%s is mounted but isn't a block device?!", dev); + + printf("Doing online resize of %s\n", dev); + + struct bchfs_handle fs = bcache_fs_open(mount->mnt_dir); + + unsigned idx = bchu_disk_get_idx(fs, dev_stat.st_rdev); + + struct bch_sb *sb = bchu_read_super(fs, -1); + if (idx >= sb->nr_devices) + die("error reading superblock: dev idx >= sb->nr_devices"); + + struct bch_member m = bch2_sb_member_get(sb, idx); + + u64 nbuckets = size / le16_to_cpu(m.bucket_size); + + if (nbuckets < le64_to_cpu(m.nbuckets)) + die("Shrinking not supported yet"); + + printf("resizing %s to %llu buckets\n", dev, nbuckets); + bchu_disk_resize(fs, idx, nbuckets); + } else { + printf("Doing offline resize of %s\n", dev); + + struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty()); + if (IS_ERR(c)) + die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c))); + + struct bch_dev *resize = NULL; + + for_each_online_member(c, ca) { + if (resize) + die("confused: more than one online device?"); + resize = ca; + percpu_ref_get(&resize->io_ref); + } + + u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size); + + if (nbuckets < le64_to_cpu(resize->mi.nbuckets)) + 
die("Shrinking not supported yet"); + + printf("resizing %s to %llu buckets\n", dev, nbuckets); + int ret = bch2_dev_resize(c, resize, nbuckets); + if (ret) + fprintf(stderr, "resize error: %s\n", bch2_err_str(ret)); + + percpu_ref_put(&resize->io_ref); + bch2_fs_stop(c); + } + return 0; +} + +static void device_resize_journal_usage(void) +{ + puts("bcachefs device resize-journal \n" + "Usage: bcachefs device resize-journal device size\n" + "\n" + "Options:\n" + " -h, --help display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); + exit(EXIT_SUCCESS); +} + +int cmd_device_resize_journal(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "help", 0, NULL, 'h' }, + { NULL } + }; + u64 size; + int opt; + + while ((opt = getopt_long(argc, argv, "h", longopts, NULL)) != -1) + switch (opt) { + case 'h': + device_resize_journal_usage(); + } + args_shift(optind); + + char *dev = arg_pop(); + if (!dev) + die("Please supply a device"); + + int dev_fd = xopen(dev, O_RDONLY); + + char *size_arg = arg_pop(); + if (!size_arg) + die("Please supply a journal size"); + else if (bch2_strtoull_h(size_arg, &size)) + die("invalid size"); + + size >>= 9; + + if (argc) + die("Too many arguments"); + + struct stat dev_stat = xfstat(dev_fd); + + struct mntent *mount = dev_to_mount(dev); + if (mount) { + if (!S_ISBLK(dev_stat.st_mode)) + die("%s is mounted but isn't a block device?!", dev); + + struct bchfs_handle fs = bcache_fs_open(mount->mnt_dir); + + unsigned idx = bchu_disk_get_idx(fs, dev_stat.st_rdev); + + struct bch_sb *sb = bchu_read_super(fs, -1); + if (idx >= sb->nr_devices) + die("error reading superblock: dev idx >= sb->nr_devices"); + + struct bch_member m = bch2_sb_member_get(sb, idx); + + u64 nbuckets = size / le16_to_cpu(m.bucket_size); + + printf("resizing journal on %s to %llu buckets\n", dev, nbuckets); + bchu_disk_resize_journal(fs, idx, nbuckets); + } else { + printf("%s is offline - starting:\n", dev); + + struct 
bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty()); + if (IS_ERR(c)) + die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c))); + + struct bch_dev *resize = NULL; + + for_each_online_member(c, ca) { + if (resize) + die("confused: more than one online device?"); + resize = ca; + percpu_ref_get(&resize->io_ref); + } + + u64 nbuckets = size / le16_to_cpu(resize->mi.bucket_size); + + printf("resizing journal on %s to %llu buckets\n", dev, nbuckets); + int ret = bch2_set_nr_journal_buckets(c, resize, nbuckets); + if (ret) + fprintf(stderr, "resize error: %s\n", bch2_err_str(ret)); + + percpu_ref_put(&resize->io_ref); + bch2_fs_stop(c); + } + return 0; +} diff --git a/c_src/cmd_dump.c b/c_src/cmd_dump.c new file mode 100644 index 00000000..c9e417f2 --- /dev/null +++ b/c_src/cmd_dump.c @@ -0,0 +1,182 @@ +#include <fcntl.h> +#include <getopt.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "cmds.h" +#include "libbcachefs.h" +#include "qcow2.h" + +#include "libbcachefs/bcachefs.h" +#include "libbcachefs/btree_cache.h" +#include "libbcachefs/btree_io.h" +#include "libbcachefs/btree_iter.h" +#include "libbcachefs/error.h" +#include "libbcachefs/extents.h" +#include "libbcachefs/sb-members.h" +#include "libbcachefs/super.h" + +static void dump_usage(void) +{ + puts("bcachefs dump - dump filesystem metadata\n" + "Usage: bcachefs dump [OPTION]... 
<devices>\n" + "\n" + "Options:\n" + " -o output Output qcow2 image(s)\n" + " -f, --force Force; overwrite when needed\n" + " --nojournal Don't dump entire journal, just dirty entries\n" + " -h, --help Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +static void dump_node(struct bch_fs *c, struct bch_dev *ca, struct bkey_s_c k, ranges *data) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) + if (ptr->dev == ca->dev_idx) + range_add(data, ptr->offset << 9, c->opts.btree_node_size); +} + +static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd, + bool entire_journal) +{ + struct bch_sb *sb = ca->disk_sb.sb; + ranges data = { 0 }; + unsigned i; + int ret; + + /* Superblock: */ + range_add(&data, BCH_SB_LAYOUT_SECTOR << 9, + sizeof(struct bch_sb_layout)); + + for (i = 0; i < sb->layout.nr_superblocks; i++) + range_add(&data, + le64_to_cpu(sb->layout.sb_offset[i]) << 9, + vstruct_bytes(sb)); + + /* Journal: */ + for (i = 0; i < ca->journal.nr; i++) + if (entire_journal || + ca->journal.bucket_seq[i] >= c->journal.last_seq_ondisk) { + u64 bucket = ca->journal.buckets[i]; + + range_add(&data, + bucket_bytes(ca) * bucket, + bucket_bytes(ca)); + } + + /* Btree: */ + for (i = 0; i < BTREE_ID_NR; i++) { + struct btree_trans *trans = bch2_trans_get(c); + + ret = __for_each_btree_node(trans, iter, i, POS_MIN, 0, 1, 0, b, ({ + struct btree_node_iter iter; + struct bkey u; + struct bkey_s_c k; + + for_each_btree_node_key_unpack(b, k, &iter, &u) + dump_node(c, ca, k, &data); + 0; + })); + + if (ret) + die("error %s walking btree nodes", bch2_err_str(ret)); + + struct btree *b = bch2_btree_id_root(c, i)->b; + if (!btree_node_fake(b)) + dump_node(c, ca, bkey_i_to_s_c(&b->key), &data); + + bch2_trans_put(trans); + } + + qcow2_write_image(ca->disk_sb.bdev->bd_fd, fd, &data, + max_t(unsigned, c->opts.btree_node_size / 8, block_bytes(c))); + darray_exit(&data); +} + +int cmd_dump(int argc, char 
*argv[]) +{ + static const struct option longopts[] = { + { "force", no_argument, NULL, 'f' }, + { "nojournal", no_argument, NULL, 'j' }, + { "verbose", no_argument, NULL, 'v' }, + { "help", no_argument, NULL, 'h' }, + { NULL } + }; + struct bch_opts opts = bch2_opts_empty(); + char *out = NULL; + unsigned nr_devices = 0; + bool force = false, entire_journal = true; + int fd, opt; + + opt_set(opts, direct_io, false); + opt_set(opts, noexcl, true); + opt_set(opts, read_only, true); + opt_set(opts, nochanges, true); + opt_set(opts, norecovery, true); + opt_set(opts, degraded, true); + opt_set(opts, very_degraded, true); + opt_set(opts, errors, BCH_ON_ERROR_continue); + opt_set(opts, fix_errors, FSCK_FIX_no); + + while ((opt = getopt_long(argc, argv, "o:fvh", + longopts, NULL)) != -1) + switch (opt) { + case 'o': + out = optarg; + break; + case 'f': + force = true; + break; + case 'j': + entire_journal = false; + break; + case 'v': + opt_set(opts, verbose, true); + break; + case 'h': + dump_usage(); + exit(EXIT_SUCCESS); + } + args_shift(optind); + + if (!out) + die("Please supply output filename"); + + if (!argc) + die("Please supply device(s) to check"); + + struct bch_fs *c = bch2_fs_open(argv, argc, opts); + if (IS_ERR(c)) + die("error opening devices: %s", bch2_err_str(PTR_ERR(c))); + + down_read(&c->state_lock); + + for_each_online_member(c, ca) + nr_devices++; + + BUG_ON(!nr_devices); + + for_each_online_member(c, ca) { + int flags = O_WRONLY|O_CREAT|O_TRUNC; + + if (!force) + flags |= O_EXCL; + + char *path = nr_devices > 1 + ? 
mprintf("%s.%u.qcow2", out, ca->dev_idx) + : mprintf("%s.qcow2", out); + fd = xopen(path, flags, 0600); + free(path); + + dump_one_device(c, ca, fd, entire_journal); + close(fd); + } + + up_read(&c->state_lock); + + bch2_fs_stop(c); + return 0; +} diff --git a/c_src/cmd_format.c b/c_src/cmd_format.c new file mode 100644 index 00000000..d0c8e197 --- /dev/null +++ b/c_src/cmd_format.c @@ -0,0 +1,435 @@ +/* + * Authors: Kent Overstreet <kent.overstreet@gmail.com> + * Gabriel de Perthuis <g2p.code@gmail.com> + * Jacob Malevich <jam@datera.io> + * + * GPLv2 + */ +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <uuid/uuid.h> + +#include "cmds.h" +#include "posix_to_bcachefs.h" +#include "libbcachefs.h" +#include "crypto.h" +#include "libbcachefs/errcode.h" +#include "libbcachefs/opts.h" +#include "libbcachefs/super-io.h" +#include "libbcachefs/util.h" + +#include "libbcachefs/darray.h" + +#define OPTS \ +x(0, replicas, required_argument) \ +x(0, encrypted, no_argument) \ +x(0, no_passphrase, no_argument) \ +x('L', fs_label, required_argument) \ +x('U', uuid, required_argument) \ +x(0, fs_size, required_argument) \ +x(0, superblock_size, required_argument) \ +x(0, bucket_size, required_argument) \ +x('l', label, required_argument) \ +x(0, discard, no_argument) \ +x(0, data_allowed, required_argument) \ +x(0, durability, required_argument) \ +x(0, version, required_argument) \ +x(0, no_initialize, no_argument) \ +x(0, source, required_argument) \ +x('f', force, no_argument) \ +x('q', quiet, no_argument) \ +x('v', verbose, no_argument) \ +x('h', help, no_argument) + +static void usage(void) +{ + puts("bcachefs format - create a new bcachefs filesystem on one or more devices\n" + "Usage: bcachefs format [OPTION]... 
<devices>\n" + "\n" + "Options:"); + + bch2_opts_usage(OPT_FORMAT); + + puts( + " --replicas=# Sets both data and metadata replicas\n" + " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n" + " --no_passphrase Don't encrypt master encryption key\n" + " -L, --fs_label=label\n" + " -U, --uuid=uuid\n" + " --superblock_size=size\n" + " --source=path Initialize the bcachefs filesystem from this root directory\n" + "\n" + "Device specific options:"); + + bch2_opts_usage(OPT_DEVICE); + + puts(" -l, --label=label Disk label\n" + "\n" + " -f, --force\n" + " -q, --quiet Only print errors\n" + " -v, --verbose Verbose filesystem initialization\n" + " -h, --help Display this help and exit\n" + "\n" + "Device specific options must come before corresponding devices, e.g.\n" + " bcachefs format --label cache /dev/sdb /dev/sdc\n" + "\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +enum { + O_no_opt = 1, +#define x(shortopt, longopt, arg) O_##longopt, + OPTS +#undef x +}; + +#define x(shortopt, longopt, arg) { \ + .name = #longopt, \ + .has_arg = arg, \ + .flag = NULL, \ + .val = O_##longopt, \ +}, +static const struct option format_opts[] = { + OPTS + { NULL } +}; +#undef x + +u64 read_flag_list_or_die(char *opt, const char * const list[], + const char *msg) +{ + u64 v = bch2_read_flag_list(opt, list); + if (v == (u64) -1) + die("Bad %s %s", msg, opt); + + return v; +} + +void build_fs(struct bch_fs *c, const char *src_path) +{ + struct copy_fs_state s = {}; + int src_fd = xopen(src_path, O_RDONLY|O_NOATIME); + struct stat stat = xfstat(src_fd); + + if (!S_ISDIR(stat.st_mode)) + die("%s is not a directory", src_path); + + copy_fs(c, src_fd, src_path, &s); +} + +int cmd_format(int argc, char *argv[]) +{ + DARRAY(struct dev_opts) devices = { 0 }; + DARRAY(char *) device_paths = { 0 }; + struct format_opts opts = format_opts_default(); + struct dev_opts dev_opts = dev_opts_default(); + bool force = false, no_passphrase = false, quiet = false, 
initialize = true, verbose = false; + bool unconsumed_dev_option = false; + unsigned v; + int opt; + + struct bch_opt_strs fs_opt_strs = + bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT); + struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs); + + if (getenv("BCACHEFS_KERNEL_ONLY")) + initialize = false; + + /* + * Leading '-' in the optstring: non-option arguments (device paths) are + * returned in order with value 1 (O_no_opt), so device options apply to + * the devices that follow them. + */ + while ((opt = getopt_long(argc, argv, + "-L:l:U:g:fqhv", + format_opts, + NULL)) != -1) + switch (opt) { + case O_replicas: + if (kstrtouint(optarg, 10, &v) || + !v || + v > BCH_REPLICAS_MAX) + die("invalid replicas"); + + opt_set(fs_opts, metadata_replicas, v); + opt_set(fs_opts, data_replicas, v); + break; + case O_source: + opts.source = optarg; + break; + case O_encrypted: + opts.encrypted = true; + break; + case O_no_passphrase: + no_passphrase = true; + break; + case O_fs_label: + case 'L': + opts.label = optarg; + break; + case O_uuid: + case 'U': + if (uuid_parse(optarg, opts.uuid.b)) + die("Bad uuid"); + break; + case O_force: + case 'f': + force = true; + break; + case O_fs_size: + if (bch2_strtoull_h(optarg, &dev_opts.size)) + die("invalid filesystem size"); + unconsumed_dev_option = true; + break; + case O_superblock_size: + if (bch2_strtouint_h(optarg, &opts.superblock_size)) + die("invalid superblock size"); + + /* bytes -> 512-byte sectors */ + opts.superblock_size >>= 9; + break; + case O_bucket_size: + if (bch2_strtoull_h(optarg, &dev_opts.bucket_size)) + die("bad bucket_size %s", optarg); + unconsumed_dev_option = true; + break; + case O_label: + case 'l': + dev_opts.label = optarg; + unconsumed_dev_option = true; + break; + case O_discard: + dev_opts.discard = true; + unconsumed_dev_option = true; + break; + case O_data_allowed: + dev_opts.data_allowed = + read_flag_list_or_die(optarg, + __bch2_data_types, "data type"); + unconsumed_dev_option = true; + break; + case O_durability: + if (kstrtouint(optarg, 10, &dev_opts.durability) || + dev_opts.durability > BCH_REPLICAS_MAX) + die("invalid durability"); + unconsumed_dev_option = true; + break; + case O_version: + if 
(kstrtouint(optarg, 10, &opts.version)) + die("invalid version"); + break; + case O_no_initialize: + initialize = false; + break; + case O_no_opt: + darray_push(&device_paths, optarg); + dev_opts.path = optarg; + darray_push(&devices, dev_opts); + dev_opts.size = 0; + unconsumed_dev_option = false; + break; + case O_quiet: + case 'q': + quiet = true; + break; + /* -v/--verbose only sets the flag; it must not fall through into help */ + case O_verbose: + case 'v': + verbose = true; + break; + case O_help: + case 'h': + usage(); + exit(EXIT_SUCCESS); + break; + case '?': + exit(EXIT_FAILURE); + break; + } + + if (unconsumed_dev_option) + die("Options for devices apply to subsequent devices; got a device option with no device"); + + if (opts.version != bcachefs_metadata_version_current) + initialize = false; + + if (!devices.nr) + die("Please supply a device"); + + if (opts.encrypted && !no_passphrase) { + opts.passphrase = read_passphrase_twice("Enter passphrase: "); + initialize = false; + } + + darray_for_each(devices, dev) { + int ret = open_for_format(dev, force); + /* report the device that actually failed, not the last one parsed */ + if (ret) + die("Error opening %s: %s", dev->path, strerror(-ret)); + } + + struct bch_sb *sb = + bch2_format(fs_opt_strs, + fs_opts, + opts, + devices.data, devices.nr); + bch2_opt_strs_free(&fs_opt_strs); + + if (!quiet) { + struct printbuf buf = PRINTBUF; + + buf.human_readable_units = true; + + bch2_sb_to_text(&buf, sb, false, 1 << BCH_SB_FIELD_members_v2); + printf("%s", buf.buf); + + printbuf_exit(&buf); + } + free(sb); + + if (opts.passphrase) { + memzero_explicit(opts.passphrase, strlen(opts.passphrase)); + free(opts.passphrase); + } + + darray_exit(&devices); + + /* don't skip initialization when we have to build an image from a source */ + if (opts.source && !initialize) { + printf("Warning: Forcing the initialization because the source flag was supplied\n"); + initialize = 1; + } + + if (initialize) { + struct bch_opts mount_opts = bch2_opts_empty(); + + + opt_set(mount_opts, verbose, verbose); + + /* + * Start the filesystem once, to allocate the journal and create + * the root 
directory: + */ + struct bch_fs *c = bch2_fs_open(device_paths.data, + device_paths.nr, + mount_opts); + if (IS_ERR(c)) + die("error opening %s: %s", device_paths.data[0], + bch2_err_str(PTR_ERR(c))); + + if (opts.source) { + build_fs(c, opts.source); + } + + + bch2_fs_stop(c); + } + + darray_exit(&device_paths); + + return 0; +} + +static void show_super_usage(void) +{ + puts("bcachefs show-super \n" + "Usage: bcachefs show-super [OPTION].. device\n" + "\n" + "Options:\n" + " -f, --fields=(fields) list of sections to print\n" + " --field-only=fiel) print superblock section only, no header\n" + " -l, --layout print superblock layout\n" + " -h, --help display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); + exit(EXIT_SUCCESS); +} + +int cmd_show_super(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "fields", 1, NULL, 'f' }, + { "field-only", 1, NULL, 'F' }, + { "layout", 0, NULL, 'l' }, + { "help", 0, NULL, 'h' }, + { NULL } + }; + unsigned fields = 0; + int field_only = -1; + bool print_layout = false; + bool print_default_fields = true; + int opt; + + while ((opt = getopt_long(argc, argv, "f:lh", longopts, NULL)) != -1) + switch (opt) { + case 'f': + fields = !strcmp(optarg, "all") + ? 
~0 + : read_flag_list_or_die(optarg, + bch2_sb_fields, "superblock field"); + print_default_fields = false; + break; + case 'F': + field_only = read_string_list_or_die(optarg, + bch2_sb_fields, "superblock field"); + print_default_fields = false; + break; + case 'l': + print_layout = true; + break; + case 'h': + show_super_usage(); + break; + } + args_shift(optind); + + char *dev = arg_pop(); + if (!dev) + die("please supply a device"); + if (argc) + die("too many arguments"); + + struct bch_opts opts = bch2_opts_empty(); + + opt_set(opts, noexcl, true); + opt_set(opts, nochanges, true); + + struct bch_sb_handle sb; + int ret = bch2_read_super(dev, &opts, &sb); + if (ret) + die("Error opening %s: %s", dev, bch2_err_str(ret)); + + if (print_default_fields) { + fields |= bch2_sb_field_get(sb.sb, members_v2) + ? 1 << BCH_SB_FIELD_members_v2 + : 1 << BCH_SB_FIELD_members_v1; + fields |= 1 << BCH_SB_FIELD_errors; + } + + struct printbuf buf = PRINTBUF; + + buf.human_readable_units = true; + + if (field_only >= 0) { + struct bch_sb_field *f = bch2_sb_field_get_id(sb.sb, field_only); + + if (f) + __bch2_sb_field_to_text(&buf, sb.sb, f); + } else { + printbuf_tabstop_push(&buf, 44); + + char *model = fd_to_dev_model(sb.bdev->bd_fd); + prt_str(&buf, "Device:"); + prt_tab(&buf); + prt_str(&buf, model); + prt_newline(&buf); + free(model); + + bch2_sb_to_text(&buf, sb.sb, print_layout, fields); + } + printf("%s", buf.buf); + + bch2_free_super(&sb); + printbuf_exit(&buf); + return 0; +} diff --git a/c_src/cmd_fs.c b/c_src/cmd_fs.c new file mode 100644 index 00000000..82eeceff --- /dev/null +++ b/c_src/cmd_fs.c @@ -0,0 +1,544 @@ +#include <getopt.h> +#include <stdio.h> +#include <sys/ioctl.h> + +#include <uuid/uuid.h> + +#include "linux/sort.h" +#include "linux/rcupdate.h" + +#include "libbcachefs/bcachefs_ioctl.h" +#include "libbcachefs/buckets.h" +#include "libbcachefs/disk_accounting.h" +#include "libbcachefs/opts.h" +#include "libbcachefs/super-io.h" + +#include "cmds.h" 
+#include "libbcachefs.h" + +#include "libbcachefs/darray.h" + +static void __dev_usage_type_to_text(struct printbuf *out, + enum bch_data_type type, + unsigned bucket_size, + u64 buckets, u64 sectors, u64 frag) +{ + bch2_prt_data_type(out, type); + prt_char(out, ':'); + prt_tab(out); + + prt_units_u64(out, sectors << 9); + prt_tab_rjust(out); + + prt_printf(out, "%llu", buckets); + prt_tab_rjust(out); + + if (frag) { + prt_units_u64(out, frag << 9); + prt_tab_rjust(out); + } + prt_newline(out); +} + +static void dev_usage_type_to_text(struct printbuf *out, + struct bch_ioctl_dev_usage_v2 *u, + enum bch_data_type type) +{ + u64 sectors = 0; + switch (type) { + case BCH_DATA_free: + case BCH_DATA_need_discard: + case BCH_DATA_need_gc_gens: + /* sectors are 0 for these types so calculate sectors for them */ + sectors = u->d[type].buckets * u->bucket_size; + break; + default: + sectors = u->d[type].sectors; + } + + __dev_usage_type_to_text(out, type, + u->bucket_size, + u->d[type].buckets, + sectors, + u->d[type].fragmented); +} + +static void dev_usage_to_text(struct printbuf *out, + struct bchfs_handle fs, + struct dev_name *d) +{ + struct bch_ioctl_dev_usage_v2 *u = bchu_dev_usage(fs, d->idx); + + prt_newline(out); + prt_printf(out, "%s (device %u):", d->label ?: "(no label)", d->idx); + prt_tab(out); + prt_str(out, d->dev ?: "(device not found)"); + prt_tab_rjust(out); + + prt_str(out, bch2_member_states[u->state]); + prt_tab_rjust(out); + + prt_newline(out); + + printbuf_indent_add(out, 2); + prt_tab(out); + + prt_str(out, "data"); + prt_tab_rjust(out); + + prt_str(out, "buckets"); + prt_tab_rjust(out); + + prt_str(out, "fragmented"); + prt_tab_rjust(out); + + prt_newline(out); + + for (unsigned i = 0; i < u->nr_data_types; i++) + dev_usage_type_to_text(out, u, i); + + prt_str(out, "capacity:"); + prt_tab(out); + + prt_units_u64(out, (u->nr_buckets * u->bucket_size) << 9); + prt_tab_rjust(out); + prt_printf(out, "%llu", u->nr_buckets); + prt_tab_rjust(out); + + 
printbuf_indent_sub(out, 2); + + prt_newline(out); + free(u); +} + +static int dev_by_label_cmp(const void *_l, const void *_r) +{ + const struct dev_name *l = _l, *r = _r; + + return (l->label && r->label + ? strcmp(l->label, r->label) : 0) ?: + (l->dev && r->dev + ? strcmp(l->dev, r->dev) : 0) ?: + cmp_int(l->idx, r->idx); +} + +static struct dev_name *dev_idx_to_name(dev_names *dev_names, unsigned idx) +{ + darray_for_each(*dev_names, dev) + if (dev->idx == idx) + return dev; + return NULL; +} + +static void devs_usage_to_text(struct printbuf *out, + struct bchfs_handle fs, + dev_names dev_names) +{ + sort(dev_names.data, dev_names.nr, + sizeof(dev_names.data[0]), dev_by_label_cmp, NULL); + + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 20); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 14); + + darray_for_each(dev_names, dev) + dev_usage_to_text(out, fs, dev); + + darray_for_each(dev_names, dev) { + free(dev->dev); + free(dev->label); + } +} + +static void persistent_reserved_to_text(struct printbuf *out, + unsigned nr_replicas, s64 sectors) +{ + if (!sectors) + return; + + prt_str(out, "reserved:"); + prt_tab(out); + prt_printf(out, "%u/%u ", 1, nr_replicas); + prt_tab(out); + prt_str(out, "[] "); + prt_units_u64(out, sectors << 9); + prt_tab_rjust(out); + prt_newline(out); +} + +static void replicas_usage_to_text(struct printbuf *out, + const struct bch_replicas_entry_v1 *r, + s64 sectors, + dev_names *dev_names) +{ + if (!sectors) + return; + + char devs[4096], *d = devs; + *d++ = '['; + + unsigned durability = 0; + + for (unsigned i = 0; i < r->nr_devs; i++) { + unsigned dev_idx = r->devs[i]; + struct dev_name *dev = dev_idx_to_name(dev_names, dev_idx); + + durability += dev ? dev->durability : 0; + + if (i) + *d++ = ' '; + + d += dev && dev->dev + ? 
sprintf(d, "%s", dev->dev) + : sprintf(d, "%u", dev_idx); + } + *d++ = ']'; + *d++ = '\0'; + + bch2_prt_data_type(out, r->data_type); + prt_char(out, ':'); + prt_tab(out); + + prt_printf(out, "%u/%u ", r->nr_required, r->nr_devs); + prt_tab(out); + + prt_printf(out, "%u ", durability); + prt_tab(out); + + prt_printf(out, "%s ", devs); + prt_tab(out); + + prt_units_u64(out, sectors << 9); + prt_tab_rjust(out); + prt_newline(out); +} + +#define for_each_usage_replica(_u, _r) \ + for (_r = (_u)->replicas; \ + _r != (void *) (_u)->replicas + (_u)->replica_entries_bytes;\ + _r = replicas_usage_next(_r), \ + BUG_ON((void *) _r > (void *) (_u)->replicas + (_u)->replica_entries_bytes)) + +typedef DARRAY(struct bkey_i_accounting *) darray_accounting_p; + +static int accounting_p_cmp(const void *_l, const void *_r) +{ + const struct bkey_i_accounting * const *l = _l; + const struct bkey_i_accounting * const *r = _r; + + struct bpos lp = (*l)->k.p, rp = (*r)->k.p; + + bch2_bpos_swab(&lp); + bch2_bpos_swab(&rp); + return bpos_cmp(lp, rp); +} + +static void accounting_sort(darray_accounting_p *sorted, + struct bch_ioctl_query_accounting *in) +{ + for (struct bkey_i_accounting *a = in->accounting; + a < (struct bkey_i_accounting *) ((u64 *) in->accounting + in->accounting_u64s); + a = bkey_i_to_accounting(bkey_next(&a->k_i))) + if (darray_push(sorted, a)) + die("memory allocation failure"); + + sort(sorted->data, sorted->nr, sizeof(sorted->data[0]), accounting_p_cmp, NULL); +} + +static int fs_usage_v1_to_text(struct printbuf *out, + struct bchfs_handle fs, + dev_names dev_names) +{ + struct bch_ioctl_query_accounting *a = + bchu_fs_accounting(fs, + BIT(BCH_DISK_ACCOUNTING_persistent_reserved)| + BIT(BCH_DISK_ACCOUNTING_replicas)| + BIT(BCH_DISK_ACCOUNTING_compression)| + BIT(BCH_DISK_ACCOUNTING_btree)| + BIT(BCH_DISK_ACCOUNTING_rebalance_work)); + if (!a) + return -1; + + darray_accounting_p a_sorted = {}; + + accounting_sort(&a_sorted, a); + + prt_str(out, "Filesystem: "); + 
pr_uuid(out, fs.uuid.b); + prt_newline(out); + + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 20); + printbuf_tabstop_push(out, 16); + + prt_str(out, "Size:"); + prt_tab(out); + prt_units_u64(out, a->capacity << 9); + prt_tab_rjust(out); + prt_newline(out); + + prt_str(out, "Used:"); + prt_tab(out); + prt_units_u64(out, a->used << 9); + prt_tab_rjust(out); + prt_newline(out); + + prt_str(out, "Online reserved:"); + prt_tab(out); + prt_units_u64(out, a->online_reserved << 9); + prt_tab_rjust(out); + prt_newline(out); + + prt_newline(out); + + printbuf_tabstops_reset(out); + + printbuf_tabstop_push(out, 16); + prt_str(out, "Data type"); + prt_tab(out); + + printbuf_tabstop_push(out, 16); + prt_str(out, "Required/total"); + prt_tab(out); + + printbuf_tabstop_push(out, 14); + prt_str(out, "Durability"); + prt_tab(out); + + printbuf_tabstop_push(out, 14); + prt_str(out, "Devices"); + prt_newline(out); + + printbuf_tabstop_push(out, 14); + + unsigned prev_type = 0; + + darray_for_each(a_sorted, i) { + struct bkey_i_accounting *a = *i; + + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, a->k.p); + + bool new_type = acc_k.type != prev_type; + prev_type = acc_k.type; + + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_persistent_reserved: + persistent_reserved_to_text(out, + acc_k.persistent_reserved.nr_replicas, + a->v.d[0]); + break; + case BCH_DISK_ACCOUNTING_replicas: + replicas_usage_to_text(out, &acc_k.replicas, a->v.d[0], &dev_names); + break; + case BCH_DISK_ACCOUNTING_compression: + if (new_type) { + prt_printf(out, "\nCompression:\n"); + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 12); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 16); + printbuf_tabstop_push(out, 24); + prt_printf(out, "type\tcompressed\runcompressed\raverage extent size\r\n"); + } + + u64 nr_extents = a->v.d[0]; + u64 sectors_uncompressed = a->v.d[1]; + u64 sectors_compressed = a->v.d[2]; + + bch2_prt_compression_type(out, 
acc_k.compression.type); + prt_tab(out); + + prt_human_readable_u64(out, sectors_compressed << 9); + prt_tab_rjust(out); + + prt_human_readable_u64(out, sectors_uncompressed << 9); + prt_tab_rjust(out); + + prt_human_readable_u64(out, nr_extents + ? div_u64(sectors_uncompressed << 9, nr_extents) + : 0); + prt_tab_rjust(out); + prt_newline(out); + break; + case BCH_DISK_ACCOUNTING_btree: + if (new_type) { + prt_printf(out, "\nBtree usage:\n"); + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 12); + printbuf_tabstop_push(out, 16); + } + prt_printf(out, "%s:\t", bch2_btree_id_str(acc_k.btree.id)); + prt_units_u64(out, a->v.d[0] << 9); + prt_tab_rjust(out); + prt_newline(out); + break; + case BCH_DISK_ACCOUNTING_rebalance_work: + if (new_type) + prt_printf(out, "\nPending rebalance work:\n"); + prt_units_u64(out, a->v.d[0] << 9); + prt_newline(out); + break; + } + } + + darray_exit(&a_sorted); + free(a); + return 0; +} + +static void fs_usage_v0_to_text(struct printbuf *out, + struct bchfs_handle fs, + dev_names dev_names) +{ + struct bch_ioctl_fs_usage *u = bchu_fs_usage(fs); + + prt_str(out, "Filesystem: "); + pr_uuid(out, fs.uuid.b); + prt_newline(out); + + printbuf_tabstops_reset(out); + printbuf_tabstop_push(out, 20); + printbuf_tabstop_push(out, 16); + + prt_str(out, "Size:"); + prt_tab(out); + prt_units_u64(out, u->capacity << 9); + prt_tab_rjust(out); + prt_newline(out); + + prt_str(out, "Used:"); + prt_tab(out); + prt_units_u64(out, u->used << 9); + prt_tab_rjust(out); + prt_newline(out); + + prt_str(out, "Online reserved:"); + prt_tab(out); + prt_units_u64(out, u->online_reserved << 9); + prt_tab_rjust(out); + prt_newline(out); + + prt_newline(out); + + printbuf_tabstops_reset(out); + + printbuf_tabstop_push(out, 16); + prt_str(out, "Data type"); + prt_tab(out); + + printbuf_tabstop_push(out, 16); + prt_str(out, "Required/total"); + prt_tab(out); + + printbuf_tabstop_push(out, 14); + prt_str(out, "Durability"); + prt_tab(out); + + 
printbuf_tabstop_push(out, 14); + prt_str(out, "Devices"); + prt_newline(out); + + printbuf_tabstop_push(out, 14); + + for (unsigned i = 0; i < BCH_REPLICAS_MAX; i++) + persistent_reserved_to_text(out, i, u->persistent_reserved[i]); + + struct bch_replicas_usage *r; + + for_each_usage_replica(u, r) + if (r->r.data_type < BCH_DATA_user) + replicas_usage_to_text(out, &r->r, r->sectors, &dev_names); + + for_each_usage_replica(u, r) + if (r->r.data_type == BCH_DATA_user && + r->r.nr_required <= 1) + replicas_usage_to_text(out, &r->r, r->sectors, &dev_names); + + for_each_usage_replica(u, r) + if (r->r.data_type == BCH_DATA_user && + r->r.nr_required > 1) + replicas_usage_to_text(out, &r->r, r->sectors, &dev_names); + + for_each_usage_replica(u, r) + if (r->r.data_type > BCH_DATA_user) + replicas_usage_to_text(out, &r->r, r->sectors, &dev_names); + + free(u); +} + +static void fs_usage_to_text(struct printbuf *out, const char *path) +{ + struct bchfs_handle fs = bcache_fs_open(path); + + dev_names dev_names = bchu_fs_get_devices(fs); + + if (!fs_usage_v1_to_text(out, fs, dev_names)) + goto devs; + + fs_usage_v0_to_text(out, fs, dev_names); +devs: + devs_usage_to_text(out, fs, dev_names); + + darray_exit(&dev_names); + + bcache_fs_close(fs); +} + +static void fs_usage_usage(void) +{ + puts("bcachefs fs usage - display detailed filesystem usage\n" + "Usage: bcachefs fs usage [OPTION]... 
<mountpoint>\n" + "\n" + "Options:\n" + " -h, --human-readable Human readable units\n" + " -H, --help Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +int cmd_fs_usage(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "help", no_argument, NULL, 'H' }, + { "human-readable", no_argument, NULL, 'h' }, + { NULL } + }; + bool human_readable = false; + struct printbuf buf = PRINTBUF; + char *fs; + int opt; + + while ((opt = getopt_long(argc, argv, "h", + longopts, NULL)) != -1) + switch (opt) { + case 'h': + human_readable = true; + break; + case 'H': + fs_usage_usage(); + exit(EXIT_SUCCESS); + default: + fs_usage_usage(); + exit(EXIT_FAILURE); + } + args_shift(optind); + + if (!argc) { + printbuf_reset(&buf); + buf.human_readable_units = human_readable; + fs_usage_to_text(&buf, "."); + printf("%s", buf.buf); + } else { + while ((fs = arg_pop())) { + printbuf_reset(&buf); + buf.human_readable_units = human_readable; + fs_usage_to_text(&buf, fs); + printf("%s", buf.buf); + } + } + + printbuf_exit(&buf); + return 0; +} diff --git a/c_src/cmd_fsck.c b/c_src/cmd_fsck.c new file mode 100644 index 00000000..2ea51ff2 --- /dev/null +++ b/c_src/cmd_fsck.c @@ -0,0 +1,348 @@ + +#include <errno.h> +#include <getopt.h> +#include <sys/uio.h> +#include <unistd.h> +#include "cmds.h" +#include "libbcachefs/error.h" +#include "libbcachefs.h" +#include "libbcachefs/super.h" +#include "libbcachefs/super-io.h" +#include "tools-util.h" + +static void fsck_usage(void) +{ + puts("bcachefs fsck - filesystem check and repair\n" + "Usage: bcachefs fsck [OPTION]... 
<devices>\n" + "\n" + "Options:\n" + " -p Automatic repair (no questions)\n" + " -n Don't repair, only check for errors\n" + " -y Assume \"yes\" to all questions\n" + " -f Force checking even if filesystem is marked clean\n" + " -r, --ratelimit_errors Don't display more than 10 errors of a given type\n" + " -R, --reconstruct_alloc Reconstruct the alloc btree\n" + " -k, --kernel Use the in-kernel fsck implementation\n" + " -v Be verbose\n" + " -h, --help Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +static void setnonblocking(int fd) +{ + int flags = fcntl(fd, F_GETFL); + if (fcntl(fd, F_SETFL, flags|O_NONBLOCK)) + die("fcntl error: %m"); +} + +static int do_splice(int rfd, int wfd) +{ + char buf[4096], *b = buf; + + int r = read(rfd, buf, sizeof(buf)); + if (r < 0 && errno == EAGAIN) + return 0; + if (r < 0) + return r; + if (!r) + return 1; + do { + ssize_t w = write(wfd, b, r); + if (w < 0) + die("%s: write error: %m", __func__); + r -= w; + b += w; + } while (r); + return 0; +} + +static int splice_fd_to_stdinout(int fd) +{ + setnonblocking(STDIN_FILENO); + setnonblocking(fd); + + bool stdin_closed = false; + + while (true) { + fd_set fds; + + FD_ZERO(&fds); + FD_SET(fd, &fds); + if (!stdin_closed) + FD_SET(STDIN_FILENO, &fds); + + if (select(fd + 1, &fds, NULL, NULL, NULL) < 0) + die("select error: %m"); + + int r = do_splice(fd, STDOUT_FILENO); + if (r < 0) + return r; + if (r) + break; + + r = do_splice(STDIN_FILENO, fd); + if (r < 0) + return r; + if (r) + stdin_closed = true; + } + + return close(fd); +} + +static int fsck_online(const char *dev_path, const char *opt_str) +{ + int dev_idx; + struct bchfs_handle fs = bchu_fs_open_by_dev(dev_path, &dev_idx); + + struct bch_ioctl_fsck_online fsck = { + .opts = (unsigned long) opt_str + }; + + int fsck_fd = ioctl(fs.ioctl_fd, BCH_IOCTL_FSCK_ONLINE, &fsck); + if (fsck_fd < 0) + die("BCH_IOCTL_FSCK_ONLINE error: %s", bch2_err_str(errno)); + + return 
splice_fd_to_stdinout(fsck_fd); +} + +/* Append opt to the option string in out, preceded by ',' when out is not empty. */ static void append_opt(struct printbuf *out, const char *opt) +{ + if (out->pos) + prt_char(out, ','); + prt_str(out, opt); +} + +/* Decide whether offline fsck should be handed to the kernel. Returns false when the module's version file is unreadable or reads as 0, when it equals this binary's version, or when the devices cannot be opened; otherwise true only if the kernel version lies between this binary's version and the superblock version (see the expression below). */ static bool should_use_kernel_fsck(darray_str devs) +{ + system("modprobe bcachefs"); + + unsigned kernel_version = !access("/sys/module/bcachefs/parameters/version", R_OK) + ? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version") + : 0; + + if (!kernel_version) + return false; + + if (kernel_version == bcachefs_metadata_version_current) + return false; + + struct bch_opts opts = bch2_opts_empty(); + opt_set(opts, nostart, true); + opt_set(opts, noexcl, true); + opt_set(opts, nochanges, true); + opt_set(opts, read_only, true); + + struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts); + if (IS_ERR(c)) + return false; + + /* true if: current < kernel <= filesystem, or filesystem <= kernel < current */ bool ret = ((bcachefs_metadata_version_current < kernel_version && + kernel_version <= c->sb.version) || + (c->sb.version <= kernel_version && + kernel_version < bcachefs_metadata_version_current)); + + if (ret) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "fsck binary is version "); + bch2_version_to_text(&buf, bcachefs_metadata_version_current); + prt_str(&buf, " but filesystem is "); + bch2_version_to_text(&buf, c->sb.version); + prt_str(&buf, " and kernel is "); + bch2_version_to_text(&buf, kernel_version); + prt_str(&buf, ", using kernel fsck\n"); + + printf("%s", buf.buf); + printbuf_exit(&buf); + } + + bch2_fs_stop(c); + + return ret; +} + +/* Returns true if path is a block device. NOTE(review): also returns true when stat() fails - presumably so the path is passed through unchanged and the error surfaces later; confirm. */ static bool is_blockdev(const char *path) +{ + struct stat s; + if (stat(path, &s)) + return true; + return S_ISBLK(s.st_mode); +} + +/* Run "losetup -d <path>" through system(). NOTE(review): path is interpolated into the shell command unquoted and the exit status is ignored. */ static void loopdev_free(const char *path) +{ + char *cmd = mprintf("losetup -d %s", path); + system(cmd); + free(cmd); +} + +/* Run "losetup --show -f <path>" through popen() and read its output; returns NULL if popen() fails. NOTE(review): path is interpolated into the shell command unquoted. */ static char *loopdev_alloc(const char *path) +{ + char *cmd = mprintf("losetup --show -f %s", path); + FILE *f = popen(cmd, "r"); + free(cmd); + if (!f) { + fprintf(stderr, "error executing losetup: %m\n"); + return NULL; + } + + char *line = NULL; + size_t n 
= 0; + getline(&line, &n, f); + int ret = pclose(f); + if (ret) { + fprintf(stderr, "error executing losetup: %i\n", ret); + free(line); + return NULL; + } + + strim(line); + return line; +} + +int cmd_fsck(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "ratelimit_errors", no_argument, NULL, 'r' }, + { "reconstruct_alloc", no_argument, NULL, 'R' }, + { "kernel", no_argument, NULL, 'k' }, + { "no-kernel", no_argument, NULL, 'K' }, + { "help", no_argument, NULL, 'h' }, + { NULL } + }; + int kernel = -1; /* unset */ + int opt, ret = 0; + struct printbuf opts_str = PRINTBUF; + + if (getenv("BCACHEFS_KERNEL_ONLY")) + kernel = true; + + append_opt(&opts_str, "degraded"); + append_opt(&opts_str, "fsck"); + append_opt(&opts_str, "fix_errors=ask"); + append_opt(&opts_str, "read_only"); + + while ((opt = getopt_long(argc, argv, + "apynfo:rRkKvh", + longopts, NULL)) != -1) + switch (opt) { + case 'a': /* outdated alias for -p */ + case 'p': + case 'y': + append_opt(&opts_str, "fix_errors=yes"); + break; + case 'n': + append_opt(&opts_str, "nochanges"); + append_opt(&opts_str, "fix_errors=no"); + break; + case 'f': + /* force check, even if filesystem marked clean: */ + break; + case 'o': + append_opt(&opts_str, optarg); + break; + case 'r': + append_opt(&opts_str, "ratelimit_errors"); + break; + case 'R': + append_opt(&opts_str, "reconstruct_alloc"); + break; + case 'k': + kernel = true; + break; + case 'K': + kernel = false; + break; + case 'v': + append_opt(&opts_str, "verbose"); + break; + case 'h': + fsck_usage(); + exit(16); + } + args_shift(optind); + + if (!argc) { + fprintf(stderr, "Please supply device(s) to check\n"); + exit(8); + } + + darray_str devs = get_or_split_cmdline_devs(argc, argv); + + darray_for_each(devs, i) + if (dev_mounted(*i)) { + printf("Running fsck online\n"); + return fsck_online(*i, opts_str.buf); + } + + int kernel_probed = kernel; + if (kernel_probed < 0) + kernel_probed = should_use_kernel_fsck(devs); + + struct 
bch_opts opts = bch2_opts_empty(); + struct printbuf parse_later = PRINTBUF; + + if (kernel_probed) { + darray_str loopdevs = {}; + int fsck_fd = -1; + + printf("Running in-kernel offline fsck\n"); + struct bch_ioctl_fsck_offline *fsck = calloc(sizeof(*fsck) + sizeof(u64) * devs.nr, 1); + + fsck->opts = (unsigned long)opts_str.buf; + darray_for_each(devs, i) { + if (is_blockdev(*i)) { + fsck->devs[i - devs.data] = (unsigned long) *i; + } else { + char *l = loopdev_alloc(*i); + if (!l) + goto kernel_fsck_err; + darray_push(&loopdevs, l); + fsck->devs[i - devs.data] = (unsigned long) l; + } + } + fsck->nr_devs = devs.nr; + + int ctl_fd = bcachectl_open(); + fsck_fd = ioctl(ctl_fd, BCH_IOCTL_FSCK_OFFLINE, fsck); +kernel_fsck_err: + free(fsck); + + darray_for_each(loopdevs, i) + loopdev_free(*i); + darray_exit(&loopdevs); + + if (fsck_fd < 0 && kernel < 0) + goto userland_fsck; + + if (fsck_fd < 0) + die("BCH_IOCTL_FSCK_OFFLINE error: %s", bch2_err_str(errno)); + + ret = splice_fd_to_stdinout(fsck_fd); + } else { +userland_fsck: + printf("Running userspace offline fsck\n"); + ret = bch2_parse_mount_opts(NULL, &opts, &parse_later, opts_str.buf); + if (ret) + return ret; + + struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts); + if (IS_ERR(c)) + exit(8); + + if (test_bit(BCH_FS_errors_fixed, &c->flags)) { + fprintf(stderr, "%s: errors fixed\n", c->name); + ret |= 1; + } + if (test_bit(BCH_FS_error, &c->flags)) { + fprintf(stderr, "%s: still has errors\n", c->name); + ret |= 4; + } + + bch2_fs_stop(c); + } + + printbuf_exit(&opts_str); + return ret; +} diff --git a/c_src/cmd_fusemount.c b/c_src/cmd_fusemount.c new file mode 100644 index 00000000..e5674b42 --- /dev/null +++ b/c_src/cmd_fusemount.c @@ -0,0 +1,1314 @@ +#ifdef BCACHEFS_FUSE + +#include <errno.h> +#include <float.h> +#include <getopt.h> +#include <stdio.h> +#include <sys/statvfs.h> + +#include <fuse_lowlevel.h> + +#include "cmds.h" +#include "libbcachefs.h" +#include "tools-util.h" + +#include 
"libbcachefs/bcachefs.h" +#include "libbcachefs/alloc_foreground.h" +#include "libbcachefs/btree_iter.h" +#include "libbcachefs/buckets.h" +#include "libbcachefs/dirent.h" +#include "libbcachefs/errcode.h" +#include "libbcachefs/error.h" +#include "libbcachefs/fs-common.h" +#include "libbcachefs/inode.h" +#include "libbcachefs/io_read.h" +#include "libbcachefs/io_write.h" +#include "libbcachefs/opts.h" +#include "libbcachefs/super.h" + +/* mode_to_type(): */ +#include "libbcachefs/fs.h" + +#include <linux/dcache.h> + +/* XXX cut and pasted from fsck.c */ +#define QSTR(n) { { { .len = strlen(n) } }, .name = n } + +/* used by write_aligned function for waiting on bch2_write closure */ +struct write_aligned_op_t { + struct closure cl; + + /* must be last: */ + struct bch_write_op op; +}; + + +static inline subvol_inum map_root_ino(u64 ino) +{ + return (subvol_inum) { 1, ino == 1 ? 4096 : ino }; +} + +static inline u64 unmap_root_ino(u64 ino) +{ + return ino == 4096 ? 1 : ino; +} + +static struct stat inode_to_stat(struct bch_fs *c, + struct bch_inode_unpacked *bi) +{ + return (struct stat) { + .st_ino = unmap_root_ino(bi->bi_inum), + .st_size = bi->bi_size, + .st_mode = bi->bi_mode, + .st_uid = bi->bi_uid, + .st_gid = bi->bi_gid, + .st_nlink = bch2_inode_nlink_get(bi), + .st_rdev = bi->bi_dev, + .st_blksize = block_bytes(c), + .st_blocks = bi->bi_sectors, + .st_atim = bch2_time_to_timespec(c, bi->bi_atime), + .st_mtim = bch2_time_to_timespec(c, bi->bi_mtime), + .st_ctim = bch2_time_to_timespec(c, bi->bi_ctime), + }; +} + +static struct fuse_entry_param inode_to_entry(struct bch_fs *c, + struct bch_inode_unpacked *bi) +{ + return (struct fuse_entry_param) { + .ino = unmap_root_ino(bi->bi_inum), + .generation = bi->bi_generation, + .attr = inode_to_stat(c, bi), + .attr_timeout = DBL_MAX, + .entry_timeout = DBL_MAX, + }; +} + +static void bcachefs_fuse_init(void *arg, struct fuse_conn_info *conn) +{ + if (conn->capable & FUSE_CAP_WRITEBACK_CACHE) { + 
fuse_log(FUSE_LOG_DEBUG, "fuse_init: activating writeback\n"); + conn->want |= FUSE_CAP_WRITEBACK_CACHE; + } else + fuse_log(FUSE_LOG_DEBUG, "fuse_init: writeback not capable\n"); + + //conn->want |= FUSE_CAP_POSIX_ACL; +} + +static void bcachefs_fuse_destroy(void *arg) +{ + struct bch_fs *c = arg; + + bch2_fs_stop(c); +} + +static void bcachefs_fuse_lookup(fuse_req_t req, fuse_ino_t dir_ino, + const char *name) +{ + subvol_inum dir = map_root_ino(dir_ino); + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked bi; + struct qstr qstr = QSTR(name); + subvol_inum inum; + int ret; + + fuse_log(FUSE_LOG_DEBUG, "fuse_lookup(dir=%llu name=%s)\n", + dir.inum, name); + + ret = bch2_inode_find_by_inum(c, dir, &bi); + if (ret) { + fuse_reply_err(req, -ret); + return; + } + + struct bch_hash_info hash_info = bch2_hash_info_init(c, &bi); + + ret = bch2_dirent_lookup(c, dir, &hash_info, &qstr, &inum); + if (ret) { + struct fuse_entry_param e = { + .attr_timeout = DBL_MAX, + .entry_timeout = DBL_MAX, + }; + fuse_reply_entry(req, &e); + return; + } + + ret = bch2_inode_find_by_inum(c, inum, &bi); + if (ret) + goto err; + + fuse_log(FUSE_LOG_DEBUG, "fuse_lookup ret(inum=%llu)\n", + bi.bi_inum); + + struct fuse_entry_param e = inode_to_entry(c, &bi); + fuse_reply_entry(req, &e); + return; +err: + fuse_log(FUSE_LOG_DEBUG, "fuse_lookup error %i\n", ret); + fuse_reply_err(req, -ret); +} + +static void bcachefs_fuse_getattr(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + subvol_inum inum = map_root_ino(ino); + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked bi; + struct stat attr; + + fuse_log(FUSE_LOG_DEBUG, "fuse_getattr(inum=%llu)\n", inum.inum); + + int ret = bch2_inode_find_by_inum(c, inum, &bi); + if (ret) { + fuse_log(FUSE_LOG_DEBUG, "fuse_getattr error %i\n", ret); + fuse_reply_err(req, -ret); + return; + } + + fuse_log(FUSE_LOG_DEBUG, "fuse_getattr success\n"); + + attr = inode_to_stat(c, &bi); + fuse_reply_attr(req, 
&attr, DBL_MAX); +} + +static void bcachefs_fuse_setattr(fuse_req_t req, fuse_ino_t ino, + struct stat *attr, int to_set, + struct fuse_file_info *fi) +{ + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked inode_u; + struct btree_trans *trans; + struct btree_iter iter; + u64 now; + int ret; + + subvol_inum inum = map_root_ino(ino); + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_setattr(%llu, %x)\n", inum.inum, to_set); + + trans = bch2_trans_get(c); +retry: + bch2_trans_begin(trans); + now = bch2_current_time(c); + + ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent); + if (ret) + goto err; + + if (to_set & FUSE_SET_ATTR_MODE) + inode_u.bi_mode = attr->st_mode; + if (to_set & FUSE_SET_ATTR_UID) + inode_u.bi_uid = attr->st_uid; + if (to_set & FUSE_SET_ATTR_GID) + inode_u.bi_gid = attr->st_gid; + if (to_set & FUSE_SET_ATTR_SIZE) + inode_u.bi_size = attr->st_size; + if (to_set & FUSE_SET_ATTR_ATIME) + inode_u.bi_atime = timespec_to_bch2_time(c, attr->st_atim); + if (to_set & FUSE_SET_ATTR_MTIME) + inode_u.bi_mtime = timespec_to_bch2_time(c, attr->st_mtim); + if (to_set & FUSE_SET_ATTR_ATIME_NOW) + inode_u.bi_atime = now; + if (to_set & FUSE_SET_ATTR_MTIME_NOW) + inode_u.bi_mtime = now; + /* TODO: CTIME? 
*/ + + ret = bch2_inode_write(trans, &iter, &inode_u) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc); +err: + bch2_trans_iter_exit(trans, &iter); + if (ret == -EINTR) + goto retry; + + bch2_trans_put(trans); + + if (!ret) { + *attr = inode_to_stat(c, &inode_u); + fuse_reply_attr(req, attr, DBL_MAX); + } else { + fuse_reply_err(req, -ret); + } +} + +static int do_create(struct bch_fs *c, subvol_inum dir, + const char *name, mode_t mode, dev_t rdev, + struct bch_inode_unpacked *new_inode) +{ + struct qstr qstr = QSTR(name); + struct bch_inode_unpacked dir_u; + uid_t uid = 0; + gid_t gid = 0; + + bch2_inode_init_early(c, new_inode); + + return bch2_trans_commit_do(c, NULL, NULL, 0, + bch2_create_trans(trans, + dir, &dir_u, + new_inode, &qstr, + uid, gid, mode, rdev, NULL, NULL, + (subvol_inum) { 0 }, 0)); +} + +static void bcachefs_fuse_mknod(fuse_req_t req, fuse_ino_t dir_ino, + const char *name, mode_t mode, + dev_t rdev) +{ + subvol_inum dir = map_root_ino(dir_ino); + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked new_inode; + int ret; + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_mknod(%llu, %s, %x, %x)\n", + dir.inum, name, mode, rdev); + + ret = do_create(c, dir, name, mode, rdev, &new_inode); + if (ret) + goto err; + + struct fuse_entry_param e = inode_to_entry(c, &new_inode); + fuse_reply_entry(req, &e); + return; +err: + fuse_reply_err(req, -ret); +} + +static void bcachefs_fuse_mkdir(fuse_req_t req, fuse_ino_t dir, + const char *name, mode_t mode) +{ + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_mkdir(%llu, %s, %x)\n", + dir, name, mode); + + BUG_ON(mode & S_IFMT); + + mode |= S_IFDIR; + bcachefs_fuse_mknod(req, dir, name, mode, 0); +} + +static void bcachefs_fuse_unlink(fuse_req_t req, fuse_ino_t dir_ino, + const char *name) +{ + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked dir_u, inode_u; + struct qstr qstr = QSTR(name); + subvol_inum dir = map_root_ino(dir_ino); + + 
fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_unlink(%llu, %s)\n", dir.inum, name); + + int ret = bch2_trans_commit_do(c, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc, + bch2_unlink_trans(trans, dir, &dir_u, + &inode_u, &qstr, false)); + + fuse_reply_err(req, -ret); +} + +static void bcachefs_fuse_rmdir(fuse_req_t req, fuse_ino_t dir, + const char *name) +{ + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_rmdir(%llu, %s)\n", dir, name); + + bcachefs_fuse_unlink(req, dir, name); +} + +static void bcachefs_fuse_rename(fuse_req_t req, + fuse_ino_t src_dir_ino, const char *srcname, + fuse_ino_t dst_dir_ino, const char *dstname, + unsigned flags) +{ + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked dst_dir_u, src_dir_u; + struct bch_inode_unpacked src_inode_u, dst_inode_u; + struct qstr dst_name = QSTR(srcname); + struct qstr src_name = QSTR(dstname); + subvol_inum src_dir = map_root_ino(src_dir_ino); + subvol_inum dst_dir = map_root_ino(dst_dir_ino); + int ret; + + fuse_log(FUSE_LOG_DEBUG, + "bcachefs_fuse_rename(%llu, %s, %llu, %s, %x)\n", + src_dir.inum, srcname, dst_dir.inum, dstname, flags); + + /* XXX handle overwrites */ + ret = bch2_trans_commit_do(c, NULL, NULL, 0, + bch2_rename_trans(trans, + src_dir, &src_dir_u, + dst_dir, &dst_dir_u, + &src_inode_u, &dst_inode_u, + &src_name, &dst_name, + BCH_RENAME)); + + fuse_reply_err(req, -ret); +} + +static void bcachefs_fuse_link(fuse_req_t req, fuse_ino_t ino, + fuse_ino_t newparent_ino, const char *newname) +{ + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked dir_u, inode_u; + struct qstr qstr = QSTR(newname); + subvol_inum newparent = map_root_ino(newparent_ino); + subvol_inum inum = map_root_ino(ino); + int ret; + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_link(%llu, %llu, %s)\n", + inum.inum, newparent.inum, newname); + + ret = bch2_trans_commit_do(c, NULL, NULL, 0, + bch2_link_trans(trans, newparent, &dir_u, + inum, &inode_u, &qstr)); + + if (!ret) { + struct fuse_entry_param e = 
inode_to_entry(c, &inode_u); + fuse_reply_entry(req, &e); + } else { + fuse_reply_err(req, -ret); + } +} + +static void bcachefs_fuse_open(fuse_req_t req, fuse_ino_t inum, + struct fuse_file_info *fi) +{ + fi->direct_io = false; + fi->keep_cache = true; + fi->cache_readdir = true; + + fuse_reply_open(req, fi); +} + +static void userbio_init(struct bio *bio, struct bio_vec *bv, + void *buf, size_t size) +{ + bio_init(bio, NULL, bv, 1, 0); + bio->bi_iter.bi_size = size; + bv->bv_page = buf; + bv->bv_len = size; + bv->bv_offset = 0; +} + +static int get_inode_io_opts(struct bch_fs *c, subvol_inum inum, struct bch_io_opts *opts) +{ + struct bch_inode_unpacked inode; + if (bch2_inode_find_by_inum(c, inum, &inode)) + return -EINVAL; + + bch2_inode_opts_get(opts, c, &inode); + return 0; +} + +static void bcachefs_fuse_read_endio(struct bio *bio) +{ + closure_put(bio->bi_private); +} + + +static void bcachefs_fuse_write_endio(struct bch_write_op *op) +{ + struct write_aligned_op_t *w = container_of(op,struct write_aligned_op_t,op); + closure_put(&w->cl); +} + + +struct fuse_align_io { + off_t start; + size_t pad_start; + off_t end; + size_t pad_end; + size_t size; +}; + +/* Handle unaligned start and end */ +/* TODO: align to block_bytes, sector size, or page size? */ +static struct fuse_align_io align_io(const struct bch_fs *c, size_t size, + off_t offset) +{ + struct fuse_align_io align; + + BUG_ON(offset < 0); + + align.start = round_down(offset, block_bytes(c)); + align.pad_start = offset - align.start; + + off_t end = offset + size; + align.end = round_up(end, block_bytes(c)); + align.pad_end = align.end - end; + + align.size = align.end - align.start; + + return align; +} + +/* + * Given an aligned number of bytes transferred, figure out how many unaligned + * bytes were transferred. 
+ */ +static size_t align_fix_up_bytes(const struct fuse_align_io *align, + size_t align_bytes) +{ + size_t bytes = 0; + + if (align_bytes > align->pad_start) { + bytes = align_bytes - align->pad_start; + bytes = bytes > align->pad_end ? bytes - align->pad_end : 0; + } + + return bytes; +} + +/* + * Read aligned data. + */ +static int read_aligned(struct bch_fs *c, subvol_inum inum, size_t aligned_size, + off_t aligned_offset, void *buf) +{ + BUG_ON(aligned_size & (block_bytes(c) - 1)); + BUG_ON(aligned_offset & (block_bytes(c) - 1)); + + struct bch_io_opts io_opts; + if (get_inode_io_opts(c, inum, &io_opts)) + return -ENOENT; + + struct bch_read_bio rbio; + struct bio_vec bv; + userbio_init(&rbio.bio, &bv, buf, aligned_size); + bio_set_op_attrs(&rbio.bio, REQ_OP_READ, REQ_SYNC); + rbio.bio.bi_iter.bi_sector = aligned_offset >> 9; + + struct closure cl; + closure_init_stack(&cl); + + closure_get(&cl); + rbio.bio.bi_end_io = bcachefs_fuse_read_endio; + rbio.bio.bi_private = &cl; + + bch2_read(c, rbio_init(&rbio.bio, io_opts), inum); + + closure_sync(&cl); + + return -blk_status_to_errno(rbio.bio.bi_status); +} + +static void bcachefs_fuse_read(fuse_req_t req, fuse_ino_t ino, + size_t size, off_t offset, + struct fuse_file_info *fi) +{ + subvol_inum inum = map_root_ino(ino); + struct bch_fs *c = fuse_req_userdata(req); + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_read(%llu, %zd, %lld)\n", + inum, size, offset); + + /* Check inode size. 
*/ + struct bch_inode_unpacked bi; + int ret = bch2_inode_find_by_inum(c, inum, &bi); + if (ret) { + fuse_reply_err(req, -ret); + return; + } + + off_t end = min_t(u64, bi.bi_size, offset + size); + if (end <= offset) { + fuse_reply_buf(req, NULL, 0); + return; + } + size = end - offset; + + struct fuse_align_io align = align_io(c, size, offset); + + void *buf = aligned_alloc(PAGE_SIZE, align.size); + if (!buf) { + fuse_reply_err(req, ENOMEM); + return; + } + + ret = read_aligned(c, inum, align.size, align.start, buf); + + if (likely(!ret)) + fuse_reply_buf(req, buf + align.pad_start, size); + else + fuse_reply_err(req, -ret); + + free(buf); +} + +static int inode_update_times(struct bch_fs *c, subvol_inum inum) +{ + struct btree_trans *trans; + struct btree_iter iter; + struct bch_inode_unpacked inode_u; + int ret = 0; + u64 now; + + trans = bch2_trans_get(c); +retry: + bch2_trans_begin(trans); + now = bch2_current_time(c); + + ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent); + if (ret) + goto err; + + inode_u.bi_mtime = now; + inode_u.bi_ctime = now; + + ret = bch2_inode_write(trans, &iter, &inode_u); + if (ret) + goto err; + + ret = bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc); +err: + bch2_trans_iter_exit(trans, &iter); + if (ret == -EINTR) + goto retry; + + bch2_trans_put(trans); + return ret; +} + +static int write_aligned(struct bch_fs *c, subvol_inum inum, + struct bch_io_opts io_opts, void *buf, + size_t aligned_size, off_t aligned_offset, + off_t new_i_size, size_t *written_out) +{ + + struct write_aligned_op_t w = { 0 } +; + struct bch_write_op *op = &w.op; + struct bio_vec bv; + + BUG_ON(aligned_size & (block_bytes(c) - 1)); + BUG_ON(aligned_offset & (block_bytes(c) - 1)); + + *written_out = 0; + + closure_init_stack(&w.cl); + + bch2_write_op_init(op, c, io_opts); /* XXX reads from op?! 
*/ + op->write_point = writepoint_hashed(0); + op->nr_replicas = io_opts.data_replicas; + op->target = io_opts.foreground_target; + op->subvol = inum.subvol; + op->pos = POS(inum.inum, aligned_offset >> 9); + op->new_i_size = new_i_size; + op->end_io = bcachefs_fuse_write_endio; + + userbio_init(&op->wbio.bio, &bv, buf, aligned_size); + bio_set_op_attrs(&op->wbio.bio, REQ_OP_WRITE, REQ_SYNC); + + if (bch2_disk_reservation_get(c, &op->res, aligned_size >> 9, + op->nr_replicas, 0)) { + /* XXX: use check_range_allocated like dio write path */ + return -ENOSPC; + } + + closure_get(&w.cl); + + closure_call(&op->cl, bch2_write, NULL, NULL); + + closure_sync(&w.cl); + + if (!op->error) + *written_out = op->written << 9; + + return op->error; +} + +static void bcachefs_fuse_write(fuse_req_t req, fuse_ino_t ino, + const char *buf, size_t size, + off_t offset, + struct fuse_file_info *fi) +{ + subvol_inum inum = map_root_ino(ino); + struct bch_fs *c = fuse_req_userdata(req); + struct bch_io_opts io_opts; + size_t aligned_written; + int ret = 0; + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write(%llu, %zd, %lld)\n", + inum, size, offset); + + struct fuse_align_io align = align_io(c, size, offset); + void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size); + BUG_ON(!aligned_buf); + + if (get_inode_io_opts(c, inum, &io_opts)) { + ret = -ENOENT; + goto err; + } + + /* Realign the data and read in start and end, if needed */ + + /* Read partial start data. */ + if (align.pad_start) { + memset(aligned_buf, 0, block_bytes(c)); + + ret = read_aligned(c, inum, block_bytes(c), align.start, + aligned_buf); + if (ret) + goto err; + } + + /* + * Read partial end data. If the whole write fits in one block, the + * start data and the end data are the same so this isn't needed. 
+ */ + if (align.pad_end && + !(align.pad_start && align.size == block_bytes(c))) { + off_t partial_end_start = align.end - block_bytes(c); + size_t buf_offset = align.size - block_bytes(c); + + memset(aligned_buf + buf_offset, 0, block_bytes(c)); + + ret = read_aligned(c, inum, block_bytes(c), partial_end_start, + aligned_buf + buf_offset); + if (ret) + goto err; + } + + /* Overlay what we want to write. */ + memcpy(aligned_buf + align.pad_start, buf, size); + + /* Actually write. */ + ret = write_aligned(c, inum, io_opts, aligned_buf, + align.size, align.start, + offset + size, &aligned_written); + + /* Figure out how many unaligned bytes were written. */ + size_t written = align_fix_up_bytes(&align, aligned_written); + BUG_ON(written > size); + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_write: wrote %zd bytes\n", + written); + + if (written > 0) + ret = 0; + + /* + * Update inode times. + * TODO: Integrate with bch2_extent_update() + */ + if (!ret) + ret = inode_update_times(c, inum); + + if (!ret) { + BUG_ON(written == 0); + fuse_reply_write(req, written); + free(aligned_buf); + return; + } + +err: + fuse_reply_err(req, -ret); + free(aligned_buf); +} + +static void bcachefs_fuse_symlink(fuse_req_t req, const char *link, + fuse_ino_t dir_ino, const char *name) +{ + subvol_inum dir = map_root_ino(dir_ino); + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked new_inode; + size_t link_len = strlen(link); + int ret; + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_symlink(%s, %llu, %s)\n", + link, dir.inum, name); + + ret = do_create(c, dir, name, S_IFLNK|S_IRWXUGO, 0, &new_inode); + if (ret) + goto err; + + struct bch_io_opts io_opts; + ret = get_inode_io_opts(c, dir, &io_opts); + if (ret) + goto err; + + struct fuse_align_io align = align_io(c, link_len + 1, 0); + + void *aligned_buf = aligned_alloc(PAGE_SIZE, align.size); + BUG_ON(!aligned_buf); + + memset(aligned_buf, 0, align.size); + memcpy(aligned_buf, link, link_len); /* already terminated */ + 
+ subvol_inum inum = (subvol_inum) { dir.subvol, new_inode.bi_inum }; + + size_t aligned_written; + ret = write_aligned(c, inum, io_opts, aligned_buf, + align.size, align.start, link_len + 1, + &aligned_written); + free(aligned_buf); + + if (ret) + goto err; + + size_t written = align_fix_up_bytes(&align, aligned_written); + BUG_ON(written != link_len + 1); // TODO: handle short + + ret = inode_update_times(c, inum); + if (ret) + goto err; + + new_inode.bi_size = written; + + struct fuse_entry_param e = inode_to_entry(c, &new_inode); + fuse_reply_entry(req, &e); + return; + +err: + fuse_reply_err(req, -ret); +} + +static void bcachefs_fuse_readlink(fuse_req_t req, fuse_ino_t ino) +{ + subvol_inum inum = map_root_ino(ino); + struct bch_fs *c = fuse_req_userdata(req); + char *buf = NULL; + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readlink(%llu)\n", inum.inum); + + struct bch_inode_unpacked bi; + int ret = bch2_inode_find_by_inum(c, inum, &bi); + if (ret) + goto err; + + struct fuse_align_io align = align_io(c, bi.bi_size, 0); + + ret = -ENOMEM; + buf = aligned_alloc(PAGE_SIZE, align.size); + if (!buf) + goto err; + + ret = read_aligned(c, inum, align.size, align.start, buf); + if (ret) + goto err; + + BUG_ON(buf[align.size - 1] != 0); + + fuse_reply_readlink(req, buf); + +err: + if (ret) + fuse_reply_err(req, -ret); + + free(buf); +} + +#if 0 +/* + * FUSE flush is essentially the close() call, however it is not guaranteed + * that one flush happens per open/create. + * + * It doesn't have to do anything, and is mostly relevant for NFS-style + * filesystems where close has some relationship to caching. 
+ */ +static void bcachefs_fuse_flush(fuse_req_t req, fuse_ino_t inum, + struct fuse_file_info *fi) +{ + struct bch_fs *c = fuse_req_userdata(req); +} + +static void bcachefs_fuse_release(fuse_req_t req, fuse_ino_t inum, + struct fuse_file_info *fi) +{ + struct bch_fs *c = fuse_req_userdata(req); +} + +static void bcachefs_fuse_fsync(fuse_req_t req, fuse_ino_t inum, int datasync, + struct fuse_file_info *fi) +{ + struct bch_fs *c = fuse_req_userdata(req); +} + +static void bcachefs_fuse_opendir(fuse_req_t req, fuse_ino_t inum, + struct fuse_file_info *fi) +{ + struct bch_fs *c = fuse_req_userdata(req); +} +#endif + +struct fuse_dir_context { + struct dir_context ctx; + fuse_req_t req; + char *buf; + size_t bufsize; +}; + +struct fuse_dirent { + uint64_t ino; + uint64_t off; + uint32_t namelen; + uint32_t type; + char name[]; +}; + +#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) +#define FUSE_DIRENT_ALIGN(x) \ + (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) + +static size_t fuse_add_direntry2(char *buf, size_t bufsize, + const char *name, int namelen, + const struct stat *stbuf, off_t off) +{ + size_t entlen = FUSE_NAME_OFFSET + namelen; + size_t entlen_padded = FUSE_DIRENT_ALIGN(entlen); + struct fuse_dirent *dirent = (struct fuse_dirent *) buf; + + if ((buf == NULL) || (entlen_padded > bufsize)) + return entlen_padded; + + dirent->ino = stbuf->st_ino; + dirent->off = off; + dirent->namelen = namelen; + dirent->type = (stbuf->st_mode & S_IFMT) >> 12; + memcpy(dirent->name, name, namelen); + memset(dirent->name + namelen, 0, entlen_padded - entlen); + + return entlen_padded; +} + +static int fuse_filldir(struct dir_context *_ctx, + const char *name, int namelen, + loff_t pos, u64 ino, unsigned type) +{ + struct fuse_dir_context *ctx = + container_of(_ctx, struct fuse_dir_context, ctx); + + struct stat statbuf = { + .st_ino = unmap_root_ino(ino), + .st_mode = type << 12, + }; + + fuse_log(FUSE_LOG_DEBUG, "fuse_filldir(name=%s inum=%llu 
pos=%llu)\n", + name, statbuf.st_ino, pos); + + size_t len = fuse_add_direntry2(ctx->buf, + ctx->bufsize, + name, + namelen, + &statbuf, + pos + 1); + + if (len > ctx->bufsize) + return -1; + + ctx->buf += len; + ctx->bufsize -= len; + + return 0; +} + +static bool handle_dots(struct fuse_dir_context *ctx, fuse_ino_t dir) +{ + if (ctx->ctx.pos == 0) { + if (fuse_filldir(&ctx->ctx, ".", 1, ctx->ctx.pos, + dir, DT_DIR) < 0) + return false; + ctx->ctx.pos = 1; + } + + if (ctx->ctx.pos == 1) { + if (fuse_filldir(&ctx->ctx, "..", 2, ctx->ctx.pos, + /*TODO: parent*/ 1, DT_DIR) < 0) + return false; + ctx->ctx.pos = 2; + } + + return true; +} + +static void bcachefs_fuse_readdir(fuse_req_t req, fuse_ino_t dir_ino, + size_t size, off_t off, + struct fuse_file_info *fi) +{ + subvol_inum dir = map_root_ino(dir_ino); + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked bi; + char *buf = calloc(size, 1); + struct fuse_dir_context ctx = { + .ctx.actor = fuse_filldir, + .ctx.pos = off, + .req = req, + .buf = buf, + .bufsize = size, + }; + int ret = 0; + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir(dir=%llu, size=%zu, " + "off=%lld)\n", dir.inum, size, off); + + ret = bch2_inode_find_by_inum(c, dir, &bi); + if (ret) + goto reply; + + if (!S_ISDIR(bi.bi_mode)) { + ret = -ENOTDIR; + goto reply; + } + + if (!handle_dots(&ctx, dir.inum)) + goto reply; + + ret = bch2_readdir(c, dir, &ctx.ctx); +reply: + if (!ret) { + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_readdir reply %zd\n", + ctx.buf - buf); + fuse_reply_buf(req, buf, ctx.buf - buf); + } else { + fuse_reply_err(req, -ret); + } + + free(buf); +} + +#if 0 +static void bcachefs_fuse_readdirplus(fuse_req_t req, fuse_ino_t dir, + size_t size, off_t off, + struct fuse_file_info *fi) +{ + +} + +static void bcachefs_fuse_releasedir(fuse_req_t req, fuse_ino_t inum, + struct fuse_file_info *fi) +{ + struct bch_fs *c = fuse_req_userdata(req); +} + +static void bcachefs_fuse_fsyncdir(fuse_req_t req, fuse_ino_t inum, 
int datasync, + struct fuse_file_info *fi) +{ + struct bch_fs *c = fuse_req_userdata(req); +} +#endif + +static void bcachefs_fuse_statfs(fuse_req_t req, fuse_ino_t inum) +{ + struct bch_fs *c = fuse_req_userdata(req); + struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c); + unsigned shift = c->block_bits; + struct statvfs statbuf = { + .f_bsize = block_bytes(c), + .f_frsize = block_bytes(c), + .f_blocks = usage.capacity >> shift, + .f_bfree = (usage.capacity - usage.used) >> shift, + //.f_bavail = statbuf.f_bfree, + .f_files = usage.nr_inodes, + .f_ffree = U64_MAX, + .f_namemax = BCH_NAME_MAX, + }; + + fuse_reply_statfs(req, &statbuf); +} + +#if 0 +static void bcachefs_fuse_setxattr(fuse_req_t req, fuse_ino_t inum, + const char *name, const char *value, + size_t size, int flags) +{ + struct bch_fs *c = fuse_req_userdata(req); +} + +static void bcachefs_fuse_getxattr(fuse_req_t req, fuse_ino_t inum, + const char *name, size_t size) +{ + struct bch_fs *c = fuse_req_userdata(req); + + fuse_reply_xattr(req, ); +} + +static void bcachefs_fuse_listxattr(fuse_req_t req, fuse_ino_t inum, size_t size) +{ + struct bch_fs *c = fuse_req_userdata(req); +} + +static void bcachefs_fuse_removexattr(fuse_req_t req, fuse_ino_t inum, + const char *name) +{ + struct bch_fs *c = fuse_req_userdata(req); +} +#endif + +static void bcachefs_fuse_create(fuse_req_t req, fuse_ino_t dir_ino, + const char *name, mode_t mode, + struct fuse_file_info *fi) +{ + subvol_inum dir = map_root_ino(dir_ino); + struct bch_fs *c = fuse_req_userdata(req); + struct bch_inode_unpacked new_inode; + int ret; + + fuse_log(FUSE_LOG_DEBUG, "bcachefs_fuse_create(%llu, %s, %x)\n", + dir.inum, name, mode); + + ret = do_create(c, dir, name, mode, 0, &new_inode); + if (ret) + goto err; + + struct fuse_entry_param e = inode_to_entry(c, &new_inode); + fuse_reply_create(req, &e, fi); + return; +err: + fuse_reply_err(req, -ret); +} + +#if 0 +static void bcachefs_fuse_write_buf(fuse_req_t req, fuse_ino_t inum, + 
struct fuse_bufvec *bufv, off_t off, + struct fuse_file_info *fi) +{ + struct bch_fs *c = fuse_req_userdata(req); +} + +static void bcachefs_fuse_fallocate(fuse_req_t req, fuse_ino_t inum, int mode, + off_t offset, off_t length, + struct fuse_file_info *fi) +{ + struct bch_fs *c = fuse_req_userdata(req); +} +#endif + +static const struct fuse_lowlevel_ops bcachefs_fuse_ops = { + .init = bcachefs_fuse_init, + .destroy = bcachefs_fuse_destroy, + .lookup = bcachefs_fuse_lookup, + .getattr = bcachefs_fuse_getattr, + .setattr = bcachefs_fuse_setattr, + .readlink = bcachefs_fuse_readlink, + .mknod = bcachefs_fuse_mknod, + .mkdir = bcachefs_fuse_mkdir, + .unlink = bcachefs_fuse_unlink, + .rmdir = bcachefs_fuse_rmdir, + .symlink = bcachefs_fuse_symlink, + .rename = bcachefs_fuse_rename, + .link = bcachefs_fuse_link, + .open = bcachefs_fuse_open, + .read = bcachefs_fuse_read, + .write = bcachefs_fuse_write, + //.flush = bcachefs_fuse_flush, + //.release = bcachefs_fuse_release, + //.fsync = bcachefs_fuse_fsync, + //.opendir = bcachefs_fuse_opendir, + .readdir = bcachefs_fuse_readdir, + //.readdirplus = bcachefs_fuse_readdirplus, + //.releasedir = bcachefs_fuse_releasedir, + //.fsyncdir = bcachefs_fuse_fsyncdir, + .statfs = bcachefs_fuse_statfs, + //.setxattr = bcachefs_fuse_setxattr, + //.getxattr = bcachefs_fuse_getxattr, + //.listxattr = bcachefs_fuse_listxattr, + //.removexattr = bcachefs_fuse_removexattr, + .create = bcachefs_fuse_create, + + /* posix locks: */ +#if 0 + .getlk = bcachefs_fuse_getlk, + .setlk = bcachefs_fuse_setlk, +#endif + //.write_buf = bcachefs_fuse_write_buf, + //.fallocate = bcachefs_fuse_fallocate, + +}; + +/* + * Setup and command parsing. 
+ */ + +struct bf_context { + char *devices_str; + char **devices; + int nr_devices; +}; + +static void bf_context_free(struct bf_context *ctx) +{ + int i; + + free(ctx->devices_str); + for (i = 0; i < ctx->nr_devices; ++i) + free(ctx->devices[i]); + free(ctx->devices); +} + +static struct fuse_opt bf_opts[] = { + FUSE_OPT_END +}; + +/* + * Fuse option parsing helper -- returning 0 means we consumed the argument, 1 + * means we did not. + */ +static int bf_opt_proc(void *data, const char *arg, int key, + struct fuse_args *outargs) +{ + struct bf_context *ctx = data; + + switch (key) { + case FUSE_OPT_KEY_NONOPT: + /* Just extract the first non-option string. */ + if (!ctx->devices_str) { + ctx->devices_str = strdup(arg); + return 0; + } + return 1; + } + + return 1; +} + +/* + * dev1:dev2 -> [ dev1, dev2 ] + * dev -> [ dev ] + */ +static void tokenize_devices(struct bf_context *ctx) +{ + char *devices_str = strdup(ctx->devices_str); + char *devices_tmp = devices_str; + char **devices = NULL; + int nr = 0; + char *dev = NULL; + + while ((dev = strsep(&devices_tmp, ":"))) { + if (strlen(dev) > 0) { + devices = realloc(devices, (nr + 1) * sizeof *devices); + devices[nr] = strdup(dev); + nr++; + } + } + + if (!devices) { + devices = malloc(sizeof *devices); + devices[0] = strdup(ctx->devices_str); + nr = 1; + } + + ctx->devices = devices; + ctx->nr_devices = nr; + + free(devices_str); +} + +static void usage(char *argv[]) +{ + printf("Usage: %s fusemount [options] <dev>[:dev2:...] <mountpoint>\n", + argv[0]); + printf("\n"); +} + +int cmd_fusemount(int argc, char *argv[]) +{ + struct fuse_args args = FUSE_ARGS_INIT(argc, argv); + struct bch_opts bch_opts = bch2_opts_empty(); + struct bf_context ctx = { 0 }; + struct bch_fs *c = NULL; + struct fuse_session *se = NULL; + int ret = 0, i; + + /* Parse arguments. 
*/ + if (fuse_opt_parse(&args, &ctx, bf_opts, bf_opt_proc) < 0) + die("fuse_opt_parse err: %m"); + + struct fuse_cmdline_opts fuse_opts; + if (fuse_parse_cmdline(&args, &fuse_opts) < 0) + die("fuse_parse_cmdline err: %m"); + + if (fuse_opts.show_help) { + usage(argv); + fuse_cmdline_help(); + fuse_lowlevel_help(); + ret = 0; + goto out; + } + if (fuse_opts.show_version) { + printf("FUSE library version %s\n", fuse_pkgversion()); + fuse_lowlevel_version(); + printf("bcachefs version: %s\n", VERSION_STRING); + ret = 0; + goto out; + } + if (!fuse_opts.mountpoint) { + usage(argv); + printf("Please supply a mountpoint.\n"); + ret = 1; + goto out; + } + if (!ctx.devices_str) { + usage(argv); + printf("Please specify a device or device1:device2:...\n"); + ret = 1; + goto out; + } + tokenize_devices(&ctx); + + struct printbuf fsname = PRINTBUF; + prt_printf(&fsname, "fsname="); + for (i = 0; i < ctx.nr_devices; ++i) { + if (i) + prt_str(&fsname, ":"); + prt_str(&fsname, ctx.devices[i]); + } + + fuse_opt_add_arg(&args, "-o"); + fuse_opt_add_arg(&args, fsname.buf); + + /* Open bch */ + printf("Opening bcachefs filesystem on:\n"); + for (i = 0; i < ctx.nr_devices; ++i) + printf("\t%s\n", ctx.devices[i]); + + c = bch2_fs_open(ctx.devices, ctx.nr_devices, bch_opts); + if (IS_ERR(c)) + die("error opening %s: %s", ctx.devices_str, + bch2_err_str(PTR_ERR(c))); + + /* Fuse */ + se = fuse_session_new(&args, &bcachefs_fuse_ops, + sizeof(bcachefs_fuse_ops), c); + if (!se) { + fprintf(stderr, "fuse_lowlevel_new err: %m\n"); + goto err; + } + + if (fuse_set_signal_handlers(se) < 0) { + fprintf(stderr, "fuse_set_signal_handlers err: %m\n"); + goto err; + } + + if (fuse_session_mount(se, fuse_opts.mountpoint)) { + fprintf(stderr, "fuse_mount err: %m\n"); + goto err; + } + + /* This print statement is a trigger for tests. 
*/ + printf("Fuse mount initialized.\n"); + + if (fuse_opts.foreground == 0){ + printf("Fuse forcing to foreground mode, due gcc constructors usage.\n"); + fuse_opts.foreground = 1; + } + + fuse_daemonize(fuse_opts.foreground); + + ret = fuse_session_loop(se); + +out: + if (se) { + fuse_session_unmount(se); + fuse_remove_signal_handlers(se); + fuse_session_destroy(se); + } + + free(fuse_opts.mountpoint); + fuse_opt_free_args(&args); + bf_context_free(&ctx); + + return ret ? 1 : 0; + +err: + bch2_fs_stop(c); + goto out; +} + +#endif /* BCACHEFS_FUSE */ diff --git a/c_src/cmd_key.c b/c_src/cmd_key.c new file mode 100644 index 00000000..adb0ac8d --- /dev/null +++ b/c_src/cmd_key.c @@ -0,0 +1,161 @@ +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <uuid/uuid.h> + +#include "cmds.h" +#include "libbcachefs/checksum.h" +#include "crypto.h" +#include "libbcachefs.h" +#include "tools-util.h" + +static void unlock_usage(void) +{ + puts("bcachefs unlock - unlock an encrypted filesystem so it can be mounted\n" + "Usage: bcachefs unlock [OPTION] device\n" + "\n" + "Options:\n" + " -c Check if a device is encrypted\n" + " -k (session|user|user_session)\n" + " Keyring to add to (default: user)\n" + " -f Passphrase file to read from (disables passphrase prompt)\n" + " -h Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +int cmd_unlock(int argc, char *argv[]) +{ + const char *keyring = "user"; + bool check = false; + const char *passphrase_file_path = NULL; + char *passphrase = NULL; + + int opt; + + while ((opt = getopt(argc, argv, "cf:k:h")) != -1) + switch (opt) { + case 'c': + check = true; + break; + case 'k': + keyring = strdup(optarg); + break; + case 'f': + passphrase_file_path = strdup(optarg); + break; + case 'h': + unlock_usage(); + exit(EXIT_SUCCESS); + } + args_shift(optind); + + char *dev = arg_pop(); + if (!dev) + die("Please supply a device"); + + if (argc) + die("Too many arguments"); + + struct bch_opts 
opts = bch2_opts_empty(); + + opt_set(opts, noexcl, true); + opt_set(opts, nochanges, true); + + struct bch_sb_handle sb; + int ret = bch2_read_super(dev, &opts, &sb); + if (ret) + die("Error opening %s: %s", dev, bch2_err_str(ret)); + + if (!bch2_sb_is_encrypted(sb.sb)) + die("%s is not encrypted", dev); + + if (check) + exit(EXIT_SUCCESS); + if (passphrase_file_path){ + passphrase = read_file_str(AT_FDCWD, passphrase_file_path); + } else { + passphrase = read_passphrase("Enter passphrase: "); + } + + bch2_add_key(sb.sb, "user", keyring, passphrase); + + bch2_free_super(&sb); + memzero_explicit(passphrase, strlen(passphrase)); + free(passphrase); + return 0; +} + +int cmd_set_passphrase(int argc, char *argv[]) +{ + struct bch_opts opts = bch2_opts_empty(); + struct bch_fs *c; + + if (argc < 2) + die("Please supply one or more devices"); + + opt_set(opts, nostart, true); + + /* + * we use bch2_fs_open() here, instead of just reading the superblock, + * to make sure we're opening and updating every component device: + */ + + c = bch2_fs_open(argv + 1, argc - 1, opts); + if (IS_ERR(c)) + die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c))); + + struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); + if (!crypt) + die("Filesystem does not have encryption enabled"); + + struct bch_encrypted_key new_key; + new_key.magic = BCH_KEY_MAGIC; + + int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key); + if (ret) + die("Error getting current key"); + + char *new_passphrase = read_passphrase_twice("Enter new passphrase: "); + struct bch_key passphrase_key = derive_passphrase(crypt, new_passphrase); + + if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(c->disk_sb.sb), + &new_key, sizeof(new_key))) + die("error encrypting key"); + crypt->key = new_key; + + bch2_revoke_key(c->disk_sb.sb); + bch2_write_super(c); + bch2_fs_stop(c); + return 0; +} + +int cmd_remove_passphrase(int argc, char *argv[]) +{ + struct bch_opts opts = 
bch2_opts_empty(); + struct bch_fs *c; + + if (argc < 2) + die("Please supply one or more devices"); + + opt_set(opts, nostart, true); + c = bch2_fs_open(argv + 1, argc - 1, opts); + if (IS_ERR(c)) + die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c))); + + struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); + if (!crypt) + die("Filesystem does not have encryption enabled"); + + struct bch_encrypted_key new_key; + new_key.magic = BCH_KEY_MAGIC; + + int ret = bch2_decrypt_sb_key(c, crypt, &new_key.key); + if (ret) + die("Error getting current key"); + + crypt->key = new_key; + + bch2_write_super(c); + bch2_fs_stop(c); + return 0; +} diff --git a/c_src/cmd_kill_btree_node.c b/c_src/cmd_kill_btree_node.c new file mode 100644 index 00000000..c8f43150 --- /dev/null +++ b/c_src/cmd_kill_btree_node.c @@ -0,0 +1,140 @@ +#include <fcntl.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "cmds.h" +#include "libbcachefs.h" +#include "tools-util.h" + +#include "libbcachefs/bcachefs.h" +#include "libbcachefs/btree_iter.h" +#include "libbcachefs/errcode.h" +#include "libbcachefs/error.h" +#include "libbcachefs/sb-members.h" +#include "libbcachefs/super.h" + +static void kill_btree_node_usage(void) +{ + puts("bcachefs kill_btree_node - make btree nodes unreadable\n" + "Usage: bcachefs kill_btree_node [OPTION]... 
<devices>\n" + "\n" + "Options:\n" + " -b (extents|inodes|dirents|xattrs) Btree to delete from\n" + " -l level Levle to delete from (0 == leaves)\n" + " -i index Index of btree node to kill\n" + " -h Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +struct kill_node { + unsigned btree; + unsigned level; + u64 idx; +}; + +int cmd_kill_btree_node(int argc, char *argv[]) +{ + struct bch_opts opts = bch2_opts_empty(); + DARRAY(struct kill_node) kill_nodes = {}; + int opt; + + opt_set(opts, read_only, true); + + while ((opt = getopt(argc, argv, "n:h")) != -1) + switch (opt) { + case 'n': { + char *p = optarg; + const char *str_btree = strsep(&p, ":"); + const char *str_level = strsep(&p, ":"); + const char *str_idx = strsep(&p, ":"); + + struct kill_node n = { + .btree = read_string_list_or_die(str_btree, + __bch2_btree_ids, "btree id"), + }; + + if (str_level && + (kstrtouint(str_level, 10, &n.level) || n.level >= BTREE_MAX_DEPTH)) + die("invalid level"); + + if (str_idx && + kstrtoull(str_idx, 10, &n.idx)) + die("invalid index %s", str_idx); + + darray_push(&kill_nodes, n); + break; + } + case 'h': + kill_btree_node_usage(); + exit(EXIT_SUCCESS); + } + args_shift(optind); + + if (!argc) + die("Please supply device(s)"); + + struct bch_fs *c = bch2_fs_open(argv, argc, opts); + if (IS_ERR(c)) + die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c))); + + int ret; + void *zeroes; + + ret = posix_memalign(&zeroes, c->opts.block_size, c->opts.block_size); + if (ret) + die("error %s from posix_memalign", bch2_err_str(ret)); + + struct btree_trans *trans = bch2_trans_get(c); + + darray_for_each(kill_nodes, i) { + ret = __for_each_btree_node(trans, iter, i->btree, POS_MIN, 0, i->level, 0, b, ({ + if (b->c.level != i->level) + continue; + + int ret2 = 0; + if (!i->idx) { + struct printbuf buf = PRINTBUF; + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); + bch_info(c, "killing btree node %s l=%u %s", + 
bch2_btree_id_str(i->btree), i->level, buf.buf); + printbuf_exit(&buf); + + ret2 = 1; + + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)); + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev); + if (!ca) + continue; + + int ret3 = pwrite(ca->disk_sb.bdev->bd_fd, zeroes, + c->opts.block_size, ptr->offset << 9); + bch2_dev_put(ca); + if (ret3 != c->opts.block_size) { + bch_err(c, "pwrite error: expected %u got %i %s", + c->opts.block_size, ret, strerror(errno)); + ret2 = EXIT_FAILURE; + } + } + } + + i->idx--; + ret2; + })); + + if (ret < 0) { + bch_err(c, "error %i walking btree nodes", ret); + break; + } else if (!ret) { + bch_err(c, "node at specified index not found"); + ret = EXIT_FAILURE; + break; + } + } + + bch2_trans_put(trans); + bch2_fs_stop(c); + darray_exit(&kill_nodes); + return ret < 0 ? ret : 0; +} diff --git a/c_src/cmd_list_journal.c b/c_src/cmd_list_journal.c new file mode 100644 index 00000000..fe7f9b05 --- /dev/null +++ b/c_src/cmd_list_journal.c @@ -0,0 +1,306 @@ +#include <fcntl.h> +#include <getopt.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "cmds.h" +#include "libbcachefs.h" +#include "tools-util.h" + +#include "libbcachefs/bcachefs.h" +#include "libbcachefs/btree_iter.h" +#include "libbcachefs/errcode.h" +#include "libbcachefs/error.h" +#include "libbcachefs/journal_io.h" +#include "libbcachefs/journal_seq_blacklist.h" +#include "libbcachefs/super.h" + +static const char *NORMAL = "\x1B[0m"; +static const char *RED = "\x1B[31m"; + +static void list_journal_usage(void) +{ + puts("bcachefs list_journal - print contents of journal\n" + "Usage: bcachefs list_journal [OPTION]... 
<devices>\n" + "\n" + "Options:\n" + " -a Read entire journal, not just dirty entries\n" + " -n, --nr-entries=nr Number of journal entries to print, starting from the most recent\n" + " -t, --transaction-filter=bbpos Filter transactions not updating <bbpos>\n" + " Or entries not matching the range <bbpos-bbpos>\n" + " -k, --key-filter=btree Filter keys not updating btree\n" + " -v, --verbose Verbose mode\n" + " -h, --help Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +static void star_start_of_lines(char *buf) +{ + char *p = buf; + + if (*p == ' ') + *p = '*'; + + while ((p = strstr(p, "\n "))) + p[1] = '*'; +} + +static inline bool entry_is_transaction_start(struct jset_entry *entry) +{ + return entry->type == BCH_JSET_ENTRY_log && !entry->level; +} + +typedef DARRAY(struct bbpos_range) d_bbpos_range; +typedef DARRAY(enum btree_id) d_btree_id; + +static bool bkey_matches_filter(d_bbpos_range filter, struct jset_entry *entry, struct bkey_i *k) +{ + darray_for_each(filter, i) { + struct bbpos k_start = BBPOS(entry->btree_id, bkey_start_pos(&k->k)); + struct bbpos k_end = BBPOS(entry->btree_id, k->k.p); + + if (bbpos_cmp(k_start, i->end) < 0 && + bbpos_cmp(k_end, i->start) > 0) + return true; + } + return false; +} + +static bool entry_matches_transaction_filter(struct jset_entry *entry, + d_bbpos_range filter) +{ + if (entry->type == BCH_JSET_ENTRY_btree_root || + entry->type == BCH_JSET_ENTRY_btree_keys || + entry->type == BCH_JSET_ENTRY_overwrite) + jset_entry_for_each_key(entry, k) + if (bkey_matches_filter(filter, entry, k)) + return true; + return false; +} + +static bool should_print_transaction(struct jset_entry *entry, struct jset_entry *end, + darray_str msg_filter, + d_bbpos_range key_filter) +{ + struct jset_entry_log *l = container_of(entry, struct jset_entry_log, entry); + unsigned b = jset_entry_log_msg_bytes(l); + + darray_for_each(msg_filter, i) + if (!strncmp(*i, l->d, b)) + return false; + + if 
(!key_filter.nr) + return true; + + for (entry = vstruct_next(entry); + entry != end && !entry_is_transaction_start(entry); + entry = vstruct_next(entry)) + if (entry_matches_transaction_filter(entry, key_filter)) + return true; + + return false; +} + +static bool should_print_entry(struct jset_entry *entry, d_btree_id filter) +{ + if (!filter.nr) + return true; + + if (entry->type != BCH_JSET_ENTRY_btree_root && + entry->type != BCH_JSET_ENTRY_btree_keys && + entry->type != BCH_JSET_ENTRY_overwrite) + return true; + + jset_entry_for_each_key(entry, k) + darray_for_each(filter, id) + if (entry->btree_id == *id) + return true; + + return false; +} + +static void journal_entry_header_to_text(struct printbuf *out, + struct bch_fs *c, + struct journal_replay *p, bool blacklisted) +{ + if (blacklisted) + prt_str(out, "blacklisted "); + + prt_printf(out, + "journal entry %llu\n" + " version %u\n" + " last seq %llu\n" + " flush %u\n" + " written at ", + le64_to_cpu(p->j.seq), + le32_to_cpu(p->j.version), + le64_to_cpu(p->j.last_seq), + !JSET_NO_FLUSH(&p->j)); + bch2_journal_ptrs_to_text(out, c, p); + + if (blacklisted) + star_start_of_lines(out->buf); +} + +static void journal_entry_header_print(struct bch_fs *c, struct journal_replay *p, bool blacklisted) +{ + struct printbuf buf = PRINTBUF; + journal_entry_header_to_text(&buf, c, p, blacklisted); + printf("%s\n", buf.buf); + printbuf_exit(&buf); +} + +static void journal_entries_print(struct bch_fs *c, unsigned nr_entries, + darray_str transaction_msg_filter, + d_bbpos_range transaction_key_filter, + d_btree_id key_filter) +{ + struct journal_replay *p, **_p; + struct genradix_iter iter; + struct printbuf buf = PRINTBUF; + + genradix_for_each(&c->journal_entries, iter, _p) { + bool printed_header = false; + + p = *_p; + if (!p) + continue; + + if (le64_to_cpu(p->j.seq) + nr_entries < atomic64_read(&c->journal.seq)) + continue; + + bool blacklisted = p->ignore_blacklisted || + bch2_journal_seq_is_blacklisted(c, + 
le64_to_cpu(p->j.seq), false); + + if (!transaction_msg_filter.nr && + !transaction_key_filter.nr) { + journal_entry_header_print(c, p, blacklisted); + printed_header = true; + } + + struct jset_entry *entry = p->j.start; + struct jset_entry *end = vstruct_last(&p->j); + while (entry != end) { + + /* + * log entries denote the start of a new transaction + * commit: + */ + if (entry_is_transaction_start(entry)) { + if (!should_print_transaction(entry, end, + transaction_msg_filter, + transaction_key_filter)) { + do { + entry = vstruct_next(entry); + } while (entry != end && !entry_is_transaction_start(entry)); + + continue; + } + + prt_newline(&buf); + } + + if (!should_print_entry(entry, key_filter)) + goto next; + + if (!printed_header) + journal_entry_header_print(c, p, blacklisted); + printed_header = true; + + bool highlight = entry_matches_transaction_filter(entry, transaction_key_filter); + if (highlight) + fputs(RED, stdout); + + printbuf_indent_add(&buf, 4); + bch2_journal_entry_to_text(&buf, c, entry); + + if (blacklisted) + star_start_of_lines(buf.buf); + printf("%s\n", buf.buf); + printbuf_reset(&buf); + + if (highlight) + fputs(NORMAL, stdout); +next: + entry = vstruct_next(entry); + } + } + + printbuf_exit(&buf); +} + +int cmd_list_journal(int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "nr-entries", required_argument, NULL, 'n' }, + { "transaction-filter", required_argument, NULL, 't' }, + { "key-filter", required_argument, NULL, 'k' }, + { "verbose", no_argument, NULL, 'v' }, + { "help", no_argument, NULL, 'h' }, + { NULL } + }; + struct bch_opts opts = bch2_opts_empty(); + u32 nr_entries = U32_MAX; + darray_str transaction_msg_filter = {}; + d_bbpos_range transaction_key_filter = {}; + d_btree_id key_filter = {}; + int opt; + + opt_set(opts, noexcl, true); + opt_set(opts, nochanges, true); + opt_set(opts, norecovery, true); + opt_set(opts, read_only, true); + opt_set(opts, degraded, true); + opt_set(opts, very_degraded, 
true); + opt_set(opts, errors, BCH_ON_ERROR_continue); + opt_set(opts, fix_errors, FSCK_FIX_yes); + opt_set(opts, retain_recovery_info ,true); + opt_set(opts, read_journal_only,true); + + while ((opt = getopt_long(argc, argv, "an:m:t:k:vh", + longopts, NULL)) != -1) + switch (opt) { + case 'a': + opt_set(opts, read_entire_journal, true); + break; + case 'n': + if (kstrtouint(optarg, 10, &nr_entries)) + die("error parsing nr_entries"); + opt_set(opts, read_entire_journal, true); + break; + case 'm': + darray_push(&transaction_msg_filter, strdup(optarg)); + break; + case 't': + darray_push(&transaction_key_filter, bbpos_range_parse(optarg)); + break; + case 'k': + darray_push(&key_filter, read_string_list_or_die(optarg, __bch2_btree_ids, "btree id")); + break; + case 'v': + opt_set(opts, verbose, true); + break; + case 'h': + list_journal_usage(); + exit(EXIT_SUCCESS); + } + args_shift(optind); + + if (!argc) + die("Please supply device(s) to open"); + + darray_str devs = get_or_split_cmdline_devs(argc, argv); + + struct bch_fs *c = bch2_fs_open(devs.data, devs.nr, opts); + if (IS_ERR(c)) + die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c))); + + journal_entries_print(c, nr_entries, + transaction_msg_filter, + transaction_key_filter, + key_filter); + bch2_fs_stop(c); + return 0; +} diff --git a/c_src/cmd_migrate.c b/c_src/cmd_migrate.c new file mode 100644 index 00000000..a5b7786d --- /dev/null +++ b/c_src/cmd_migrate.c @@ -0,0 +1,426 @@ +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <sys/vfs.h> +#include <unistd.h> + +#include <linux/fiemap.h> +#include <linux/fs.h> +#include <linux/stat.h> + +#include <uuid/uuid.h> + +#include "cmds.h" +#include "crypto.h" +#include "libbcachefs.h" +#include "posix_to_bcachefs.h" + +#include <linux/dcache.h> +#include <linux/generic-radix-tree.h> +#include 
"libbcachefs/bcachefs.h" +#include "libbcachefs/btree_update.h" +#include "libbcachefs/buckets.h" +#include "libbcachefs/dirent.h" +#include "libbcachefs/errcode.h" +#include "libbcachefs/inode.h" +#include "libbcachefs/replicas.h" +#include "libbcachefs/super.h" + +/* XXX cut and pasted from fsck.c */ +#define QSTR(n) { { { .len = strlen(n) } }, .name = n } + +static char *dev_t_to_path(dev_t dev) +{ + char link[PATH_MAX], *p; + int ret; + + char *sysfs_dev = mprintf("/sys/dev/block/%u:%u", + major(dev), minor(dev)); + ret = readlink(sysfs_dev, link, sizeof(link)); + free(sysfs_dev); + + if (ret < 0 || ret >= sizeof(link)) + die("readlink error while looking up block device: %m"); + + link[ret] = '\0'; + + p = strrchr(link, '/'); + if (!p) + die("error looking up device name"); + p++; + + return mprintf("/dev/%s", p); +} + +static bool path_is_fs_root(const char *path) +{ + char *line = NULL, *p, *mount; + size_t n = 0; + FILE *f; + bool ret = true; + + f = fopen("/proc/self/mountinfo", "r"); + if (!f) + die("Error getting mount information"); + + while (getline(&line, &n, f) != -1) { + p = line; + + strsep(&p, " "); /* mount id */ + strsep(&p, " "); /* parent id */ + strsep(&p, " "); /* dev */ + strsep(&p, " "); /* root */ + mount = strsep(&p, " "); + strsep(&p, " "); + + if (mount && !strcmp(path, mount)) + goto found; + } + + ret = false; +found: + fclose(f); + free(line); + return ret; +} + +static void mark_unreserved_space(struct bch_fs *c, ranges extents) +{ + struct bch_dev *ca = c->devs[0]; + struct hole_iter iter; + struct range i; + + for_each_hole(iter, extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) { + u64 b; + + if (i.start == i.end) + return; + + b = sector_to_bucket(ca, i.start >> 9); + do { + set_bit(b, ca->buckets_nouse); + b++; + } while (bucket_to_sector(ca, b) << 9 < i.end); + } +} + +static ranges reserve_new_fs_space(const char *file_path, unsigned block_size, + u64 size, u64 *bcachefs_inum, dev_t dev, + bool force) +{ + int fd = 
force + ? open(file_path, O_RDWR|O_CREAT, 0600) + : open(file_path, O_RDWR|O_CREAT|O_EXCL, 0600); + if (fd < 0) + die("Error creating %s for bcachefs metadata: %m", + file_path); + + struct stat statbuf = xfstat(fd); + + if (statbuf.st_dev != dev) + die("bcachefs file has incorrect device"); + + *bcachefs_inum = statbuf.st_ino; + + if (fallocate(fd, 0, 0, size)) + die("Error reserving space for bcachefs metadata: %m"); + + fsync(fd); + + struct fiemap_iter iter; + struct fiemap_extent e; + ranges extents = { 0 }; + + fiemap_for_each(fd, iter, e) { + if (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN| + FIEMAP_EXTENT_ENCODED| + FIEMAP_EXTENT_NOT_ALIGNED| + FIEMAP_EXTENT_DATA_INLINE)) + die("Unable to continue: metadata file not fully mapped"); + + if ((e.fe_physical & (block_size - 1)) || + (e.fe_length & (block_size - 1))) + die("Unable to continue: unaligned extents in metadata file"); + + range_add(&extents, e.fe_physical, e.fe_length); + } + fiemap_iter_exit(&iter); + close(fd); + + ranges_sort_merge(&extents); + return extents; +} + +static void find_superblock_space(ranges extents, + struct format_opts opts, + struct dev_opts *dev) +{ + darray_for_each(extents, i) { + u64 start = round_up(max(256ULL << 10, i->start), + dev->bucket_size << 9); + u64 end = round_down(i->end, + dev->bucket_size << 9); + + /* Need space for two superblocks: */ + if (start + (opts.superblock_size << 9) * 2 <= end) { + dev->sb_offset = start >> 9; + dev->sb_end = dev->sb_offset + opts.superblock_size * 2; + return; + } + } + + die("Couldn't find a valid location for superblock"); +} + +static void migrate_usage(void) +{ + puts("bcachefs migrate - migrate an existing filesystem to bcachefs\n" + "Usage: bcachefs migrate [OPTION]...\n" + "\n" + "Options:\n" + " -f fs Root of filesystem to migrate(s)\n" + " --encrypted Enable whole filesystem encryption (chacha20/poly1305)\n" + " --no_passphrase Don't encrypt master encryption key\n" + " -F Force, even if metadata file already exists\n" + " -h 
Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +static const struct option migrate_opts[] = { + { "encrypted", no_argument, NULL, 'e' }, + { "no_passphrase", no_argument, NULL, 'p' }, + { NULL } +}; + +static int migrate_fs(const char *fs_path, + struct bch_opt_strs fs_opt_strs, + struct bch_opts fs_opts, + struct format_opts format_opts, + bool force) +{ + if (!path_is_fs_root(fs_path)) + die("%s is not a filesystem root", fs_path); + + int fs_fd = xopen(fs_path, O_RDONLY|O_NOATIME); + struct stat stat = xfstat(fs_fd); + + if (!S_ISDIR(stat.st_mode)) + die("%s is not a directory", fs_path); + + struct dev_opts dev = dev_opts_default(); + + dev.path = dev_t_to_path(stat.st_dev); + dev.file = bdev_file_open_by_path(dev.path, BLK_OPEN_READ|BLK_OPEN_WRITE, &dev, NULL); + + int ret = PTR_ERR_OR_ZERO(dev.file); + if (ret < 0) + die("Error opening device to format %s: %s", dev.path, strerror(-ret)); + dev.bdev = file_bdev(dev.file); + + opt_set(fs_opts, block_size, get_blocksize(dev.bdev->bd_fd)); + + char *file_path = mprintf("%s/bcachefs", fs_path); + printf("Creating new filesystem on %s in space reserved at %s\n", + dev.path, file_path); + + dev.size = get_size(dev.bdev->bd_fd); + dev.bucket_size = bch2_pick_bucket_size(fs_opts, &dev); + dev.nbuckets = dev.size / dev.bucket_size; + + bch2_check_bucket_size(fs_opts, &dev); + + u64 bcachefs_inum; + ranges extents = reserve_new_fs_space(file_path, + fs_opts.block_size >> 9, + get_size(dev.bdev->bd_fd) / 5, + &bcachefs_inum, stat.st_dev, force); + + find_superblock_space(extents, format_opts, &dev); + + struct bch_sb *sb = bch2_format(fs_opt_strs, + fs_opts, format_opts, &dev, 1); + u64 sb_offset = le64_to_cpu(sb->layout.sb_offset[0]); + + if (format_opts.passphrase) + bch2_add_key(sb, "user", "user", format_opts.passphrase); + + free(sb); + + struct bch_opts opts = bch2_opts_empty(); + struct bch_fs *c = NULL; + char *path[1] = { dev.path }; + + opt_set(opts, sb, sb_offset); + 
opt_set(opts, nostart, true); + opt_set(opts, noexcl, true); + opt_set(opts, nostart, true); + + c = bch2_fs_open(path, 1, opts); + if (IS_ERR(c)) + die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c))); + + ret = bch2_buckets_nouse_alloc(c); + if (ret) + die("Error allocating buckets_nouse: %s", bch2_err_str(ret)); + + ret = bch2_fs_start(c); + if (IS_ERR(c)) + die("Error starting new filesystem: %s", bch2_err_str(ret)); + + mark_unreserved_space(c, extents); + + ret = bch2_fs_start(c); + if (ret) + die("Error starting new filesystem: %s", bch2_err_str(ret)); + + struct copy_fs_state s = { + .bcachefs_inum = bcachefs_inum, + .dev = stat.st_dev, + .extents = extents, + .type = BCH_MIGRATE_migrate, + }; + + copy_fs(c, fs_fd, fs_path, &s); + + bch2_fs_stop(c); + + printf("Migrate complete, running fsck:\n"); + opt_set(opts, nostart, false); + opt_set(opts, nochanges, true); + opt_set(opts, read_only, true); + + c = bch2_fs_open(path, 1, opts); + if (IS_ERR(c)) + die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c))); + + bch2_fs_stop(c); + printf("fsck complete\n"); + + printf("To mount the new filesystem, run\n" + " mount -t bcachefs -o sb=%llu %s dir\n" + "\n" + "After verifying that the new filesystem is correct, to create a\n" + "superblock at the default offset and finish the migration run\n" + " bcachefs migrate-superblock -d %s -o %llu\n" + "\n" + "The new filesystem will have a file at /old_migrated_filesystem\n" + "referencing all disk space that might be used by the existing\n" + "filesystem. 
That file can be deleted once the old filesystem is\n" + "no longer needed (and should be deleted prior to running\n" + "bcachefs migrate-superblock)\n", + sb_offset, dev.path, dev.path, sb_offset); + return 0; +} + +int cmd_migrate(int argc, char *argv[]) +{ + struct format_opts format_opts = format_opts_default(); + char *fs_path = NULL; + bool no_passphrase = false, force = false; + int opt; + + struct bch_opt_strs fs_opt_strs = + bch2_cmdline_opts_get(&argc, argv, OPT_FORMAT); + struct bch_opts fs_opts = bch2_parse_opts(fs_opt_strs); + + while ((opt = getopt_long(argc, argv, "f:Fh", + migrate_opts, NULL)) != -1) + switch (opt) { + case 'f': + fs_path = optarg; + break; + case 'e': + format_opts.encrypted = true; + break; + case 'p': + no_passphrase = true; + break; + case 'F': + force = true; + break; + case 'h': + migrate_usage(); + exit(EXIT_SUCCESS); + } + + if (!fs_path) + die("Please specify a filesystem to migrate"); + + if (format_opts.encrypted && !no_passphrase) + format_opts.passphrase = read_passphrase_twice("Enter passphrase: "); + + int ret = migrate_fs(fs_path, + fs_opt_strs, + fs_opts, + format_opts, force); + bch2_opt_strs_free(&fs_opt_strs); + return ret; +} + +static void migrate_superblock_usage(void) +{ + puts("bcachefs migrate-superblock - create default superblock after migrating\n" + "Usage: bcachefs migrate-superblock [OPTION]...\n" + "\n" + "Options:\n" + " -d device Device to create superblock for\n" + " -o offset Offset of existing superblock\n" + " -h Display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); +} + +int cmd_migrate_superblock(int argc, char *argv[]) +{ + char *dev = NULL; + u64 offset = 0; + int opt, ret; + + while ((opt = getopt(argc, argv, "d:o:h")) != -1) + switch (opt) { + case 'd': + dev = optarg; + break; + case 'o': + ret = kstrtou64(optarg, 10, &offset); + if (ret) + die("Invalid offset"); + break; + case 'h': + migrate_superblock_usage(); + exit(EXIT_SUCCESS); + } + + if (!dev) + 
die("Please specify a device"); + + if (!offset) + die("Please specify offset of existing superblock"); + + int fd = xopen(dev, O_RDWR); + struct bch_sb *sb = __bch2_super_read(fd, offset); + + if (sb->layout.nr_superblocks >= ARRAY_SIZE(sb->layout.sb_offset)) + die("Can't add superblock: no space left in superblock layout"); + + unsigned i; + for (i = 0; i < sb->layout.nr_superblocks; i++) + if (le64_to_cpu(sb->layout.sb_offset[i]) == BCH_SB_SECTOR) + die("Superblock layout already has default superblock"); + + memmove(&sb->layout.sb_offset[1], + &sb->layout.sb_offset[0], + sb->layout.nr_superblocks * sizeof(u64)); + sb->layout.nr_superblocks++; + + sb->layout.sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR); + + bch2_super_write(fd, sb); + close(fd); + + return 0; +} diff --git a/c_src/cmd_option.c b/c_src/cmd_option.c new file mode 100644 index 00000000..21048d7d --- /dev/null +++ b/c_src/cmd_option.c @@ -0,0 +1,168 @@ +/* + * Authors: Kent Overstreet <kent.overstreet@gmail.com> + * + * GPLv2 + */ +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <uuid/uuid.h> + +#include "cmds.h" +#include "libbcachefs.h" +#include "libbcachefs/errcode.h" +#include "libbcachefs/opts.h" +#include "libbcachefs/super-io.h" + +static void set_option_usage(void) +{ + puts("bcachefs set-fs-option \n" + "Usage: bcachefs set-fs-option [OPTION].. 
device\n" + "\n" + "Options:\n"); + bch2_opts_usage(OPT_MOUNT); + puts(" -h, --help display this help and exit\n" + "Report bugs to <linux-bcachefs@vger.kernel.org>"); + exit(EXIT_SUCCESS); +} + +static int name_to_dev_idx(struct bch_fs *c, const char *dev) +{ + int ret = -1; + + rcu_read_lock(); + for_each_member_device_rcu(c, ca, NULL) + if (!strcmp(ca->name, dev)) { + ret = ca->dev_idx; + break; + } + rcu_read_unlock(); + + return ret; +} + +int cmd_set_option(int argc, char *argv[]) +{ + struct bch_opt_strs new_opt_strs = bch2_cmdline_opts_get(&argc, argv, OPT_MOUNT|OPT_DEVICE); + struct bch_opts new_opts = bch2_parse_opts(new_opt_strs); + unsigned i; + int opt, ret = 0; + + while ((opt = getopt(argc, argv, "h")) != -1) + switch (opt) { + case 'h': + set_option_usage(); + break; + } + args_shift(optind); + + if (!argc) { + fprintf(stderr, "Please supply device(s)\n"); + exit(EXIT_FAILURE); + } + + bool online = false; + for (i = 0; i < argc; i++) + if (dev_mounted(argv[i])) { + online = true; + break; + } + + if (!online) { + struct bch_opts open_opts = bch2_opts_empty(); + opt_set(open_opts, nostart, true); + + struct bch_fs *c = bch2_fs_open(argv, argc, open_opts); + if (IS_ERR(c)) { + fprintf(stderr, "error opening %s: %s\n", argv[0], bch2_err_str(PTR_ERR(c))); + exit(EXIT_FAILURE); + } + + for (i = 0; i < bch2_opts_nr; i++) { + const struct bch_option *opt = bch2_opt_table + i; + + u64 v = bch2_opt_get_by_id(&new_opts, i); + + if (!bch2_opt_defined_by_id(&new_opts, i)) + continue; + + ret = bch2_opt_check_may_set(c, i, v); + if (ret < 0) { + fprintf(stderr, "error setting %s: %i\n", opt->attr.name, ret); + continue; + } + + if (!(opt->flags & (OPT_FS|OPT_DEVICE))) + fprintf(stderr, "Can't set option %s\n", opt->attr.name); + + if (opt->flags & OPT_FS) { + bch2_opt_set_sb(c, NULL, opt, v); + } + + if (opt->flags & OPT_DEVICE) { + for (unsigned dev = 0; dev < argc; dev++) { + int dev_idx = name_to_dev_idx(c, argv[dev]); + if (dev_idx < 0) { + fprintf(stderr, 
"Couldn't look up device %s\n", argv[i]); + continue; + } + + bch2_opt_set_sb(c, c->devs[dev_idx], opt, v); + } + } + } + + bch2_fs_stop(c); + return ret; + } else { + unsigned dev_idx; + struct bchfs_handle fs = bchu_fs_open_by_dev(argv[i], &dev_idx); + + for (i = 0; i < argc; i++) { + struct bchfs_handle fs2 = bchu_fs_open_by_dev(argv[i], &dev_idx); + if (memcmp(&fs.uuid, &fs2.uuid, sizeof(fs2.uuid))) + die("Filesystem mounted, but not all devices are members"); + bcache_fs_close(fs2); + } + + for (i = 0; i < bch2_opts_nr; i++) { + if (!new_opt_strs.by_id[i]) + continue; + + const struct bch_option *opt = bch2_opt_table + i; + + if (!(opt->flags & (OPT_FS|OPT_DEVICE))) + fprintf(stderr, "Can't set option %s\n", opt->attr.name); + + if (opt->flags & OPT_FS) { + char *path = mprintf("options/%s", opt->attr.name); + + write_file_str(fs.sysfs_fd, path, new_opt_strs.by_id[i]); + free(path); + } + + if (opt->flags & OPT_DEVICE) { + for (unsigned dev = 0; dev < argc; dev++) { + struct bchfs_handle fs2 = bchu_fs_open_by_dev(argv[i], &dev_idx); + bcache_fs_close(fs2); + + + char *path = mprintf("dev-%u/%s", dev_idx, opt->attr.name); + write_file_str(fs.sysfs_fd, path, new_opt_strs.by_id[i]); + free(path); + } + } + } + } + return 0; +} diff --git a/c_src/cmd_run.c b/c_src/cmd_run.c new file mode 100644 index 00000000..1bf84e5c --- /dev/null +++ b/c_src/cmd_run.c @@ -0,0 +1,33 @@ + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <fcntl.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include <uuid/uuid.h> + +#include "libbcachefs/bcachefs_ioctl.h" +#include "cmds.h" +#include "libbcachefs.h" + +#if 0 +int cmd_run(int argc, char *argv[]) +{ + return 0; +} + +int cmd_stop(int argc, char *argv[]) +{ + if (argc != 2) + die("Please supply a filesystem"); + + struct bchfs_handle fs = bcache_fs_open(argv[1]); + xioctl(fs.ioctl_fd, BCH_IOCTL_STOP); + return 0; +} +#endif diff --git a/c_src/cmd_version.c 
b/c_src/cmd_version.c new file mode 100644 index 00000000..5fe30e5e --- /dev/null +++ b/c_src/cmd_version.c @@ -0,0 +1,9 @@ +#include <stdio.h> + +#include "cmds.h" + +int cmd_version(int argc, char *argv[]) +{ + printf("%s\n", VERSION_STRING); + return 0; +} diff --git a/c_src/cmds.h b/c_src/cmds.h new file mode 100644 index 00000000..64267dc4 --- /dev/null +++ b/c_src/cmds.h @@ -0,0 +1,63 @@ +/* + * Author: Kent Overstreet <kent.overstreet@gmail.com> + * + * GPLv2 + */ + +#ifndef _CMDS_H +#define _CMDS_H + +#include "tools-util.h" + +int cmd_format(int argc, char *argv[]); +int cmd_show_super(int argc, char *argv[]); +int cmd_reset_counters(int argc, char *argv[]); +int cmd_set_option(int argc, char *argv[]); + +int cmd_fs_usage(int argc, char *argv[]); + +int device_usage(void); +int cmd_device_add(int argc, char *argv[]); +int cmd_device_remove(int argc, char *argv[]); +int cmd_device_online(int argc, char *argv[]); +int cmd_device_offline(int argc, char *argv[]); +int cmd_device_evacuate(int argc, char *argv[]); +int cmd_device_set_state(int argc, char *argv[]); +int cmd_device_resize(int argc, char *argv[]); +int cmd_device_resize_journal(int argc, char *argv[]); + +int data_usage(void); +int cmd_data_rereplicate(int argc, char *argv[]); +int cmd_data_job(int argc, char *argv[]); + +int cmd_unlock(int argc, char *argv[]); +int cmd_set_passphrase(int argc, char *argv[]); +int cmd_remove_passphrase(int argc, char *argv[]); + +int cmd_fsck(int argc, char *argv[]); + +int cmd_dump(int argc, char *argv[]); +int cmd_list_journal(int argc, char *argv[]); +int cmd_kill_btree_node(int argc, char *argv[]); + +int cmd_migrate(int argc, char *argv[]); +int cmd_migrate_superblock(int argc, char *argv[]); + +int cmd_version(int argc, char *argv[]); + +int cmd_setattr(int argc, char *argv[]); + +int subvolume_usage(void); +int cmd_subvolume_create(int argc, char *argv[]); +int cmd_subvolume_delete(int argc, char *argv[]); +int cmd_subvolume_snapshot(int argc, char *argv[]); 
+ +int cmd_fusemount(int argc, char *argv[]); + +void bcachefs_usage(void); +int device_cmds(int argc, char *argv[]); +int fs_cmds(int argc, char *argv[]); +int data_cmds(int argc, char *argv[]); +int subvolume_cmds(int argc, char *argv[]); + +#endif /* _CMDS_H */ diff --git a/c_src/config.h b/c_src/config.h new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/c_src/config.h diff --git a/c_src/crypto.c b/c_src/crypto.c new file mode 100644 index 00000000..32671bd8 --- /dev/null +++ b/c_src/crypto.c @@ -0,0 +1,201 @@ +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <termios.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <unistd.h> + +#include <keyutils.h> +#include <linux/random.h> +#include <sodium/crypto_pwhash_scryptsalsa208sha256.h> +#include <uuid/uuid.h> + +#include "libbcachefs/checksum.h" +#include "crypto.h" + +char *read_passphrase(const char *prompt) +{ + char *buf = NULL; + size_t buflen = 0; + ssize_t len; + + if (isatty(STDIN_FILENO)) { + struct termios old, new; + + fprintf(stderr, "%s", prompt); + fflush(stderr); + + if (tcgetattr(STDIN_FILENO, &old)) + die("error getting terminal attrs"); + + new = old; + new.c_lflag &= ~ECHO; + if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &new)) + die("error setting terminal attrs"); + + len = getline(&buf, &buflen, stdin); + + tcsetattr(STDIN_FILENO, TCSAFLUSH, &old); + fprintf(stderr, "\n"); + } else { + len = getline(&buf, &buflen, stdin); + } + + if (len < 0) + die("error reading passphrase"); + if (len && buf[len - 1] == '\n') + buf[len - 1] = '\0'; + + return buf; +} + +char *read_passphrase_twice(const char *prompt) +{ + char *pass = read_passphrase(prompt); + + if (!isatty(STDIN_FILENO)) + return pass; + + char *pass2 = read_passphrase("Enter same passphrase again: "); + + if (strcmp(pass, pass2)) { + memzero_explicit(pass, strlen(pass)); + memzero_explicit(pass2, strlen(pass2)); + 
die("Passphrases do not match"); + } + + memzero_explicit(pass2, strlen(pass2)); + free(pass2); + + return pass; +} + +struct bch_key derive_passphrase(struct bch_sb_field_crypt *crypt, + const char *passphrase) +{ + const unsigned char salt[] = "bcache"; + struct bch_key key; + int ret; + + switch (BCH_CRYPT_KDF_TYPE(crypt)) { + case BCH_KDF_SCRYPT: + ret = crypto_pwhash_scryptsalsa208sha256_ll( + (void *) passphrase, strlen(passphrase), + salt, sizeof(salt), + 1ULL << BCH_KDF_SCRYPT_N(crypt), + 1ULL << BCH_KDF_SCRYPT_R(crypt), + 1ULL << BCH_KDF_SCRYPT_P(crypt), + (void *) &key, sizeof(key)); + if (ret) + die("scrypt error: %i", ret); + break; + default: + die("unknown kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt)); + } + + return key; +} + +bool bch2_sb_is_encrypted(struct bch_sb *sb) +{ + struct bch_sb_field_crypt *crypt; + + return (crypt = bch2_sb_field_get(sb, crypt)) && + bch2_key_is_encrypted(&crypt->key); +} + +void bch2_passphrase_check(struct bch_sb *sb, const char *passphrase, + struct bch_key *passphrase_key, + struct bch_encrypted_key *sb_key) +{ + struct bch_sb_field_crypt *crypt = bch2_sb_field_get(sb, crypt); + if (!crypt) + die("filesystem is not encrypted"); + + *sb_key = crypt->key; + + if (!bch2_key_is_encrypted(sb_key)) + die("filesystem does not have encryption key"); + + *passphrase_key = derive_passphrase(crypt, passphrase); + + /* Check if the user supplied the correct passphrase: */ + if (bch2_chacha_encrypt_key(passphrase_key, __bch2_sb_key_nonce(sb), + sb_key, sizeof(*sb_key))) + die("error encrypting key"); + + if (bch2_key_is_encrypted(sb_key)) + die("incorrect passphrase"); +} + +void bch2_add_key(struct bch_sb *sb, + const char *type, + const char *keyring_str, + const char *passphrase) +{ + struct bch_key passphrase_key; + struct bch_encrypted_key sb_key; + int keyring; + + if (!strcmp(keyring_str, "session")) + keyring = KEY_SPEC_SESSION_KEYRING; + else if (!strcmp(keyring_str, "user")) + keyring = KEY_SPEC_USER_KEYRING; + else if 
(!strcmp(keyring_str, "user_session")) + keyring = KEY_SPEC_USER_SESSION_KEYRING; + else + die("unknown keyring %s", keyring_str); + + bch2_passphrase_check(sb, passphrase, + &passphrase_key, + &sb_key); + + char uuid[40]; + uuid_unparse_lower(sb->user_uuid.b, uuid); + + char *description = mprintf("bcachefs:%s", uuid); + + if (add_key(type, + description, + &passphrase_key, sizeof(passphrase_key), + keyring) < 0) + die("add_key error: %m"); + + memzero_explicit(description, strlen(description)); + free(description); + memzero_explicit(&passphrase_key, sizeof(passphrase_key)); + memzero_explicit(&sb_key, sizeof(sb_key)); +} + +void bch_sb_crypt_init(struct bch_sb *sb, + struct bch_sb_field_crypt *crypt, + const char *passphrase) +{ + crypt->key.magic = BCH_KEY_MAGIC; + get_random_bytes(&crypt->key.key, sizeof(crypt->key.key)); + + if (passphrase) { + + SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT); + SET_BCH_KDF_SCRYPT_N(crypt, ilog2(16384)); + SET_BCH_KDF_SCRYPT_R(crypt, ilog2(8)); + SET_BCH_KDF_SCRYPT_P(crypt, ilog2(16)); + + struct bch_key passphrase_key = derive_passphrase(crypt, passphrase); + + assert(!bch2_key_is_encrypted(&crypt->key)); + + if (bch2_chacha_encrypt_key(&passphrase_key, __bch2_sb_key_nonce(sb), + &crypt->key, sizeof(crypt->key))) + die("error encrypting key"); + + assert(bch2_key_is_encrypted(&crypt->key)); + + memzero_explicit(&passphrase_key, sizeof(passphrase_key)); + } +} diff --git a/c_src/crypto.h b/c_src/crypto.h new file mode 100644 index 00000000..baea6d86 --- /dev/null +++ b/c_src/crypto.h @@ -0,0 +1,22 @@ +#ifndef _CRYPTO_H +#define _CRYPTO_H + +#include "tools-util.h" + +struct bch_sb; +struct bch_sb_field_crypt; +struct bch_key; +struct bch_encrypted_key; + +char *read_passphrase(const char *); +char *read_passphrase_twice(const char *); + +struct bch_key derive_passphrase(struct bch_sb_field_crypt *, const char *); +bool bch2_sb_is_encrypted(struct bch_sb *); +void bch2_passphrase_check(struct bch_sb *, const char *, + struct 
bch_key *, struct bch_encrypted_key *); +void bch2_add_key(struct bch_sb *, const char *, const char *, const char *); +void bch_sb_crypt_init(struct bch_sb *sb, struct bch_sb_field_crypt *, + const char *); + +#endif /* _CRYPTO_H */ diff --git a/c_src/libbcachefs.c b/c_src/libbcachefs.c new file mode 100644 index 00000000..75cab72c --- /dev/null +++ b/c_src/libbcachefs.c @@ -0,0 +1,754 @@ +#include <ctype.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include <uuid/uuid.h> + +#include "libbcachefs.h" +#include "crypto.h" +#include "libbcachefs/bcachefs_format.h" +#include "libbcachefs/btree_cache.h" +#include "libbcachefs/buckets.h" +#include "libbcachefs/checksum.h" +#include "libbcachefs/disk_groups.h" +#include "libbcachefs/journal_seq_blacklist.h" +#include "libbcachefs/opts.h" +#include "libbcachefs/replicas.h" +#include "libbcachefs/super-io.h" +#include "tools-util.h" + +#define NSEC_PER_SEC 1000000000L + +static void init_layout(struct bch_sb_layout *l, + unsigned block_size, + unsigned sb_size, + u64 sb_start, u64 sb_end) +{ + u64 sb_pos = sb_start; + unsigned i; + + memset(l, 0, sizeof(*l)); + + l->magic = BCHFS_MAGIC; + l->layout_type = 0; + l->nr_superblocks = 2; + l->sb_max_size_bits = ilog2(sb_size); + + /* Create two superblocks in the allowed range: */ + for (i = 0; i < l->nr_superblocks; i++) { + if (sb_pos != BCH_SB_SECTOR) + sb_pos = round_up(sb_pos, block_size >> 9); + + l->sb_offset[i] = cpu_to_le64(sb_pos); + sb_pos += sb_size; + } + + if (sb_pos > sb_end) + die("insufficient space for superblocks: start %llu end %llu > %llu size %u", + sb_start, sb_pos, sb_end, sb_size); +} + +/* minimum size filesystem we can create, given a bucket size: */ +static u64 min_size(unsigned bucket_size) 
+{ + return BCH_MIN_NR_NBUCKETS * bucket_size; +} + +u64 bch2_pick_bucket_size(struct bch_opts opts, struct dev_opts *dev) +{ + u64 bucket_size; + + if (dev->size < min_size(opts.block_size)) + die("cannot format %s, too small (%llu bytes, min %llu)", + dev->path, dev->size, min_size(opts.block_size)); + + /* Bucket size must be >= block size: */ + bucket_size = opts.block_size; + + /* Bucket size must be >= btree node size: */ + if (opt_defined(opts, btree_node_size)) + bucket_size = max_t(unsigned, bucket_size, + opts.btree_node_size); + + /* Want a bucket size of at least 128k, if possible: */ + bucket_size = max(bucket_size, 128ULL << 10); + + if (dev->size >= min_size(bucket_size)) { + unsigned scale = max(1, + ilog2(dev->size / min_size(bucket_size)) / 4); + + scale = rounddown_pow_of_two(scale); + + /* max bucket size 1 mb */ + bucket_size = min(bucket_size * scale, 1ULL << 20); + } else { + do { + bucket_size /= 2; + } while (dev->size < min_size(bucket_size)); + } + + return bucket_size; +} + +void bch2_check_bucket_size(struct bch_opts opts, struct dev_opts *dev) +{ + if (dev->bucket_size < opts.block_size) + die("Bucket size (%llu) cannot be smaller than block size (%u)", + dev->bucket_size, opts.block_size); + + if (opt_defined(opts, btree_node_size) && + dev->bucket_size < opts.btree_node_size) + die("Bucket size (%llu) cannot be smaller than btree node size (%u)", + dev->bucket_size, opts.btree_node_size); + + if (dev->nbuckets < BCH_MIN_NR_NBUCKETS) + die("Not enough buckets: %llu, need %u (bucket size %llu)", + dev->nbuckets, BCH_MIN_NR_NBUCKETS, dev->bucket_size); + + if (dev->bucket_size > (u32) U16_MAX << 9) + die("Bucket size (%llu) too big (max %u)", + dev->bucket_size, (u32) U16_MAX << 9); +} + +static unsigned parse_target(struct bch_sb_handle *sb, + struct dev_opts *devs, size_t nr_devs, + const char *s) +{ + struct dev_opts *i; + int idx; + + if (!s) + return 0; + + for (i = devs; i < devs + nr_devs; i++) + if (!strcmp(s, i->path)) + return 
dev_to_target(i - devs); + + idx = bch2_disk_path_find(sb, s); + if (idx >= 0) + return group_to_target(idx); + + die("Invalid target %s", s); + return 0; +} + +struct bch_sb *bch2_format(struct bch_opt_strs fs_opt_strs, + struct bch_opts fs_opts, + struct format_opts opts, + struct dev_opts *devs, + size_t nr_devs) +{ + struct bch_sb_handle sb = { NULL }; + struct dev_opts *i; + unsigned max_dev_block_size = 0; + unsigned opt_id; + u64 min_bucket_size = U64_MAX; + + for (i = devs; i < devs + nr_devs; i++) + max_dev_block_size = max(max_dev_block_size, get_blocksize(i->bdev->bd_fd)); + + /* calculate block size: */ + if (!opt_defined(fs_opts, block_size)) { + opt_set(fs_opts, block_size, max_dev_block_size); + } else if (fs_opts.block_size < max_dev_block_size) + die("blocksize too small: %u, must be greater than device blocksize %u", + fs_opts.block_size, max_dev_block_size); + + /* get device size, if it wasn't specified: */ + for (i = devs; i < devs + nr_devs; i++) + if (!i->size) + i->size = get_size(i->bdev->bd_fd); + + /* calculate bucket sizes: */ + for (i = devs; i < devs + nr_devs; i++) + min_bucket_size = min(min_bucket_size, + i->bucket_size ?: bch2_pick_bucket_size(fs_opts, i)); + + for (i = devs; i < devs + nr_devs; i++) + if (!i->bucket_size) + i->bucket_size = min_bucket_size; + + for (i = devs; i < devs + nr_devs; i++) { + i->nbuckets = i->size / i->bucket_size; + bch2_check_bucket_size(fs_opts, i); + } + + /* calculate btree node size: */ + if (!opt_defined(fs_opts, btree_node_size)) { + /* 256k default btree node size */ + opt_set(fs_opts, btree_node_size, 256 << 10); + + for (i = devs; i < devs + nr_devs; i++) + fs_opts.btree_node_size = + min_t(unsigned, fs_opts.btree_node_size, + i->bucket_size); + } + + if (uuid_is_null(opts.uuid.b)) + uuid_generate(opts.uuid.b); + + if (bch2_sb_realloc(&sb, 0)) + die("insufficient memory"); + + sb.sb->version = le16_to_cpu(opts.version); + sb.sb->version_min = le16_to_cpu(opts.version); + sb.sb->magic = 
BCHFS_MAGIC; + sb.sb->user_uuid = opts.uuid; + sb.sb->nr_devices = nr_devs; + + if (opts.version == bcachefs_metadata_version_current) + sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); + + uuid_generate(sb.sb->uuid.b); + + if (opts.label) + memcpy(sb.sb->label, + opts.label, + min(strlen(opts.label), sizeof(sb.sb->label))); + + for (opt_id = 0; + opt_id < bch2_opts_nr; + opt_id++) { + u64 v; + + v = bch2_opt_defined_by_id(&fs_opts, opt_id) + ? bch2_opt_get_by_id(&fs_opts, opt_id) + : bch2_opt_get_by_id(&bch2_opts_default, opt_id); + + __bch2_opt_set_sb(sb.sb, -1, &bch2_opt_table[opt_id], v); + } + + struct timespec now; + if (clock_gettime(CLOCK_REALTIME, &now)) + die("error getting current time: %m"); + + sb.sb->time_base_lo = cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec); + sb.sb->time_precision = cpu_to_le32(1); + + /* Member info: */ + struct bch_sb_field_members_v2 *mi = + bch2_sb_field_resize(&sb, members_v2, + (sizeof(*mi) + sizeof(struct bch_member) * + nr_devs) / sizeof(u64)); + mi->member_bytes = cpu_to_le16(sizeof(struct bch_member)); + for (i = devs; i < devs + nr_devs; i++) { + struct bch_member *m = bch2_members_v2_get_mut(sb.sb, (i - devs)); + + uuid_generate(m->uuid.b); + m->nbuckets = cpu_to_le64(i->nbuckets); + m->first_bucket = 0; + m->bucket_size = cpu_to_le16(i->bucket_size >> 9); + + SET_BCH_MEMBER_DISCARD(m, i->discard); + SET_BCH_MEMBER_DATA_ALLOWED(m, i->data_allowed); + SET_BCH_MEMBER_DURABILITY(m, i->durability + 1); + } + + /* Disk labels*/ + for (i = devs; i < devs + nr_devs; i++) { + struct bch_member *m; + int idx; + + if (!i->label) + continue; + + idx = bch2_disk_path_find_or_create(&sb, i->label); + if (idx < 0) + die("error creating disk path: %s", strerror(-idx)); + + /* + * Recompute mi and m after each sb modification: its location + * in memory may have changed due to reallocation. 
+ */ + m = bch2_members_v2_get_mut(sb.sb, (i - devs)); + SET_BCH_MEMBER_GROUP(m, idx + 1); + } + + SET_BCH_SB_FOREGROUND_TARGET(sb.sb, + parse_target(&sb, devs, nr_devs, fs_opt_strs.foreground_target)); + SET_BCH_SB_BACKGROUND_TARGET(sb.sb, + parse_target(&sb, devs, nr_devs, fs_opt_strs.background_target)); + SET_BCH_SB_PROMOTE_TARGET(sb.sb, + parse_target(&sb, devs, nr_devs, fs_opt_strs.promote_target)); + SET_BCH_SB_METADATA_TARGET(sb.sb, + parse_target(&sb, devs, nr_devs, fs_opt_strs.metadata_target)); + + /* Crypt: */ + if (opts.encrypted) { + struct bch_sb_field_crypt *crypt = + bch2_sb_field_resize(&sb, crypt, sizeof(*crypt) / sizeof(u64)); + + bch_sb_crypt_init(sb.sb, crypt, opts.passphrase); + SET_BCH_SB_ENCRYPTION_TYPE(sb.sb, 1); + } + + bch2_sb_members_cpy_v2_v1(&sb); + + for (i = devs; i < devs + nr_devs; i++) { + u64 size_sectors = i->size >> 9; + + sb.sb->dev_idx = i - devs; + + if (!i->sb_offset) { + i->sb_offset = BCH_SB_SECTOR; + i->sb_end = size_sectors; + } + + init_layout(&sb.sb->layout, fs_opts.block_size, + opts.superblock_size, + i->sb_offset, i->sb_end); + + /* + * Also create a backup superblock at the end of the disk: + * + * If we're not creating a superblock at the default offset, it + * means we're being run from the migrate tool and we could be + * overwriting existing data if we write to the end of the disk: + */ + if (i->sb_offset == BCH_SB_SECTOR) { + struct bch_sb_layout *l = &sb.sb->layout; + u64 backup_sb = size_sectors - (1 << l->sb_max_size_bits); + + backup_sb = rounddown(backup_sb, i->bucket_size >> 9); + l->sb_offset[l->nr_superblocks++] = cpu_to_le64(backup_sb); + } + + if (i->sb_offset == BCH_SB_SECTOR) { + /* Zero start of disk */ + static const char zeroes[BCH_SB_SECTOR << 9]; + + xpwrite(i->bdev->bd_fd, zeroes, BCH_SB_SECTOR << 9, 0, + "zeroing start of disk"); + } + + bch2_super_write(i->bdev->bd_fd, sb.sb); + close(i->bdev->bd_fd); + } + + return sb.sb; +} + +void bch2_super_write(int fd, struct bch_sb *sb) +{ + struct 
nonce nonce = { 0 }; + unsigned bs = get_blocksize(fd); + + unsigned i; + for (i = 0; i < sb->layout.nr_superblocks; i++) { + sb->offset = sb->layout.sb_offset[i]; + + if (sb->offset == BCH_SB_SECTOR) { + /* Write backup layout */ + + BUG_ON(bs > 4096); + + char *buf = aligned_alloc(bs, bs); + xpread(fd, buf, bs, 4096 - bs); + memcpy(buf + bs - sizeof(sb->layout), + &sb->layout, + sizeof(sb->layout)); + xpwrite(fd, buf, bs, 4096 - bs, + "backup layout"); + free(buf); + + } + + sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb), nonce, sb); + xpwrite(fd, sb, round_up(vstruct_bytes(sb), bs), + le64_to_cpu(sb->offset) << 9, + "superblock"); + } + + fsync(fd); +} + +struct bch_sb *__bch2_super_read(int fd, u64 sector) +{ + struct bch_sb sb, *ret; + + xpread(fd, &sb, sizeof(sb), sector << 9); + + if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)) && + memcmp(&sb.magic, &BCHFS_MAGIC, sizeof(sb.magic))) + die("not a bcachefs superblock"); + + size_t bytes = vstruct_bytes(&sb); + + ret = malloc(bytes); + + xpread(fd, ret, bytes, sector << 9); + + return ret; +} + +/* ioctl interface: */ + +/* Global control device: */ +int bcachectl_open(void) +{ + return xopen("/dev/bcachefs-ctl", O_RDWR); +} + +/* Filesystem handles (ioctl, sysfs dir): */ + +#define SYSFS_BASE "/sys/fs/bcachefs/" + +void bcache_fs_close(struct bchfs_handle fs) +{ + close(fs.ioctl_fd); + close(fs.sysfs_fd); +} + +struct bchfs_handle bcache_fs_open(const char *path) +{ + struct bchfs_handle ret; + + if (!uuid_parse(path, ret.uuid.b)) { + /* It's a UUID, look it up in sysfs: */ + char *sysfs = mprintf(SYSFS_BASE "%s", path); + ret.sysfs_fd = xopen(sysfs, O_RDONLY); + + char *minor = read_file_str(ret.sysfs_fd, "minor"); + char *ctl = mprintf("/dev/bcachefs%s-ctl", minor); + ret.ioctl_fd = xopen(ctl, O_RDWR); + + free(sysfs); + free(minor); + free(ctl); + } else { + /* It's a path: */ + ret.ioctl_fd = open(path, O_RDONLY); + if (ret.ioctl_fd < 0) + die("Error opening filesystem at %s: %m", path); + + 
struct bch_ioctl_query_uuid uuid; + if (ioctl(ret.ioctl_fd, BCH_IOCTL_QUERY_UUID, &uuid) < 0) + die("error opening %s: not a bcachefs filesystem", path); + + ret.uuid = uuid.uuid; + + char uuid_str[40]; + uuid_unparse(uuid.uuid.b, uuid_str); + + char *sysfs = mprintf(SYSFS_BASE "%s", uuid_str); + ret.sysfs_fd = xopen(sysfs, O_RDONLY); + free(sysfs); + } + + return ret; +} + +/* + * Given a path to a block device, open the filesystem it belongs to; also + * return the device's idx: + */ +struct bchfs_handle bchu_fs_open_by_dev(const char *path, int *idx) +{ + struct bch_opts opts = bch2_opts_empty(); + char buf[1024], *uuid_str; + + struct stat stat = xstat(path); + + if (S_ISBLK(stat.st_mode)) { + char *sysfs = mprintf("/sys/dev/block/%u:%u/bcachefs", + major(stat.st_dev), + minor(stat.st_dev)); + + ssize_t len = readlink(sysfs, buf, sizeof(buf)); + free(sysfs); + + if (len <= 0) + goto read_super; + + char *p = strrchr(buf, '/'); + if (!p || sscanf(p + 1, "dev-%u", idx) != 1) + die("error parsing sysfs"); + + *p = '\0'; + p = strrchr(buf, '/'); + uuid_str = p + 1; + } else { +read_super: + opt_set(opts, noexcl, true); + opt_set(opts, nochanges, true); + + struct bch_sb_handle sb; + int ret = bch2_read_super(path, &opts, &sb); + if (ret) + die("Error opening %s: %s", path, strerror(-ret)); + + *idx = sb.sb->dev_idx; + uuid_str = buf; + uuid_unparse(sb.sb->user_uuid.b, uuid_str); + + bch2_free_super(&sb); + } + + return bcache_fs_open(uuid_str); +} + +int bchu_dev_path_to_idx(struct bchfs_handle fs, const char *dev_path) +{ + int idx; + struct bchfs_handle fs2 = bchu_fs_open_by_dev(dev_path, &idx); + + if (memcmp(&fs.uuid, &fs2.uuid, sizeof(fs.uuid))) + idx = -1; + bcache_fs_close(fs2); + return idx; +} + +int bchu_data(struct bchfs_handle fs, struct bch_ioctl_data cmd) +{ + int progress_fd = xioctl(fs.ioctl_fd, BCH_IOCTL_DATA, &cmd); + + while (1) { + struct bch_ioctl_data_event e; + + if (read(progress_fd, &e, sizeof(e)) != sizeof(e)) + die("error reading from 
progress fd %m"); + + if (e.type) + continue; + + if (e.p.data_type == U8_MAX) + break; + + printf("\33[2K\r"); + + printf("%llu%% complete: current position %s", + e.p.sectors_total + ? e.p.sectors_done * 100 / e.p.sectors_total + : 0, + bch2_data_type_str(e.p.data_type)); + + switch (e.p.data_type) { + case BCH_DATA_btree: + case BCH_DATA_user: + printf(" %s:%llu:%llu", + bch2_btree_id_str(e.p.btree_id), + e.p.pos.inode, + e.p.pos.offset); + } + + fflush(stdout); + sleep(1); + } + printf("\nDone\n"); + + close(progress_fd); + return 0; +} + +/* option parsing */ + +void bch2_opt_strs_free(struct bch_opt_strs *opts) +{ + unsigned i; + + for (i = 0; i < bch2_opts_nr; i++) { + free(opts->by_id[i]); + opts->by_id[i] = NULL; + } +} +/* Pull "--name[=value]" options whose flags match opt_types out of argv (shrinking *argc); values are returned by option id. */ +struct bch_opt_strs bch2_cmdline_opts_get(int *argc, char *argv[], + unsigned opt_types) +{ + struct bch_opt_strs opts; + unsigned i = 1; + + memset(&opts, 0, sizeof(opts)); + + while (i < *argc) { + char *optstr = strcmp_prefix(argv[i], "--"); + char *valstr = NULL, *p; + int optid, nr_args = 1; + + if (!optstr) { + i++; + continue; + } + + optstr = strdup(optstr); + + p = optstr; + while (isalpha((unsigned char) *p) || *p == '_') + p++; + + if (*p == '=') { + *p = '\0'; + valstr = p + 1; + } + + optid = bch2_opt_lookup(optstr); + if (optid < 0 || + !(bch2_opt_table[optid].flags & opt_types)) { + i++; + goto next; + } + + if (!valstr && + bch2_opt_table[optid].type != BCH_OPT_BOOL) { + valstr = argv[i + 1]; /* NULL when the option is the last argument */ + nr_args = valstr ? 2 : 1; /* only consume a value that exists */ + } + + if (!valstr) + valstr = "1"; + + opts.by_id[optid] = strdup(valstr); + + *argc -= nr_args; + memmove(&argv[i], + &argv[i + nr_args], + sizeof(char *) * (*argc - i)); + argv[*argc] = NULL; +next: + free(optstr); + } + + return opts; +} + +struct bch_opts bch2_parse_opts(struct bch_opt_strs strs) +{ + struct bch_opts opts = bch2_opts_empty(); + struct printbuf err = PRINTBUF; + unsigned i; + int ret; + u64 v; + + for (i = 0; i < bch2_opts_nr; i++) { + if (!strs.by_id[i]) + continue; + + ret = bch2_opt_parse(NULL, + 
&bch2_opt_table[i], + strs.by_id[i], &v, &err); + if (ret < 0 && ret != -BCH_ERR_option_needs_open_fs) + die("Invalid option %s", err.buf); + + bch2_opt_set_by_id(&opts, i, v); + } + + printbuf_exit(&err); + return opts; +} + +#define newline(c) \ + do { \ + printf("\n"); \ + c = 0; \ + } while(0) +void bch2_opts_usage(unsigned opt_types) +{ + const struct bch_option *opt; + unsigned i, c = 0, helpcol = 30; + + + + for (opt = bch2_opt_table; + opt < bch2_opt_table + bch2_opts_nr; + opt++) { + if (!(opt->flags & opt_types)) + continue; + + c += printf(" --%s", opt->attr.name); + + switch (opt->type) { + case BCH_OPT_BOOL: + break; + case BCH_OPT_STR: + c += printf("=("); + for (i = 0; opt->choices[i]; i++) { + if (i) + c += printf("|"); + c += printf("%s", opt->choices[i]); + } + c += printf(")"); + break; + default: + c += printf("=%s", opt->hint); + break; + } + + if (opt->help) { + const char *l = opt->help; + + if (c >= helpcol) + newline(c); + + while (1) { + const char *n = strchrnul(l, '\n'); + + while (c < helpcol) { + putchar(' '); + c++; + } + printf("%.*s", (int) (n - l), l); + newline(c); + + if (!*n) + break; + l = n + 1; + } + } else { + newline(c); + } + } +} + +dev_names bchu_fs_get_devices(struct bchfs_handle fs) +{ + DIR *dir = fdopendir(fs.sysfs_fd); + struct dirent *d; + dev_names devs; + + darray_init(&devs); + + while ((errno = 0), (d = readdir(dir))) { + struct dev_name n = { 0, NULL, NULL }; + + if (sscanf(d->d_name, "dev-%u", &n.idx) != 1) + continue; + + char *block_attr = mprintf("dev-%u/block", n.idx); + + char sysfs_block_buf[4096]; + ssize_t r = readlinkat(fs.sysfs_fd, block_attr, + sysfs_block_buf, sizeof(sysfs_block_buf)); + if (r > 0) { + sysfs_block_buf[r] = '\0'; + n.dev = strdup(basename(sysfs_block_buf)); + } + + free(block_attr); + + char *label_attr = mprintf("dev-%u/label", n.idx); + n.label = read_file_str(fs.sysfs_fd, label_attr); + free(label_attr); + + char *durability_attr = mprintf("dev-%u/durability", n.idx); + 
n.durability = read_file_u64(fs.sysfs_fd, durability_attr); + free(durability_attr); + + darray_push(&devs, n); + } + + closedir(dir); + + return devs; +} diff --git a/c_src/libbcachefs.h b/c_src/libbcachefs.h new file mode 100644 index 00000000..fc6eb8bf --- /dev/null +++ b/c_src/libbcachefs.h @@ -0,0 +1,300 @@ +#ifndef _LIBBCACHE_H +#define _LIBBCACHE_H + +#include <linux/uuid.h> +#include <stdbool.h> + +#include "libbcachefs/bcachefs.h" +#include "libbcachefs/bcachefs_format.h" +#include "libbcachefs/bcachefs_ioctl.h" +#include "libbcachefs/inode.h" +#include "libbcachefs/opts.h" +#include "libbcachefs/vstructs.h" +#include "tools-util.h" + +/* option parsing */ + +#define SUPERBLOCK_SIZE_DEFAULT 2048 /* 1 MB */ + +struct bch_opt_strs { +union { + char *by_id[bch2_opts_nr]; +struct { +#define x(_name, ...) char *_name; + BCH_OPTS() +#undef x +}; +}; +}; + +void bch2_opt_strs_free(struct bch_opt_strs *); +struct bch_opt_strs bch2_cmdline_opts_get(int *, char *[], unsigned); +struct bch_opts bch2_parse_opts(struct bch_opt_strs); +void bch2_opts_usage(unsigned); + +struct format_opts { + char *label; + __uuid_t uuid; + unsigned version; + unsigned superblock_size; + bool encrypted; + char *passphrase; + char *source; +}; + +static inline struct format_opts format_opts_default() +{ + unsigned version = !access( "/sys/module/bcachefs/parameters/version", R_OK) + ? 
read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version") + : bcachefs_metadata_version_current; + + return (struct format_opts) { + .version = version, + .superblock_size = SUPERBLOCK_SIZE_DEFAULT, + }; +} + +struct dev_opts { + struct file *file; + struct block_device *bdev; + char *path; + u64 size; /* bytes*/ + u64 bucket_size; /* bytes */ + const char *label; + unsigned data_allowed; + unsigned durability; + bool discard; + + u64 nbuckets; + + u64 sb_offset; + u64 sb_end; +}; + +static inline struct dev_opts dev_opts_default() +{ + return (struct dev_opts) { + .data_allowed = ~0U << 2, + .durability = 1, + }; +} + +u64 bch2_pick_bucket_size(struct bch_opts, struct dev_opts *); +void bch2_check_bucket_size(struct bch_opts, struct dev_opts *); + +struct bch_sb *bch2_format(struct bch_opt_strs, + struct bch_opts, + struct format_opts, struct dev_opts *, size_t); + +void bch2_super_write(int, struct bch_sb *); +struct bch_sb *__bch2_super_read(int, u64); + +/* ioctl interface: */ + +int bcachectl_open(void); + +struct bchfs_handle { + __uuid_t uuid; + int ioctl_fd; + int sysfs_fd; +}; + +void bcache_fs_close(struct bchfs_handle); +struct bchfs_handle bcache_fs_open(const char *); +struct bchfs_handle bchu_fs_open_by_dev(const char *, int *); +int bchu_dev_path_to_idx(struct bchfs_handle, const char *); + +static inline void bchu_disk_add(struct bchfs_handle fs, char *dev) +{ + struct bch_ioctl_disk i = { .dev = (unsigned long) dev, }; + + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ADD, &i); +} + +static inline void bchu_disk_remove(struct bchfs_handle fs, unsigned dev_idx, + unsigned flags) +{ + struct bch_ioctl_disk i = { + .flags = flags|BCH_BY_INDEX, + .dev = dev_idx, + }; + + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_REMOVE, &i); +} + +static inline void bchu_disk_online(struct bchfs_handle fs, char *dev) +{ + struct bch_ioctl_disk i = { .dev = (unsigned long) dev, }; + + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_ONLINE, &i); +} + +static inline void 
bchu_disk_offline(struct bchfs_handle fs, unsigned dev_idx, + unsigned flags) +{ + struct bch_ioctl_disk i = { + .flags = flags|BCH_BY_INDEX, + .dev = dev_idx, + }; + + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_OFFLINE, &i); +} + +static inline void bchu_disk_set_state(struct bchfs_handle fs, unsigned dev, + unsigned new_state, unsigned flags) +{ + struct bch_ioctl_disk_set_state i = { + .flags = flags|BCH_BY_INDEX, + .new_state = new_state, + .dev = dev, + }; + + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_SET_STATE, &i); +} + +static inline struct bch_ioctl_fs_usage *bchu_fs_usage(struct bchfs_handle fs) +{ + struct bch_ioctl_fs_usage *u = NULL; + size_t replica_entries_bytes = 4096; + + while (1) { + u = xrealloc(u, sizeof(*u) + replica_entries_bytes); + u->replica_entries_bytes = replica_entries_bytes; + + if (!ioctl(fs.ioctl_fd, BCH_IOCTL_FS_USAGE, u)) + return u; + + if (errno != ERANGE) + die("BCH_IOCTL_USAGE error: %m"); + + replica_entries_bytes *= 2; + } +} + +static inline struct bch_ioctl_query_accounting *bchu_fs_accounting(struct bchfs_handle fs, + unsigned typemask) +{ + unsigned accounting_u64s = 128; + struct bch_ioctl_query_accounting *ret = NULL; + + while (1) { + ret = xrealloc(ret, sizeof(*ret) + accounting_u64s * sizeof(u64)); + + memset(ret, 0, sizeof(*ret)); + + ret->accounting_u64s = accounting_u64s; + ret->accounting_types_mask = typemask; + + if (!ioctl(fs.ioctl_fd, BCH_IOCTL_QUERY_ACCOUNTING, ret)) + return ret; + + if (errno == ENOTTY) + return NULL; + + if (errno == ERANGE) { + accounting_u64s *= 2; + continue; + } + + die("BCH_IOCTL_USAGE error: %m"); + } +} + +static inline struct bch_ioctl_dev_usage_v2 *bchu_dev_usage(struct bchfs_handle fs, + unsigned idx) +{ + struct bch_ioctl_dev_usage_v2 *u = xcalloc(sizeof(*u) + sizeof(u->d[0]) * BCH_DATA_NR, 1); + + u->dev = idx; + u->flags = BCH_BY_INDEX; + u->nr_data_types = BCH_DATA_NR; + + if (!ioctl(fs.ioctl_fd, BCH_IOCTL_DEV_USAGE_V2, u)) + return u; + + struct bch_ioctl_dev_usage u_v1 = { .dev = idx, 
.flags = BCH_BY_INDEX}; + xioctl(fs.ioctl_fd, BCH_IOCTL_DEV_USAGE, &u_v1); + + u->state = u_v1.state; + u->nr_data_types = ARRAY_SIZE(u_v1.d); + u->bucket_size = u_v1.bucket_size; + u->nr_buckets = u_v1.nr_buckets; + + for (unsigned i = 0; i < ARRAY_SIZE(u_v1.d); i++) + u->d[i] = u_v1.d[i]; + + return u; +} + +static inline struct bch_sb *bchu_read_super(struct bchfs_handle fs, unsigned idx) +{ + size_t size = 4096; + struct bch_sb *sb = NULL; + + while (1) { + sb = xrealloc(sb, size); + struct bch_ioctl_read_super i = { + .size = size, + .sb = (unsigned long) sb, + }; + + if (idx != -1) { + i.flags |= BCH_READ_DEV|BCH_BY_INDEX; + i.dev = idx; + } + + if (!ioctl(fs.ioctl_fd, BCH_IOCTL_READ_SUPER, &i)) + return sb; + if (errno != ERANGE) + die("BCH_IOCTL_READ_SUPER error: %m"); + size *= 2; + } +} + +static inline unsigned bchu_disk_get_idx(struct bchfs_handle fs, dev_t dev) +{ + struct bch_ioctl_disk_get_idx i = { .dev = dev }; + + return xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_GET_IDX, &i); +} + +static inline void bchu_disk_resize(struct bchfs_handle fs, + unsigned idx, + u64 nbuckets) +{ + struct bch_ioctl_disk_resize i = { + .flags = BCH_BY_INDEX, + .dev = idx, + .nbuckets = nbuckets, + }; + + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_RESIZE, &i); +} + +static inline void bchu_disk_resize_journal(struct bchfs_handle fs, + unsigned idx, + u64 nbuckets) +{ + struct bch_ioctl_disk_resize i = { + .flags = BCH_BY_INDEX, + .dev = idx, + .nbuckets = nbuckets, + }; + + xioctl(fs.ioctl_fd, BCH_IOCTL_DISK_RESIZE_JOURNAL, &i); +} + +int bchu_data(struct bchfs_handle, struct bch_ioctl_data); + +struct dev_name { + unsigned idx; + char *dev; + char *label; + uuid_t uuid; + unsigned durability; +}; +typedef DARRAY(struct dev_name) dev_names; + +dev_names bchu_fs_get_devices(struct bchfs_handle); + +#endif /* _LIBBCACHE_H */ diff --git a/c_src/posix_to_bcachefs.c b/c_src/posix_to_bcachefs.c new file mode 100644 index 00000000..d4701263 --- /dev/null +++ b/c_src/posix_to_bcachefs.c @@ 
-0,0 +1,461 @@ +#include <dirent.h> +#include <sys/xattr.h> +#include <linux/xattr.h> + +#include "posix_to_bcachefs.h" +#include "libbcachefs/alloc_foreground.h" +#include "libbcachefs/buckets.h" +#include "libbcachefs/fs-common.h" +#include "libbcachefs/io_write.h" +#include "libbcachefs/str_hash.h" +#include "libbcachefs/xattr.h" + +void update_inode(struct bch_fs *c, + struct bch_inode_unpacked *inode) +{ + struct bkey_inode_buf packed; + int ret; + + bch2_inode_pack(&packed, inode); + packed.inode.k.p.snapshot = U32_MAX; + ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i, + NULL, 0, BTREE_ITER_cached); + if (ret) + die("error updating inode: %s", bch2_err_str(ret)); +} + +void create_link(struct bch_fs *c, + struct bch_inode_unpacked *parent, + const char *name, u64 inum, mode_t mode) +{ + struct qstr qstr = QSTR(name); + struct bch_inode_unpacked parent_u; + struct bch_inode_unpacked inode; + + int ret = bch2_trans_commit_do(c, NULL, NULL, 0, + bch2_link_trans(trans, + (subvol_inum) { 1, parent->bi_inum }, &parent_u, + (subvol_inum) { 1, inum }, &inode, &qstr)); + if (ret) + die("error creating hardlink: %s", bch2_err_str(ret)); +} + +struct bch_inode_unpacked create_file(struct bch_fs *c, + struct bch_inode_unpacked *parent, + const char *name, + uid_t uid, gid_t gid, + mode_t mode, dev_t rdev) +{ + struct qstr qstr = QSTR(name); + struct bch_inode_unpacked new_inode; + + bch2_inode_init_early(c, &new_inode); + + int ret = bch2_trans_commit_do(c, NULL, NULL, 0, + bch2_create_trans(trans, + (subvol_inum) { 1, parent->bi_inum }, parent, + &new_inode, &qstr, + uid, gid, mode, rdev, NULL, NULL, + (subvol_inum) {}, 0)); + if (ret) + die("error creating %s: %s", name, bch2_err_str(ret)); + + return new_inode; +} + +#define for_each_xattr_handler(handlers, handler) \ + if (handlers) \ + for ((handler) = *(handlers)++; \ + (handler) != NULL; \ + (handler) = *(handlers)++) + +static const struct xattr_handler *xattr_resolve_name(char **name) +{ + const 
struct xattr_handler * const *handlers = bch2_xattr_handlers; + const struct xattr_handler *handler; + + for_each_xattr_handler(handlers, handler) { + char *n; + + n = strcmp_prefix(*name, xattr_prefix(handler)); + if (n) { + if (!handler->prefix ^ !*n) { + if (*n) + continue; + return ERR_PTR(-EINVAL); + } + *name = n; + return handler; + } + } + return ERR_PTR(-EOPNOTSUPP); +} + +void copy_times(struct bch_fs *c, struct bch_inode_unpacked *dst, + struct stat *src) +{ + dst->bi_atime = timespec_to_bch2_time(c, src->st_atim); + dst->bi_mtime = timespec_to_bch2_time(c, src->st_mtim); + dst->bi_ctime = timespec_to_bch2_time(c, src->st_ctim); +} + +void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst, + char *src) +{ + struct bch_hash_info hash_info = bch2_hash_info_init(c, dst); + + char attrs[XATTR_LIST_MAX]; + ssize_t attrs_size = llistxattr(src, attrs, sizeof(attrs)); + if (attrs_size < 0) + die("listxattr error: %m"); + + char *next, *attr; + for (attr = attrs; + attr < attrs + attrs_size; + attr = next) { + next = attr + strlen(attr) + 1; + + char val[XATTR_SIZE_MAX]; + ssize_t val_size = lgetxattr(src, attr, val, sizeof(val)); + + if (val_size < 0) + die("error getting xattr val: %m"); + + const struct xattr_handler *h = xattr_resolve_name(&attr); + if (IS_ERR(h)) + continue; + + int ret = bch2_trans_commit_do(c, NULL, NULL, 0, + bch2_xattr_set(trans, + (subvol_inum) { 1, dst->bi_inum }, + dst, &hash_info, attr, + val, val_size, h->flags, 0)); + if (ret < 0) + die("error creating xattr: %s", bch2_err_str(ret)); + } +} + +#define WRITE_DATA_BUF (1 << 20) + +static char buf[WRITE_DATA_BUF] __aligned(PAGE_SIZE); + +static void write_data(struct bch_fs *c, + struct bch_inode_unpacked *dst_inode, + u64 dst_offset, void *buf, size_t len) +{ + struct bch_write_op op; + struct bio_vec bv[WRITE_DATA_BUF / PAGE_SIZE]; + + BUG_ON(dst_offset & (block_bytes(c) - 1)); + BUG_ON(len & (block_bytes(c) - 1)); + BUG_ON(len > WRITE_DATA_BUF); + + 
bio_init(&op.wbio.bio, NULL, bv, ARRAY_SIZE(bv), 0); + bch2_bio_map(&op.wbio.bio, buf, len); + + bch2_write_op_init(&op, c, bch2_opts_to_inode_opts(c->opts)); + op.write_point = writepoint_hashed(0); + op.nr_replicas = 1; + op.subvol = 1; + op.pos = SPOS(dst_inode->bi_inum, dst_offset >> 9, U32_MAX); + op.flags |= BCH_WRITE_SYNC; + + int ret = bch2_disk_reservation_get(c, &op.res, len >> 9, + c->opts.data_replicas, 0); + if (ret) + die("error reserving space in new filesystem: %s", bch2_err_str(ret)); + + closure_call(&op.cl, bch2_write, NULL, NULL); + + BUG_ON(!(op.flags & BCH_WRITE_SUBMITTED)); + dst_inode->bi_sectors += len >> 9; + + if (op.error) + die("write error: %s", bch2_err_str(op.error)); +} + +void copy_data(struct bch_fs *c, + struct bch_inode_unpacked *dst_inode, + int src_fd, u64 start, u64 end) +{ + while (start < end) { + unsigned len = min_t(u64, end - start, sizeof(buf)); + unsigned pad = round_up(len, block_bytes(c)) - len; + + xpread(src_fd, buf, len, start); + memset(buf + len, 0, pad); + + write_data(c, dst_inode, start, buf, len + pad); + start += len; + } +} + +static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst, + u64 logical, u64 physical, u64 length) +{ + struct bch_dev *ca = c->devs[0]; + + BUG_ON(logical & (block_bytes(c) - 1)); + BUG_ON(physical & (block_bytes(c) - 1)); + BUG_ON(length & (block_bytes(c) - 1)); + + logical >>= 9; + physical >>= 9; + length >>= 9; + + BUG_ON(physical + length > bucket_to_sector(ca, ca->mi.nbuckets)); + + while (length) { + struct bkey_i_extent *e; + BKEY_PADDED_ONSTACK(k, BKEY_EXTENT_VAL_U64s_MAX) k; + u64 b = sector_to_bucket(ca, physical); + struct disk_reservation res; + unsigned sectors; + int ret; + + sectors = min(ca->mi.bucket_size - + (physical & (ca->mi.bucket_size - 1)), + length); + + e = bkey_extent_init(&k.k); + e->k.p.inode = dst->bi_inum; + e->k.p.offset = logical + sectors; + e->k.p.snapshot = U32_MAX; + e->k.size = sectors; + bch2_bkey_append_ptr(&e->k_i, (struct 
bch_extent_ptr) { + .offset = physical, + .dev = 0, + .gen = *bucket_gen(ca, b), + }); + + ret = bch2_disk_reservation_get(c, &res, sectors, 1, + BCH_DISK_RESERVATION_NOFAIL); + if (ret) + die("error reserving space in new filesystem: %s", + bch2_err_str(ret)); + + ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, 0, 0); + if (ret) + die("btree insert error %s", bch2_err_str(ret)); + + bch2_disk_reservation_put(c, &res); + + dst->bi_sectors += sectors; + logical += sectors; + physical += sectors; + length -= sectors; + } +} + +void copy_link(struct bch_fs *c, struct bch_inode_unpacked *dst, + char *src) +{ + ssize_t i; + ssize_t ret = readlink(src, buf, sizeof(buf)); + if (ret < 0) + die("readlink error: %m"); + + for (i = ret; i < round_up(ret, block_bytes(c)); i++) + buf[i] = 0; + + write_data(c, dst, 0, buf, round_up(ret, block_bytes(c))); +} + +static void copy_file(struct bch_fs *c, struct bch_inode_unpacked *dst, + int src_fd, u64 src_size, + char *src_path, struct copy_fs_state *s) +{ + struct fiemap_iter iter; + struct fiemap_extent e; + + fiemap_for_each(src_fd, iter, e) + if (e.fe_flags & FIEMAP_EXTENT_UNKNOWN) { + fsync(src_fd); + break; + } + fiemap_iter_exit(&iter); + + fiemap_for_each(src_fd, iter, e) { + u64 src_max = roundup(src_size, block_bytes(c)); + + e.fe_length = min(e.fe_length, src_max - e.fe_logical); + + if ((e.fe_logical & (block_bytes(c) - 1)) || + (e.fe_length & (block_bytes(c) - 1))) + die("Unaligned extent in %s - can't handle", src_path); + + if (BCH_MIGRATE_copy == s->type || (e.fe_flags & (FIEMAP_EXTENT_UNKNOWN| + FIEMAP_EXTENT_ENCODED| + FIEMAP_EXTENT_NOT_ALIGNED| + FIEMAP_EXTENT_DATA_INLINE))) { + copy_data(c, dst, src_fd, e.fe_logical, + e.fe_logical + min(src_size - e.fe_logical, + e.fe_length)); + continue; + } + + /* + * if the data is below 1 MB, copy it so it doesn't conflict + * with bcachefs's potentially larger superblock: + */ + if (e.fe_physical < 1 << 20) { + copy_data(c, dst, src_fd, e.fe_logical, + 
e.fe_logical + min(src_size - e.fe_logical, + e.fe_length)); + continue; + } + + if ((e.fe_physical & (block_bytes(c) - 1))) + die("Unaligned extent in %s - can't handle", src_path); + + range_add(&s->extents, e.fe_physical, e.fe_length); + link_data(c, dst, e.fe_logical, e.fe_physical, e.fe_length); + } + fiemap_iter_exit(&iter); +} + +static void copy_dir(struct copy_fs_state *s, + struct bch_fs *c, + struct bch_inode_unpacked *dst, + int src_fd, const char *src_path) +{ + DIR *dir = fdopendir(src_fd); + struct dirent *d; + + while ((errno = 0), (d = readdir(dir))) { + struct bch_inode_unpacked inode; + int fd; + + if (fchdir(src_fd)) + die("chdir error: %m"); + + struct stat stat = + xfstatat(src_fd, d->d_name, AT_SYMLINK_NOFOLLOW); + + if (!strcmp(d->d_name, ".") || + !strcmp(d->d_name, "..") || + !strcmp(d->d_name, "lost+found")) + continue; + + if (BCH_MIGRATE_migrate == s->type && stat.st_ino == s->bcachefs_inum) + continue; + + char *child_path = mprintf("%s/%s", src_path, d->d_name); + + if (s->type == BCH_MIGRATE_migrate && stat.st_dev != s->dev) + die("%s does not have correct st_dev!", child_path); + + u64 *dst_inum = S_ISREG(stat.st_mode) + ? 
genradix_ptr_alloc(&s->hardlinks, stat.st_ino, GFP_KERNEL) + : NULL; + + if (dst_inum && *dst_inum) { + create_link(c, dst, d->d_name, *dst_inum, S_IFREG); + goto next; + } + + inode = create_file(c, dst, d->d_name, + stat.st_uid, stat.st_gid, + stat.st_mode, stat.st_rdev); + + if (dst_inum) + *dst_inum = inode.bi_inum; + + copy_xattrs(c, &inode, d->d_name); + + /* copy xattrs */ + + switch (mode_to_type(stat.st_mode)) { + case DT_DIR: + fd = xopen(d->d_name, O_RDONLY|O_NOATIME); + copy_dir(s, c, &inode, fd, child_path); + close(fd); + break; + case DT_REG: + inode.bi_size = stat.st_size; + + fd = xopen(d->d_name, O_RDONLY|O_NOATIME); + copy_file(c, &inode, fd, stat.st_size, + child_path, s); + close(fd); + break; + case DT_LNK: + inode.bi_size = stat.st_size; + + copy_link(c, &inode, d->d_name); + break; + case DT_FIFO: + case DT_CHR: + case DT_BLK: + case DT_SOCK: + case DT_WHT: + /* nothing else to copy for these: */ + break; + default: + BUG(); + } + + copy_times(c, &inode, &stat); + update_inode(c, &inode); +next: + free(child_path); + } + + if (errno) + die("readdir error: %m"); + closedir(dir); +} + +static void reserve_old_fs_space(struct bch_fs *c, + struct bch_inode_unpacked *root_inode, + ranges *extents) +{ + struct bch_dev *ca = c->devs[0]; + struct bch_inode_unpacked dst; + struct hole_iter iter; + struct range i; + + dst = create_file(c, root_inode, "old_migrated_filesystem", + 0, 0, S_IFREG|0400, 0); + dst.bi_size = bucket_to_sector(ca, ca->mi.nbuckets) << 9; + + ranges_sort_merge(extents); + + for_each_hole(iter, *extents, bucket_to_sector(ca, ca->mi.nbuckets) << 9, i) + link_data(c, &dst, i.start, i.start, i.end - i.start); + + update_inode(c, &dst); +} + +void copy_fs(struct bch_fs *c, int src_fd, const char *src_path, + struct copy_fs_state *s) +{ + syncfs(src_fd); + + struct bch_inode_unpacked root_inode; + int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO }, + &root_inode); + if (ret) + die("error looking up root 
directory: %s", bch2_err_str(ret)); + + if (fchdir(src_fd)) + die("chdir error: %m"); + + struct stat stat = xfstat(src_fd); + copy_times(c, &root_inode, &stat); + copy_xattrs(c, &root_inode, "."); + + + /* now, copy: */ + copy_dir(s, c, &root_inode, src_fd, src_path); + + if (BCH_MIGRATE_migrate == s->type) + reserve_old_fs_space(c, &root_inode, &s->extents); + + update_inode(c, &root_inode); + + if (BCH_MIGRATE_migrate == s->type) + darray_exit(&s->extents); + + genradix_free(&s->hardlinks); +} diff --git a/c_src/posix_to_bcachefs.h b/c_src/posix_to_bcachefs.h new file mode 100644 index 00000000..facb75ed --- /dev/null +++ b/c_src/posix_to_bcachefs.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _POSIX_TO_BCACHEFS_H +#define _POSIX_TO_BCACHEFS_H + +/* + * This header exports the functionality needed for copying data from existing + * posix compliant filesystems to bcachefs. There are two use cases: + * 1. Creating a new bcachefs filesystem using `bcachefs format`, we can + * specify a source directory tree which will be copied over the new + * bcachefs filesytem. + * 2. Migrating an existing filesystem in place, with `bcachefs migrate`. + * This will allocate space for the bcachefs metadata, but the actual data + * represented by the extents will not be duplicated. The bcachefs metadata + * will simply point to the existing extents. + * + * To avoid code duplication, `copy_fs` deals with both cases. See the function + * documentation for more details. + */ + +#include "libbcachefs.h" + +enum bch_migrate_type { + BCH_MIGRATE_copy, + BCH_MIGRATE_migrate +}; + +/* + * The migrate action uses all the fields in this struct. + * The copy action only uses the `hardlinks` field. Since `hardlinks` is + * initialized with zeroes, an empty `copy_fs_state` struct can be passed. 
+ */ +struct copy_fs_state { + u64 bcachefs_inum; + dev_t dev; + + GENRADIX(u64) hardlinks; + ranges extents; + enum bch_migrate_type type; +}; + +/* + * The `copy_fs` function is used for both copying a directory tree to a new + * bcachefs filesystem and migrating an existing one, depending on the value + * from the `type` field in `copy_fs_state` struct. + * + * In case of copy, an empty `copy_fs_state` structure is passed to `copy_fs` + * (only the `hardlinks` field is used, and that is initialized with zeroes). + * + * In the migrate case, all the fields from `copy_fs_state` need to be + * initialized (`hardlinks` is initialized with zeroes). + */ +void copy_fs(struct bch_fs *c, int src_fd, const char *src_path, + struct copy_fs_state *s); +#endif /* _LIBBCACHE_H */ diff --git a/c_src/qcow2.c b/c_src/qcow2.c new file mode 100644 index 00000000..30a6e056 --- /dev/null +++ b/c_src/qcow2.c @@ -0,0 +1,134 @@ + +#include <errno.h> +#include <sys/types.h> +#include <unistd.h> + +#include "qcow2.h" +#include "tools-util.h" + +#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb) +#define QCOW_VERSION 2 +#define QCOW_OFLAG_COPIED (1LL << 63) + +struct qcow2_hdr { + u32 magic; + u32 version; + + u64 backing_file_offset; + u32 backing_file_size; + + u32 block_bits; + u64 size; + u32 crypt_method; + + u32 l1_size; + u64 l1_table_offset; + + u64 refcount_table_offset; + u32 refcount_table_blocks; + + u32 nb_snapshots; + u64 snapshots_offset; +}; + +struct qcow2_image { + int fd; + u32 block_size; + u64 *l1_table; + u64 l1_offset; + u32 l1_index; + u64 *l2_table; + u64 offset; +}; + +static void flush_l2(struct qcow2_image *img) +{ + if (img->l1_index != -1) { + img->l1_table[img->l1_index] = + cpu_to_be64(img->offset|QCOW_OFLAG_COPIED); + xpwrite(img->fd, img->l2_table, img->block_size, img->offset, + "qcow2 l2 table"); + img->offset += img->block_size; + + memset(img->l2_table, 0, img->block_size); + img->l1_index = -1; + } +} + +static void add_l2(struct 
qcow2_image *img, u64 src_blk, u64 dst_offset) +{ + unsigned l2_size = img->block_size / sizeof(u64); + u64 l1_index = src_blk / l2_size; + u64 l2_index = src_blk & (l2_size - 1); + + if (img->l1_index != l1_index) { + flush_l2(img); + img->l1_index = l1_index; + } + + img->l2_table[l2_index] = cpu_to_be64(dst_offset|QCOW_OFLAG_COPIED); +} + +void qcow2_write_image(int infd, int outfd, ranges *data, + unsigned block_size) +{ + u64 image_size = get_size(infd); + unsigned l2_size = block_size / sizeof(u64); + unsigned l1_size = DIV_ROUND_UP(image_size, (u64) block_size * l2_size); + struct qcow2_hdr hdr = { 0 }; + struct qcow2_image img = { + .fd = outfd, + .block_size = block_size, + .l2_table = xcalloc(l2_size, sizeof(u64)), + .l1_table = xcalloc(l1_size, sizeof(u64)), + .l1_index = -1, + .offset = round_up(sizeof(hdr), block_size), + }; + char *buf = xmalloc(block_size); + u64 src_offset, dst_offset; + + assert(is_power_of_2(block_size)); + + ranges_roundup(data, block_size); + ranges_sort_merge(data); + + /* Write data: */ + darray_for_each(*data, r) + for (src_offset = r->start; + src_offset < r->end; + src_offset += block_size) { + dst_offset = img.offset; + img.offset += img.block_size; + + xpread(infd, buf, block_size, src_offset); + xpwrite(outfd, buf, block_size, dst_offset, + "qcow2 data"); + + add_l2(&img, src_offset / block_size, dst_offset); + } + + flush_l2(&img); + + /* Write L1 table: */ + dst_offset = img.offset; + img.offset += round_up(l1_size * sizeof(u64), block_size); + xpwrite(img.fd, img.l1_table, l1_size * sizeof(u64), dst_offset, + "qcow2 l1 table"); + + /* Write header: */ + hdr.magic = cpu_to_be32(QCOW_MAGIC); + hdr.version = cpu_to_be32(QCOW_VERSION); + hdr.block_bits = cpu_to_be32(ilog2(block_size)); + hdr.size = cpu_to_be64(image_size); + hdr.l1_size = cpu_to_be32(l1_size); + hdr.l1_table_offset = cpu_to_be64(dst_offset); + + memset(buf, 0, block_size); + memcpy(buf, &hdr, sizeof(hdr)); + xpwrite(img.fd, buf, block_size, 0, + "qcow2 
header"); + + free(img.l2_table); + free(img.l1_table); + free(buf); +} diff --git a/c_src/qcow2.h b/c_src/qcow2.h new file mode 100644 index 00000000..0943d55c --- /dev/null +++ b/c_src/qcow2.h @@ -0,0 +1,9 @@ +#ifndef _QCOW2_H +#define _QCOW2_H + +#include <linux/types.h> +#include "tools-util.h" + +void qcow2_write_image(int, int, ranges *, unsigned); + +#endif /* _QCOW2_H */ diff --git a/c_src/tools-util.c b/c_src/tools-util.c new file mode 100644 index 00000000..3a76a02e --- /dev/null +++ b/c_src/tools-util.c @@ -0,0 +1,741 @@ +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/fs.h> +#include <math.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <sys/types.h> +#include <unistd.h> + +#include <blkid.h> +#include <uuid/uuid.h> + +#include "libbcachefs.h" +#include "libbcachefs/bcachefs_ioctl.h" +#include "linux/sort.h" +#include "tools-util.h" +#include "libbcachefs/util.h" + +void die(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fputc('\n', stderr); + + _exit(EXIT_FAILURE); +} + +char *mprintf(const char *fmt, ...) 
+{ + va_list args; + char *str; + int ret; + + va_start(args, fmt); + ret = vasprintf(&str, fmt, args); + va_end(args); + + if (ret < 0) + die("insufficient memory"); + + return str; +} + +void xpread(int fd, void *buf, size_t count, off_t offset) +{ + while (count) { + ssize_t r = pread(fd, buf, count, offset); + + if (r < 0) + die("read error: %m"); + if (!r) + die("pread error: unexpected eof"); + count -= r; + offset += r; + } +} + +void xpwrite(int fd, const void *buf, size_t count, off_t offset, const char *msg) +{ + ssize_t r = pwrite(fd, buf, count, offset); + + if (r != count) + die("error writing %s (ret %zi err %m)", msg, r); +} + +struct stat xfstatat(int dirfd, const char *path, int flags) +{ + struct stat stat; + if (fstatat(dirfd, path, &stat, flags)) + die("stat error: %m"); + return stat; +} + +struct stat xfstat(int fd) +{ + struct stat stat; + if (fstat(fd, &stat)) + die("stat error: %m"); + return stat; +} + +struct stat xstat(const char *path) +{ + struct stat statbuf; + if (stat(path, &statbuf)) + die("stat error statting %s: %m", path); + return statbuf; +} + +/* File parsing (i.e. 
sysfs) */ + +void write_file_str(int dirfd, const char *path, const char *str) +{ + int fd = xopenat(dirfd, path, O_WRONLY); + ssize_t wrote, len = strlen(str); + + wrote = write(fd, str, len); + if (wrote != len) + die("read error: %m"); + close(fd); +} + +char *read_file_str(int dirfd, const char *path) +{ + int fd = xopenat(dirfd, path, O_RDONLY); + ssize_t len = xfstat(fd).st_size; + + char *buf = xmalloc(len + 1); + + len = read(fd, buf, len); + if (len < 0) + die("read error: %m"); + + buf[len] = '\0'; + if (len && buf[len - 1] == '\n') + buf[len - 1] = '\0'; + if (!strlen(buf)) { + free(buf); + buf = NULL; + } + + close(fd); + + return buf; +} + +u64 read_file_u64(int dirfd, const char *path) +{ + char *buf = read_file_str(dirfd, path); + u64 v; + if (bch2_strtou64_h(buf, &v)) + die("read_file_u64: error parsing %s (got %s)", path, buf); + free(buf); + return v; +} + +/* String list options: */ + +ssize_t read_string_list_or_die(const char *opt, const char * const list[], + const char *msg) +{ + ssize_t v = match_string(list, -1, opt); + if (v < 0) + die("Bad %s %s", msg, opt); + + return v; +} + +/* Returns size of file or block device: */ +u64 get_size(int fd) +{ + struct stat statbuf = xfstat(fd); + + if (!S_ISBLK(statbuf.st_mode)) + return statbuf.st_size; + + u64 ret; + xioctl(fd, BLKGETSIZE64, &ret); + return ret; +} + +/* Returns blocksize, in bytes: */ +unsigned get_blocksize(int fd) +{ + struct stat statbuf = xfstat(fd); + + if (!S_ISBLK(statbuf.st_mode)) + return statbuf.st_blksize; + + unsigned ret; + xioctl(fd, BLKPBSZGET, &ret); + return ret; +} + +/* Open a block device, do magic blkid stuff to probe for existing filesystems: */ +int open_for_format(struct dev_opts *dev, bool force) +{ + int blkid_version_code = blkid_get_library_version(NULL, NULL); + if (blkid_version_code < 2401) { + if (force) { + fprintf( + stderr, + "Continuing with out of date libblkid %s because --force was passed.\n", + BLKID_VERSION); + } else { + // Reference for 
picking 2.40.1: + // https://mirrors.edge.kernel.org/pub/linux/utils/util-linux/v2.40/v2.40.1-ReleaseNotes + // https://github.com/util-linux/util-linux/issues/3103 + die( + "Refusing to format when using libblkid %s\n" + "libblkid >= 2.40.1 is required to check for existing filesystems\n" + "Earlier versions may not recognize some bcachefs filesystems.\n", BLKID_VERSION); + } + } + + blkid_probe pr; + const char *fs_type = NULL, *fs_label = NULL; + size_t fs_type_len, fs_label_len; + + dev->file = bdev_file_open_by_path(dev->path, + BLK_OPEN_READ|BLK_OPEN_WRITE|BLK_OPEN_EXCL|BLK_OPEN_BUFFERED, + dev, NULL); + int ret = PTR_ERR_OR_ZERO(dev->file); + if (ret < 0) + die("Error opening device to format %s: %s", dev->path, strerror(-ret)); + dev->bdev = file_bdev(dev->file); + + if (!(pr = blkid_new_probe())) + die("blkid error 1"); + if (blkid_probe_set_device(pr, dev->bdev->bd_fd, 0, 0)) + die("blkid error 2"); + if (blkid_probe_enable_partitions(pr, true) || + blkid_probe_enable_superblocks(pr, true) || + blkid_probe_set_superblocks_flags(pr, + BLKID_SUBLKS_LABEL|BLKID_SUBLKS_TYPE|BLKID_SUBLKS_MAGIC)) + die("blkid error 3"); + if (blkid_do_fullprobe(pr) < 0) + die("blkid error 4"); + + blkid_probe_lookup_value(pr, "TYPE", &fs_type, &fs_type_len); + blkid_probe_lookup_value(pr, "LABEL", &fs_label, &fs_label_len); + + if (fs_type) { + if (fs_label) + printf("%s contains a %s filesystem labelled '%s'\n", + dev->path, fs_type, fs_label); + else + printf("%s contains a %s filesystem\n", + dev->path, fs_type); + if (!force) { + fputs("Proceed anyway?", stdout); + if (!ask_yn()) + exit(EXIT_FAILURE); + } + while (blkid_do_probe(pr) == 0) { + if (blkid_do_wipe(pr, 0)) + die("Failed to wipe preexisting metadata."); + } + } + + blkid_free_probe(pr); + return ret; +} + +bool ask_yn(void) +{ + const char *short_yes = "yY"; + char *buf = NULL; + size_t buflen = 0; + bool ret; + + fputs(" (y,n) ", stdout); + fflush(stdout); + + if (getline(&buf, &buflen, stdin) < 0) + die("error 
reading from standard input"); + + ret = strchr(short_yes, buf[0]); + free(buf); + return ret; +} + +static int range_cmp(const void *_l, const void *_r) +{ + const struct range *l = _l, *r = _r; + + if (l->start < r->start) + return -1; + if (l->start > r->start) + return 1; + return 0; +} + +void ranges_sort_merge(ranges *r) +{ + ranges tmp = { 0 }; + + sort(r->data, r->nr, sizeof(r->data[0]), range_cmp, NULL); + + /* Merge contiguous ranges: */ + darray_for_each(*r, i) { + struct range *t = tmp.nr ? &tmp.data[tmp.nr - 1] : NULL; + + if (t && t->end >= i->start) + t->end = max(t->end, i->end); + else + darray_push(&tmp, *i); + } + + darray_exit(r); + *r = tmp; +} + +void ranges_roundup(ranges *r, unsigned block_size) +{ + darray_for_each(*r, i) { + i->start = round_down(i->start, block_size); + i->end = round_up(i->end, block_size); + } +} + +void ranges_rounddown(ranges *r, unsigned block_size) +{ + darray_for_each(*r, i) { + i->start = round_up(i->start, block_size); + i->end = round_down(i->end, block_size); + i->end = max(i->end, i->start); + } +} + +struct fiemap_extent fiemap_iter_next(struct fiemap_iter *iter) +{ + struct fiemap_extent e; + + BUG_ON(iter->idx > iter->f->fm_mapped_extents); + + if (iter->idx == iter->f->fm_mapped_extents) { + xioctl(iter->fd, FS_IOC_FIEMAP, iter->f); + + if (!iter->f->fm_mapped_extents) + return (struct fiemap_extent) { .fe_length = 0 }; + + iter->idx = 0; + } + + e = iter->f->fm_extents[iter->idx++]; + BUG_ON(!e.fe_length); + + iter->f->fm_start = e.fe_logical + e.fe_length; + + return e; +} + +char *strcmp_prefix(char *a, const char *a_prefix) +{ + while (*a_prefix && *a == *a_prefix) { + a++; + a_prefix++; + } + return *a_prefix ? 
NULL : a; +} + +/* crc32c */ + +static u32 crc32c_default(u32 crc, const void *buf, size_t size) +{ + static const u32 crc32c_tab[] = { + 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, + 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, + 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, + 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, + 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, + 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, + 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, + 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, + 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, + 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, + 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, + 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, + 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, + 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A, + 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, + 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, + 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, + 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, + 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, + 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, + 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, + 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, + 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, + 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, + 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, + 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, + 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, + 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, + 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, + 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, + 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, + 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, + 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, + 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, + 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, + 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, + 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, + 0x55326B08, 
0xA759E80B, 0xB4091BFF, 0x466298FC, + 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, + 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, + 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, + 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, + 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, + 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, + 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, + 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, + 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, + 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, + 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, + 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, + 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, + 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, + 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, + 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, + 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, + 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, + 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, + 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, + 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, + 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, + 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, + 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, + 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, + 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351 + }; + const u8 *p = buf; + + while (size--) + crc = crc32c_tab[(crc ^ *p++) & 0xFFL] ^ (crc >> 8); + + return crc; +} + +#include <linux/compiler.h> + +#ifdef __x86_64__ + +#ifdef CONFIG_X86_64 +#define REX_PRE "0x48, " +#else +#define REX_PRE +#endif + +static u32 crc32c_sse42(u32 crc, const void *buf, size_t size) +{ + while (size >= sizeof(long)) { + const unsigned long *d = buf; + + __asm__ __volatile__( + ".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;" + :"=S"(crc) + :"0"(crc), "c"(*d) + ); + buf += sizeof(long); + size -= sizeof(long); + } + + while (size) { + const u8 *d = buf; + + __asm__ __volatile__( + ".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1" + 
:"=S"(crc) + :"0"(crc), "c"(*d) + ); + buf += 1; + size -= 1; + } + + return crc; +} + +#endif + +static void *resolve_crc32c(void) +{ +#ifdef __x86_64__ + if (__builtin_cpu_supports("sse4.2")) + return crc32c_sse42; +#endif + return crc32c_default; +} + +/* + * ifunc is buggy and I don't know what breaks it (LTO?) + */ +#ifdef HAVE_WORKING_IFUNC + +static void *ifunc_resolve_crc32c(void) +{ + __builtin_cpu_init(); + + return resolve_crc32c +} + +u32 crc32c(u32, const void *, size_t) + __attribute__((ifunc("ifunc_resolve_crc32c"))); + +#else + +u32 crc32c(u32 crc, const void *buf, size_t size) +{ + static u32 (*real_crc32c)(u32, const void *, size_t); + + if (unlikely(!real_crc32c)) + real_crc32c = resolve_crc32c(); + + return real_crc32c(crc, buf, size); +} + +#endif /* HAVE_WORKING_IFUNC */ + +char *dev_to_name(dev_t dev) +{ + char *line = NULL, *name = NULL; + size_t n = 0; + + FILE *f = fopen("/proc/partitions", "r"); + if (!f) + die("error opening /proc/partitions: %m"); + + while (getline(&line, &n, f) != -1) { + unsigned ma, mi; + u64 sectors; + + name = realloc(name, n + 1); + + if (sscanf(line, " %u %u %llu %s", &ma, &mi, §ors, name) == 4 && + ma == major(dev) && mi == minor(dev)) + goto found; + } + + free(name); + name = NULL; +found: + fclose(f); + free(line); + return name; +} + +char *dev_to_path(dev_t dev) +{ + char *name = dev_to_name(dev); + if (!name) + return NULL; + + char *path = mprintf("/dev/%s", name); + + free(name); + return path; +} + +struct mntent *dev_to_mount(char *dev) +{ + struct mntent *mnt, *ret = NULL; + FILE *f = setmntent("/proc/mounts", "r"); + if (!f) + die("error opening /proc/mounts: %m"); + + struct stat d1 = xstat(dev); + + while ((mnt = getmntent(f))) { + char *d, *p = mnt->mnt_fsname; + + while ((d = strsep(&p, ":"))) { + struct stat d2; + + if (stat(d, &d2)) + continue; + + if (S_ISBLK(d1.st_mode) != S_ISBLK(d2.st_mode)) + continue; + + if (S_ISBLK(d1.st_mode)) { + if (d1.st_rdev != d2.st_rdev) + continue; + } else { + 
if (d1.st_dev != d2.st_dev || + d1.st_ino != d2.st_ino) + continue; + } + + ret = mnt; + goto found; + } + } +found: + fclose(f); + return ret; +} + +int dev_mounted(char *dev) +{ + struct mntent *mnt = dev_to_mount(dev); + + if (!mnt) + return 0; + if (hasmntopt(mnt, "ro")) + return 1; + return 2; +} + +static char *dev_to_sysfs_path(dev_t dev) +{ + return mprintf("/sys/dev/block/%u:%u", major(dev), minor(dev)); +} + +char *fd_to_dev_model(int fd) +{ + struct stat stat = xfstat(fd); + + if (S_ISBLK(stat.st_mode)) { + char *sysfs_path = dev_to_sysfs_path(stat.st_rdev); + + char *model_path = mprintf("%s/device/model", sysfs_path); + if (!access(model_path, R_OK)) + goto got_model; + free(model_path); + + /* partition? try parent */ + + char buf[1024]; + if (readlink(sysfs_path, buf, sizeof(buf)) < 0) + die("readlink error on %s: %m", sysfs_path); + + free(sysfs_path); + sysfs_path = strdup(buf); + + *strrchr(sysfs_path, '/') = 0; + model_path = mprintf("%s/device/model", sysfs_path); + if (!access(model_path, R_OK)) + goto got_model; + + return strdup("(unknown device)"); + char *model; +got_model: + model = read_file_str(AT_FDCWD, model_path); + free(model_path); + free(sysfs_path); + return model; + } else { + return strdup("(reg file)"); + } +} + +static int kstrtoull_symbolic(const char *s, unsigned int base, unsigned long long *res) +{ + if (!strcmp(s, "U64_MAX")) { + *res = U64_MAX; + return 0; + } + + if (!strcmp(s, "U32_MAX")) { + *res = U32_MAX; + return 0; + } + + return kstrtoull(s, base, res); +} + +static int kstrtouint_symbolic(const char *s, unsigned int base, unsigned *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull_symbolic(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long long)(unsigned int)tmp) + return -ERANGE; + *res = tmp; + return 0; +} + +struct bpos bpos_parse(char *buf) +{ + char *orig = strdup(buf); + char *s = buf; + + char *inode_s = strsep(&s, ":"); + char *offset_s = strsep(&s, ":"); + char 
*snapshot_s = strsep(&s, ":"); + + if (!inode_s || !offset_s || s) + die("invalid bpos %s", orig); + free(orig); + + u64 inode_v = 0, offset_v = 0; + u32 snapshot_v = 0; + if (kstrtoull_symbolic(inode_s, 10, &inode_v)) + die("invalid bpos.inode %s", inode_s); + + if (kstrtoull_symbolic(offset_s, 10, &offset_v)) + die("invalid bpos.offset %s", offset_s); + + if (snapshot_s && + kstrtouint_symbolic(snapshot_s, 10, &snapshot_v)) + die("invalid bpos.snapshot %s", snapshot_s); + + return (struct bpos) { .inode = inode_v, .offset = offset_v, .snapshot = snapshot_v }; +} + +struct bbpos bbpos_parse(char *buf) +{ + char *s = buf, *field; + struct bbpos ret; + + if (!(field = strsep(&s, ":"))) + die("invalid bbpos %s", buf); + + ret.btree = read_string_list_or_die(field, __bch2_btree_ids, "btree id"); + + if (!s) + die("invalid bbpos %s", buf); + + ret.pos = bpos_parse(s); + return ret; +} + +struct bbpos_range bbpos_range_parse(char *buf) +{ + char *s = buf; + char *start_str = strsep(&s, "-"); + char *end_str = strsep(&s, "-"); + + struct bbpos start = bbpos_parse(start_str); + struct bbpos end = end_str ? 
bbpos_parse(end_str) : start; + + return (struct bbpos_range) { .start = start, .end = end }; +} + +darray_str get_or_split_cmdline_devs(int argc, char *argv[]) +{ + darray_str ret = {}; + + if (argc == 1) { + bch2_split_devs(argv[0], &ret); + } else { + for (unsigned i = 0; i < argc; i++) + darray_push(&ret, strdup(argv[i])); + } + + return ret; +} diff --git a/c_src/tools-util.h b/c_src/tools-util.h new file mode 100644 index 00000000..572aca05 --- /dev/null +++ b/c_src/tools-util.h @@ -0,0 +1,214 @@ +#ifndef _TOOLS_UTIL_H +#define _TOOLS_UTIL_H + +#include <errno.h> +#include <mntent.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <linux/bug.h> +#include <linux/byteorder.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/uuid.h> +#include "libbcachefs/bcachefs.h" +#include "libbcachefs/bbpos.h" +#include "libbcachefs/darray.h" + +#define noreturn __attribute__((noreturn)) + +void die(const char *, ...) + __attribute__ ((format (printf, 1, 2))) noreturn; +char *mprintf(const char *, ...) + __attribute__ ((format (printf, 1, 2))); +void xpread(int, void *, size_t, off_t); +void xpwrite(int, const void *, size_t, off_t, const char *); +struct stat xfstatat(int, const char *, int); +struct stat xfstat(int); +struct stat xstat(const char *); + +static inline void *xmalloc(size_t size) +{ + void *p = malloc(size); + + if (!p) + die("insufficient memory"); + + memset(p, 0, size); + return p; +} + +static inline void *xcalloc(size_t count, size_t size) +{ + void *p = calloc(count, size); + + if (!p) + die("insufficient memory"); + + return p; +} + +static inline void *xrealloc(void *p, size_t size) +{ + p = realloc(p, size); + if (!p) + die("insufficient memory"); + + return p; +} + +#define xopenat(_dirfd, _path, ...) 
\ +({ \ + int _fd = openat((_dirfd), (_path), __VA_ARGS__); \ + if (_fd < 0) \ + die("Error opening %s: %m", (_path)); \ + _fd; \ +}) + +#define xopen(...) xopenat(AT_FDCWD, __VA_ARGS__) + +#define xioctl(_fd, _nr, ...) \ +({ \ + int _ret = ioctl((_fd), (_nr), ##__VA_ARGS__); \ + if (_ret < 0) \ + die(#_nr " ioctl error: %m"); \ + _ret; \ +}) + +void write_file_str(int, const char *, const char *); +char *read_file_str(int, const char *); +u64 read_file_u64(int, const char *); + +ssize_t read_string_list_or_die(const char *, const char * const[], + const char *); + +u64 get_size(int); +unsigned get_blocksize(int); +struct dev_opts; +int open_for_format(struct dev_opts *, bool); + +bool ask_yn(void); + +struct range { + u64 start; + u64 end; +}; + +typedef DARRAY(struct range) ranges; + +static inline void range_add(ranges *data, u64 offset, u64 size) +{ + darray_push(data, ((struct range) { + .start = offset, + .end = offset + size + })); +} + +void ranges_sort_merge(ranges *); +void ranges_roundup(ranges *, unsigned); +void ranges_rounddown(ranges *, unsigned); + +struct hole_iter { + ranges r; + size_t idx; + u64 end; +}; + +static inline struct range hole_iter_next(struct hole_iter *iter) +{ + struct range r = { + .start = iter->idx ? iter->r.data[iter->idx - 1].end : 0, + .end = iter->idx < iter->r.nr + ? 
iter->r.data[iter->idx].start : iter->end, + }; + + BUG_ON(r.start > r.end); + + iter->idx++; + return r; +} + +#define for_each_hole(_iter, _ranges, _end, _i) \ + for (_iter = (struct hole_iter) { .r = _ranges, .end = _end }; \ + (_iter.idx <= _iter.r.nr && \ + (_i = hole_iter_next(&_iter), true));) + +#include <linux/fiemap.h> + +struct fiemap_iter { + struct fiemap *f; + unsigned idx; + int fd; +}; + +static inline void fiemap_iter_init(struct fiemap_iter *iter, int fd) +{ + memset(iter, 0, sizeof(*iter)); + + iter->f = xmalloc(sizeof(struct fiemap) + + sizeof(struct fiemap_extent) * 1024); + + iter->f->fm_extent_count = 1024; + iter->f->fm_length = FIEMAP_MAX_OFFSET; + iter->fd = fd; +} + +static inline void fiemap_iter_exit(struct fiemap_iter *iter) +{ + free(iter->f); + memset(iter, 0, sizeof(*iter)); +} + +struct fiemap_extent fiemap_iter_next(struct fiemap_iter *); + +#define fiemap_for_each(fd, iter, extent) \ + for (fiemap_iter_init(&iter, fd); \ + (extent = fiemap_iter_next(&iter)).fe_length;) + +char *strcmp_prefix(char *, const char *); + +/* Avoid conflicts with libblkid's crc32 function in static builds */ +#define crc32c bch_crc32c +u32 crc32c(u32, const void *, size_t); + +char *dev_to_name(dev_t); +char *dev_to_path(dev_t); +struct mntent *dev_to_mount(char *); +int dev_mounted(char *); +char *fd_to_dev_model(int); + +#define args_shift(_nr) \ +do { \ + unsigned _n = min((_nr), argc); \ + argc -= _n; \ + argv += _n; \ +} while (0) + +#define arg_pop() \ +({ \ + char *_ret = argc ? argv[0] : NULL; \ + if (_ret) \ + args_shift(1); \ + _ret; \ +}) + +struct bpos bpos_parse(char *); +struct bbpos bbpos_parse(char *); + +struct bbpos_range { + struct bbpos start; + struct bbpos end; +}; + +struct bbpos_range bbpos_range_parse(char *); + +darray_str get_or_split_cmdline_devs(int argc, char *argv[]); + +#endif /* _TOOLS_UTIL_H */ |