summary | refs | log | tree | commit | diff
path: root/make-bcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'make-bcache.c')
-rw-r--r-- make-bcache.c 461
1 files changed, 263 insertions, 198 deletions
diff --git a/make-bcache.c b/make-bcache.c
index 75df4d3e..450ba99d 100644
--- a/make-bcache.c
+++ b/make-bcache.c
@@ -96,91 +96,105 @@ unsigned hatoi_validate(const char *s, const char *msg)
return v;
}
-char *skip_spaces(const char *str)
+/*
+ * Print the command-line summary to stderr and terminate the process with
+ * EXIT_FAILURE.  Does not return.
+ *
+ * NOTE(review): the text advertises "-o, --data-offset", but the option table
+ * visible in main() registers the long name "data_offset" and the short-option
+ * string passed to getopt_long() has no "o:" - confirm and reconcile.
+ */
+void usage()
{
- while (isspace(*str))
- ++str;
- return (char *)str;
+ static const char help_text[] =
+ "Usage: make-bcache [options] device\n"
+ " -C, --cache Format a cache device\n"
+ " -B, --bdev Format a backing device\n"
+ " --wipe-bcache destroy existing bcache data if present\n"
+ " -l, --label label\n"
+ " --cset-uuid UUID for the cache set\n"
+ " --csum-type One of (none|crc32c|crc64)\n"
+
+ " -b, --bucket bucket size\n"
+ " -w, --block block size (hard sector size of SSD, often 2k)\n"
+
+ " --replication-set replication set of subsequent devices\n"
+ " --meta-replicas number of metadata replicas\n"
+ " --data-replicas number of data replicas\n"
+ " --tier tier of subsequent devices\n"
+ " --cache_replacement_policy one of (lru|fifo|random)\n"
+ " --discard enable discards\n"
+
+ " --writeback enable writeback\n"
+ " -o, --data-offset data offset in sectors\n"
+ " -h, --help display this help and exit\n";
+
+ fputs(help_text, stderr);
+ exit(EXIT_FAILURE);
}
-char *strim(char *s)
+/*
+ * Write a superblock to an open device and close it.
+ *
+ * Zeroes the first SB_START bytes of fd, writes sb at byte offset SB_START
+ * (everything from the start of sb up to bset_bkey_last(sb)), flushes with
+ * fsync() and closes fd.  Any short write or failed flush is reported on
+ * stderr and terminates the process with EXIT_FAILURE.
+ */
+static void do_write_sb(int fd, struct cache_sb *sb)
{
- size_t size;
- char *end;
+ char zeroes[SB_START] = {0};
+ /* byte-wise distance; arithmetic on void * is a GNU extension */
+ size_t bytes = ((char *) bset_bkey_last(sb)) - (char *) sb;
- s = skip_spaces(s);
- size = strlen(s);
- if (!size)
- return s;
-
- end = s + size - 1;
- while (end >= s && isspace(*end))
- end--;
- *(end + 1) = '\0';
+ /* Zero start of disk */
+ if (pwrite(fd, zeroes, SB_START, 0) != SB_START) {
+ /* no trailing newline: perror() appends ": <reason>\n" itself */
+ perror("write error");
+ exit(EXIT_FAILURE);
+ }
+ /* Write superblock */
+ if (pwrite(fd, sb, bytes, SB_START) != (ssize_t) bytes) {
+ perror("write error");
+ exit(EXIT_FAILURE);
+ }
- return s;
+ /* a failed flush means the superblock may not have reached the device */
+ if (fsync(fd)) {
+ perror("fsync error");
+ exit(EXIT_FAILURE);
+ }
+ close(fd);
}
-ssize_t read_string_list(const char *buf, const char * const list[])
+/*
+ * Build and write the superblock for one backing device.
+ *
+ * fd          - open backing device; handed to do_write_sb(), which closes it
+ * block_size  - stored in sb.block_size
+ * bucket_size - stored in sb.bucket_size
+ * writeback   - selects CACHE_MODE_WRITEBACK, otherwise CACHE_MODE_WRITETHROUGH
+ * data_offset - when it differs from BDEV_DATA_START_DEFAULT the superblock
+ *               version becomes BCACHE_SB_VERSION_BDEV_WITH_OFFSET and the
+ *               offset is recorded
+ * label       - optional NUL-terminated label, may be NULL
+ * set_uuid    - UUID of the cache set, stored in sb.set_uuid
+ *
+ * A fresh device UUID is generated, the superblock is checksummed with
+ * BCH_CSUM_CRC64, and a summary is printed to stdout before writing.
+ */
+static void write_backingdev_sb(int fd, unsigned block_size, unsigned bucket_size,
+ bool writeback, uint64_t data_offset,
+ const char *label,
+ uuid_le set_uuid)
{
- size_t i;
- char *s, *d = strdup(buf);
- if (!d)
- return -ENOMEM;
+ char uuid_str[40], set_uuid_str[40];
+ struct cache_sb sb;
- s = strim(d);
+ memset(&sb, 0, sizeof(sb));
- for (i = 0; list[i]; i++)
- if (!strcmp(list[i], s))
- break;
+ sb.offset = SB_SECTOR;
+ sb.version = BCACHE_SB_VERSION_BDEV;
+ sb.magic = BCACHE_MAGIC;
+ uuid_generate(sb.uuid.b);
+ sb.set_uuid = set_uuid;
+ sb.bucket_size = bucket_size;
+ sb.block_size = block_size;
- free(d);
+ uuid_unparse(sb.uuid.b, uuid_str);
+ uuid_unparse(sb.set_uuid.b, set_uuid_str);
+ /*
+ * label is an ordinary C string that may be shorter than SB_LABEL_SIZE;
+ * strncpy() stops reading at its NUL (memcpy() would read past the end)
+ * and still fills at most SB_LABEL_SIZE bytes of the zeroed field.
+ */
+ if (label)
+ strncpy(sb.label, label, SB_LABEL_SIZE);
- if (!list[i])
- return -EINVAL;
+ SET_BDEV_CACHE_MODE(&sb, writeback
+ ? CACHE_MODE_WRITEBACK
+ : CACHE_MODE_WRITETHROUGH);
- return i;
-}
+ if (data_offset != BDEV_DATA_START_DEFAULT) {
+ sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
+ sb.data_offset = data_offset;
+ }
-void usage()
-{
- fprintf(stderr,
- "Usage: make-bcache [options] device\n"
- " -C, --cache Format a cache device\n"
- " -B, --bdev Format a backing device\n"
- " -b, --bucket bucket size\n"
- " -w, --block block size (hard sector size of SSD, often 2k)\n"
- " -o, --data-offset data offset in sectors\n"
- " --cset-uuid UUID for the cache set\n"
-// " -U UUID\n"
- " --writeback enable writeback\n"
- " --discard enable discards\n"
- " --wipe-bcache destroy existing bcache data if present\n"
- " --tier set tier of subsequent cache devices\n"
- " --cache_replacement_policy=(lru|fifo|random)\n"
- " -l, --label label\n"
- " -h, --help display this help and exit\n");
- exit(EXIT_FAILURE);
+ sb.csum = csum_set(&sb, BCH_CSUM_CRC64);
+
+ /* %ju expects uintmax_t, so convert the uint64_t explicitly */
+ printf("UUID: %s\n"
+ "Set UUID: %s\n"
+ "version: %u\n"
+ "block_size: %u\n"
+ "data_offset: %ju\n",
+ uuid_str, set_uuid_str,
+ (unsigned) sb.version,
+ sb.block_size,
+ (uintmax_t) data_offset);
+
+ do_write_sb(fd, &sb);
}
-const char * const cache_replacement_policies[] = {
- "lru",
- "fifo",
- "random",
- NULL
-};
-
-static void write_sb(char *dev, unsigned block_size, unsigned bucket_size,
- bool writeback, bool discard, bool wipe_bcache,
- unsigned cache_replacement_policy, uint64_t data_offset,
- uuid_t set_uuid, unsigned tier, bool bdev,
- uint16_t nr_in_set, uint16_t nr_this_dev,
- char *label)
+static int dev_open(const char *dev, bool wipe_bcache)
{
- int fd;
- char uuid_str[40], set_uuid_str[40], zeroes[SB_START] = {0};
struct cache_sb sb;
blkid_probe pr;
+ int fd;
if ((fd = open(dev, O_RDWR|O_EXCL)) == -1) {
fprintf(stderr, "Can't open dev %s: %s\n", dev, strerror(errno));
@@ -190,7 +204,7 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size,
if (pread(fd, &sb, sizeof(sb), SB_START) != sizeof(sb))
exit(EXIT_FAILURE);
- if (!memcmp(sb.magic, bcache_magic, 16) && !wipe_bcache) {
+ if (!memcmp(&sb.magic, &BCACHE_MAGIC, 16) && !wipe_bcache) {
fprintf(stderr, "Already a bcache device on %s, "
"overwrite with --wipe-bcache\n", dev);
exit(EXIT_FAILURE);
@@ -210,94 +224,84 @@ static void write_sb(char *dev, unsigned block_size, unsigned bucket_size,
exit(EXIT_FAILURE);
}
- memset(&sb, 0, sizeof(struct cache_sb));
+ return fd;
+}
- sb.offset = SB_SECTOR;
- sb.version = bdev
- ? BCACHE_SB_VERSION_BDEV
- : BCACHE_SB_VERSION_CDEV;
+/*
+ * Write one superblock per cache device in the set.
+ *
+ * fds         - open descriptors, one per member, indexed like sb->members
+ * sb          - shared superblock; fields common to all members are set once,
+ *               per-device fields are overwritten on every loop iteration
+ * block_size  - stored in sb->block_size
+ * bucket_size - stored in sb->bucket_size
+ *
+ * For each of the sb->nr_in_set members the summary is printed to stdout and
+ * the superblock is handed to do_write_sb().  Exits with EXIT_FAILURE if a
+ * device has fewer than 1 << 7 buckets.
+ */
+static void write_cache_sbs(int *fds, struct cache_sb *sb,
+ unsigned block_size, unsigned bucket_size)
+{
+ char uuid_str[40], set_uuid_str[40];
+ size_t i;
- memcpy(sb.magic, bcache_magic, 16);
- uuid_generate(sb.uuid);
- memcpy(sb.set_uuid, set_uuid, sizeof(sb.set_uuid));
+ /* fields shared by every member of the set */
+ sb->offset = SB_SECTOR;
+ sb->version = BCACHE_SB_VERSION_CDEV_V3;
+ sb->magic = BCACHE_MAGIC;
+ sb->bucket_size = bucket_size;
+ sb->block_size = block_size;
+ sb->keys = bch_journal_buckets_offset(sb);
- sb.bucket_size = bucket_size;
- sb.block_size = block_size;
+ /*
+ * don't have a userspace crc32c implementation handy, just always use
+ * crc64
+ */
+ SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64);
- uuid_unparse(sb.uuid, uuid_str);
- uuid_unparse(sb.set_uuid, set_uuid_str);
- if (label) {
- memcpy(sb.label, label, SB_LABEL_SIZE);
- }
+ for (i = 0; i < sb->nr_in_set; i++) {
+ struct cache_member *m = sb->members + i;
- if (SB_IS_BDEV(&sb)) {
- SET_BDEV_CACHE_MODE(
- &sb, writeback ? CACHE_MODE_WRITEBACK : CACHE_MODE_WRITETHROUGH);
+ /* this device's UUID comes from its entry in the member table */
+ sb->uuid = m->uuid;
- if (data_offset != BDEV_DATA_START_DEFAULT) {
- sb.version = BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
- sb.data_offset = data_offset;
- }
+ sb->nbuckets = getblocks(fds[i]) / sb->bucket_size;
+ sb->nr_this_dev = i;
+ /* NOTE(review): the constant 23 is not explained here - confirm its meaning */
+ sb->first_bucket = (23 / sb->bucket_size) + 1;
- printf("UUID: %s\n"
- "Set UUID: %s\n"
- "version: %u\n"
- "block_size: %u\n"
- "data_offset: %ju\n",
- uuid_str, set_uuid_str,
- (unsigned) sb.version,
- sb.block_size,
- data_offset);
- } else {
- sb.nbuckets = getblocks(fd) / sb.bucket_size;
- sb.nr_in_set = nr_in_set;
- sb.nr_this_dev = nr_this_dev;
- sb.first_bucket = (23 / sb.bucket_size) + 1;
-
- if (sb.nbuckets < 1 << 7) {
- fprintf(stderr, "Not enough buckets: %ju, need %u\n",
- sb.nbuckets, 1 << 7);
+ if (sb->nbuckets < 1 << 7) {
+ fprintf(stderr, "Not enough buckets: %llu, need %u\n",
+ sb->nbuckets, 1 << 7);
exit(EXIT_FAILURE);
}
- SET_CACHE_DISCARD(&sb, discard);
- SET_CACHE_REPLACEMENT(&sb, cache_replacement_policy);
- SET_CACHE_TIER(&sb, tier);
+ /* computed after the per-device fields above have been filled in */
+ sb->csum = csum_set(sb, CACHE_SB_CSUM_TYPE(sb));
+ uuid_unparse(sb->uuid.b, uuid_str);
+ uuid_unparse(sb->set_uuid.b, set_uuid_str);
printf("UUID: %s\n"
"Set UUID: %s\n"
"version: %u\n"
- "nbuckets: %ju\n"
+ "nbuckets: %llu\n"
"block_size: %u\n"
"bucket_size: %u\n"
"nr_in_set: %u\n"
"nr_this_dev: %u\n"
"first_bucket: %u\n",
uuid_str, set_uuid_str,
- (unsigned) sb.version,
- sb.nbuckets,
- sb.block_size,
- sb.bucket_size,
- sb.nr_in_set,
- sb.nr_this_dev,
- sb.first_bucket);
+ (unsigned) sb->version,
+ sb->nbuckets,
+ sb->block_size,
+ sb->bucket_size,
+ sb->nr_in_set,
+ sb->nr_this_dev,
+ sb->first_bucket);
+
+ do_write_sb(fds[i], sb);
}
+}
- sb.csum = csum_set(&sb);
+/*
+ * Append one cache device to the set described by sb.
+ *
+ * The next free slot of sb->members (index sb->nr_in_set) receives the given
+ * replication set, tier, replacement policy and discard flag plus a freshly
+ * generated UUID; sb->nr_in_set is then advanced by one.
+ */
+static void next_cache_device(struct cache_sb *sb,
+ unsigned replication_set,
+ unsigned tier,
+ unsigned replacement_policy,
+ bool discard)
+{
+ struct cache_member *member = &sb->members[sb->nr_in_set];
- /* Zero start of disk */
- if (pwrite(fd, zeroes, SB_START, 0) != SB_START) {
- perror("write error\n");
- exit(EXIT_FAILURE);
- }
- /* Write superblock */
- if (pwrite(fd, &sb, sizeof(sb), SB_START) != sizeof(sb)) {
- perror("write error\n");
- exit(EXIT_FAILURE);
- }
+ /* per-device settings supplied by the caller */
+ SET_CACHE_REPLICATION_SET(member, replication_set);
+ SET_CACHE_TIER(member, tier);
+ SET_CACHE_REPLACEMENT(member, replacement_policy);
+ SET_CACHE_DISCARD(member, discard);
+
+ /* every member gets its own identity */
+ uuid_generate(member->uuid.b);
- fsync(fd);
- close(fd);
+ sb->nr_in_set += 1;
}
static unsigned get_blocksize(const char *path)
@@ -344,44 +348,81 @@ static unsigned get_blocksize(const char *path)
return statbuf.st_blksize / 512;
}
+/*
+ * Parse p as a base-10 integer and return it.
+ *
+ * p   - string to parse (the argument itself, not the global optarg)
+ * max - exclusive upper bound; the result must satisfy 0 <= v < max
+ * msg - name of the value, used in the error message
+ *
+ * Prints "Invalid <msg> <p>" to stderr and exits with EXIT_FAILURE when p is
+ * empty, has trailing non-numeric characters, overflows, or is out of range.
+ */
+static long strtoul_or_die(const char *p, size_t max, const char *msg)
+{
+ char *end;
+
+ errno = 0;
+ long v = strtol(p, &end, 10);
+ /* end == p: no digits consumed; *end: junk after the number */
+ if (errno || end == p || *end || v < 0 || (size_t) v >= max) {
+ fprintf(stderr, "Invalid %s %s\n", msg, p);
+ exit(EXIT_FAILURE);
+ }
+
+ return v;
+}
+
int main(int argc, char **argv)
{
int c, bdev = -1;
- unsigned i, ncache_devices = 0, nbacking_devices = 0;
- unsigned long tier = 0;
- unsigned cache_device_tier[argc];
- char *cache_devices[argc];
- char *backing_devices[argc];
+ size_t i, nr_backing_devices = 0;
unsigned block_size = 0, bucket_size = 1024;
int writeback = 0, discard = 0, wipe_bcache = 0;
- unsigned cache_replacement_policy = 0;
+ unsigned replication_set = 0, tier = 0, replacement_policy = 0;
uint64_t data_offset = BDEV_DATA_START_DEFAULT;
- uuid_t set_uuid;
char *label = NULL;
- uuid_generate(set_uuid);
-
- struct option opts[] = {
- { "cache", 0, NULL, 'C' },
- { "bdev", 0, NULL, 'B' },
- { "bucket", 1, NULL, 'b' },
- { "block", 1, NULL, 'w' },
- { "writeback", 0, &writeback, 1 },
- { "wipe-bcache", 0, &wipe_bcache, 1 },
- { "discard", 0, &discard, 1 },
- { "cache_replacement_policy", 1, NULL, 'p' },
- { "data_offset", 1, NULL, 'o' },
- { "cset-uuid", 1, NULL, 'u' },
- { "tier", 1, NULL, 't' },
- { "label", 1, NULL, 'l' },
- { "help", 0, NULL, 'h' },
- { NULL, 0, NULL, 0 },
+ const char *cache_devices[argc];
+ int cache_dev_fd[argc];
+
+ const char *backing_devices[argc];
+ int backing_dev_fd[argc];
+ const char *backing_dev_labels[argc];
+
+ enum long_opts {
+ CACHE_SET_UUID = 256,
+ CSUM_TYPE,
+ REPLICATION_SET,
+ META_REPLICAS,
+ DATA_REPLICAS,
};
+ const struct option opts[] = {
+ { "cache", 0, NULL, 'C' },
+ { "bdev", 0, NULL, 'B' },
+ { "wipe-bcache", 0, &wipe_bcache, 1 },
+ { "label", 1, NULL, 'l' },
+ { "cset-uuid", 1, NULL, CACHE_SET_UUID },
+ { "csum-type", 1, NULL, CSUM_TYPE },
+
+ { "bucket", 1, NULL, 'b' },
+ { "block", 1, NULL, 'w' },
+
+ { "replication-set", 1, NULL, REPLICATION_SET },
+ { "meta-replicas", 1, NULL, META_REPLICAS},
+ { "data-replicas", 1, NULL, DATA_REPLICAS },
+ { "tier", 1, NULL, 't' },
+ { "cache_replacement_policy", 1, NULL, 'p' },
+ { "discard", 0, &discard, 1 },
+
+ { "writeback", 0, &writeback, 1 },
+ { "data_offset", 1, NULL, 'o' },
+
+ { "help", 0, NULL, 'h' },
+ { NULL, 0, NULL, 0 },
+ };
+
+ struct cache_sb *cache_set_sb = calloc(1, sizeof(*cache_set_sb) +
+ sizeof(struct cache_member) * argc);
+
+ uuid_generate(cache_set_sb->set_uuid.b);
+ SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb, BCH_CSUM_CRC32C);
+ SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb, 1);
+ SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb, 1);
+
while ((c = getopt_long(argc, argv,
"-hCBU:w:b:l:",
- opts, NULL)) != -1)
+ opts, NULL)) != -1) {
+
switch (c) {
case 'C':
bdev = 0;
@@ -389,24 +430,56 @@ int main(int argc, char **argv)
case 'B':
bdev = 1;
break;
+ case 'l':
+ label = optarg;
+ memcpy(cache_set_sb->label, label,
+ sizeof(cache_set_sb->label));
+ break;
+ case CACHE_SET_UUID:
+ if (uuid_parse(optarg, cache_set_sb->set_uuid.b)) {
+ fprintf(stderr, "Bad uuid\n");
+ exit(EXIT_FAILURE);
+ }
+ break;
+ case CSUM_TYPE:
+ SET_CACHE_PREFERRED_CSUM_TYPE(cache_set_sb,
+ read_string_list_or_die(optarg, csum_types,
+ "csum type"));
+ break;
+
case 'b':
bucket_size = hatoi_validate(optarg, "bucket size");
break;
case 'w':
block_size = hatoi_validate(optarg, "block size");
break;
-#if 0
- case 'U':
- if (uuid_parse(optarg, sb.uuid)) {
- fprintf(stderr, "Bad uuid\n");
- exit(EXIT_FAILURE);
- }
+
+ case REPLICATION_SET:
+ replication_set = strtoul_or_die(optarg,
+ CACHE_REPLICATION_SET_MAX,
+ "replication set");
+ break;
+ case META_REPLICAS:
+ SET_CACHE_SET_META_REPLICAS_WANT(cache_set_sb,
+ strtoul_or_die(optarg,
+ CACHE_SET_META_REPLICAS_WANT_MAX,
+ "meta replicas"));
+ break;
+ case DATA_REPLICAS:
+ SET_CACHE_SET_DATA_REPLICAS_WANT(cache_set_sb,
+ strtoul_or_die(optarg,
+ CACHE_SET_DATA_REPLICAS_WANT_MAX,
+ "data replicas"));
+ break;
+ case 't':
+ tier = strtoul_or_die(optarg, CACHE_TIERS, "tier");
break;
-#endif
case 'p':
- cache_replacement_policy = read_string_list(optarg,
- cache_replacement_policies);
+ replacement_policy = read_string_list_or_die(optarg,
+ replacement_policies,
+ "cache replacement policy");
break;
+
case 'o':
data_offset = atoll(optarg);
if (data_offset < BDEV_DATA_START_DEFAULT) {
@@ -415,22 +488,6 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
break;
- case 'u':
- if (uuid_parse(optarg, set_uuid)) {
- fprintf(stderr, "Bad uuid\n");
- exit(EXIT_FAILURE);
- }
- break;
- case 'l':
- label = optarg;
- break;
- case 't':
- tier = strtoul(optarg, NULL, 10);
- if (tier >= CACHE_TIERS) {
- fprintf(stderr, "Invalid tier %lu\n", tier);
- exit(EXIT_FAILURE);
- }
- break;
case 'h':
usage();
break;
@@ -441,16 +498,22 @@ int main(int argc, char **argv)
}
if (bdev) {
- backing_devices[nbacking_devices++] = optarg;
+ backing_dev_labels[nr_backing_devices] = label;
+ backing_devices[nr_backing_devices++] = optarg;
} else {
- cache_device_tier[ncache_devices] = tier;
- cache_devices[ncache_devices++] = optarg;
+ cache_devices[cache_set_sb->nr_in_set] = optarg;
+ next_cache_device(cache_set_sb,
+ replication_set,
+ tier,
+ replacement_policy,
+ discard);
}
break;
}
+ }
- if (!ncache_devices && !nbacking_devices) {
+ if (!cache_set_sb->nr_in_set && !nr_backing_devices) {
fprintf(stderr, "Please supply a device\n");
usage();
}
@@ -461,27 +524,29 @@ int main(int argc, char **argv)
}
if (!block_size) {
- for (i = 0; i < ncache_devices; i++)
+ for (i = 0; i < cache_set_sb->nr_in_set; i++)
block_size = max(block_size,
get_blocksize(cache_devices[i]));
- for (i = 0; i < nbacking_devices; i++)
+ for (i = 0; i < nr_backing_devices; i++)
block_size = max(block_size,
get_blocksize(backing_devices[i]));
}
- for (i = 0; i < ncache_devices; i++)
- write_sb(cache_devices[i], block_size, bucket_size,
- writeback, discard, wipe_bcache,
- cache_replacement_policy, data_offset,
- set_uuid, cache_device_tier[i], false,
- ncache_devices, i, label);
-
- for (i = 0; i < nbacking_devices; i++)
- write_sb(backing_devices[i], block_size, bucket_size,
- writeback, discard, wipe_bcache,
- cache_replacement_policy, data_offset,
- set_uuid, 0, true, nbacking_devices, i, label);
+ for (i = 0; i < cache_set_sb->nr_in_set; i++)
+ cache_dev_fd[i] = dev_open(cache_devices[i], wipe_bcache);
+
+ for (i = 0; i < nr_backing_devices; i++)
+ backing_dev_fd[i] = dev_open(backing_devices[i], wipe_bcache);
+
+ write_cache_sbs(cache_dev_fd, cache_set_sb, block_size, bucket_size);
+
+ for (i = 0; i < nr_backing_devices; i++)
+ write_backingdev_sb(backing_dev_fd[i],
+ block_size, bucket_size,
+ writeback, data_offset,
+ backing_dev_labels[i],
+ cache_set_sb->set_uuid);
return 0;
}