summaryrefslogtreecommitdiff
path: root/libbcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbcache.c')
-rw-r--r--libbcache.c207
1 files changed, 117 insertions, 90 deletions
diff --git a/libbcache.c b/libbcache.c
index 6908ead9..0cfafbbc 100644
--- a/libbcache.c
+++ b/libbcache.c
@@ -23,66 +23,82 @@
#define BCH_MIN_NR_NBUCKETS (1 << 10)
-/* first bucket should start 1 mb in, in sectors: */
-#define FIRST_BUCKET_OFFSET (1 << 11)
-
/* minimum size filesystem we can create, given a bucket size: */
static u64 min_size(unsigned bucket_size)
{
- return (DIV_ROUND_UP(FIRST_BUCKET_OFFSET, bucket_size) +
- BCH_MIN_NR_NBUCKETS) * bucket_size;
+ return BCH_MIN_NR_NBUCKETS * bucket_size;
}
-static void init_layout(struct bch_sb_layout *l)
+static void init_layout(struct bch_sb_layout *l, unsigned block_size,
+ u64 start, u64 end)
{
+ unsigned sb_size;
+ u64 backup; /* offset of 2nd sb */
+
memset(l, 0, sizeof(*l));
+ if (start != BCH_SB_SECTOR)
+ start = round_up(start, block_size);
+ end = round_down(end, block_size);
+
+ if (start >= end)
+ die("insufficient space for superblocks");
+
+ /*
+ * Create two superblocks in the allowed range: reserve a maximum of 64k
+ */
+ sb_size = min_t(u64, 128, end - start / 2);
+
+ backup = start + sb_size;
+ backup = round_up(backup, block_size);
+
+ backup = min(backup, end);
+
+ sb_size = min(end - backup, backup- start);
+ sb_size = rounddown_pow_of_two(sb_size);
+
+ if (sb_size < 8)
+ die("insufficient space for superblocks");
+
l->magic = BCACHE_MAGIC;
l->layout_type = 0;
l->nr_superblocks = 2;
- l->sb_max_size_bits = 7;
- l->sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
- l->sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR +
- (1 << l->sb_max_size_bits));
+ l->sb_max_size_bits = ilog2(sb_size);
+ l->sb_offset[0] = cpu_to_le64(start);
+ l->sb_offset[1] = cpu_to_le64(backup);
}
-void bcache_format(struct dev_opts *devs, size_t nr_devs,
- unsigned block_size,
- unsigned btree_node_size,
- unsigned meta_csum_type,
- unsigned data_csum_type,
- unsigned compression_type,
- const char *passphrase,
- unsigned meta_replicas,
- unsigned data_replicas,
- unsigned on_error_action,
- unsigned max_journal_entry_size,
- char *label,
- uuid_le uuid)
+struct bch_sb *bcache_format(struct format_opts opts,
+ struct dev_opts *devs, size_t nr_devs)
{
struct bch_sb *sb;
struct dev_opts *i;
struct bch_sb_field_members *mi;
- unsigned u64s, j;
+ unsigned u64s;
/* calculate block size: */
- if (!block_size)
+ if (!opts.block_size)
for (i = devs; i < devs + nr_devs; i++)
- block_size = max(block_size,
- get_blocksize(i->path, i->fd));
+ opts.block_size = max(opts.block_size,
+ get_blocksize(i->path, i->fd));
/* calculate bucket sizes: */
for (i = devs; i < devs + nr_devs; i++) {
+ if (!i->sb_offset) {
+ i->sb_offset = BCH_SB_SECTOR;
+ i->sb_end = BCH_SB_SECTOR + 256;
+ }
+
if (!i->size)
i->size = get_size(i->path, i->fd) >> 9;
if (!i->bucket_size) {
- if (i->size < min_size(block_size))
+ if (i->size < min_size(opts.block_size))
die("cannot format %s, too small (%llu sectors, min %llu)",
- i->path, i->size, min_size(block_size));
+ i->path, i->size, min_size(opts.block_size));
/* Want a bucket size of at least 128k, if possible: */
- i->bucket_size = max(block_size, 256U);
+ i->bucket_size = max(opts.block_size, 256U);
if (i->size >= min_size(i->bucket_size)) {
unsigned scale = max(1,
@@ -99,34 +115,36 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
}
}
- /* first bucket: 1 mb in */
- i->first_bucket = DIV_ROUND_UP(FIRST_BUCKET_OFFSET, i->bucket_size);
i->nbuckets = i->size / i->bucket_size;
- if (i->bucket_size < block_size)
+ if (i->bucket_size < opts.block_size)
die("Bucket size cannot be smaller than block size");
- if (i->nbuckets - i->first_bucket < BCH_MIN_NR_NBUCKETS)
+ if (i->nbuckets < BCH_MIN_NR_NBUCKETS)
die("Not enough buckets: %llu, need %u (bucket size %u)",
- i->nbuckets - i->first_bucket, BCH_MIN_NR_NBUCKETS,
- i->bucket_size);
+ i->nbuckets, BCH_MIN_NR_NBUCKETS, i->bucket_size);
}
/* calculate btree node size: */
- if (!btree_node_size) {
+ if (!opts.btree_node_size) {
/* 256k default btree node size */
- btree_node_size = 512;
+ opts.btree_node_size = 512;
for (i = devs; i < devs + nr_devs; i++)
- btree_node_size = min(btree_node_size, i->bucket_size);
+ opts.btree_node_size =
+ min(opts.btree_node_size, i->bucket_size);
}
- if (!max_journal_entry_size) {
+ if (!opts.max_journal_entry_size) {
/* 2 MB default: */
- max_journal_entry_size = 4096;
+ opts.max_journal_entry_size = 4096;
}
- max_journal_entry_size = roundup_pow_of_two(max_journal_entry_size);
+ opts.max_journal_entry_size =
+ roundup_pow_of_two(opts.max_journal_entry_size);
+
+ if (uuid_is_null(opts.uuid.b))
+ uuid_generate(opts.uuid.b);
sb = calloc(1, sizeof(*sb) +
sizeof(struct bch_sb_field_members) +
@@ -135,35 +153,29 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
sb->version = cpu_to_le64(BCACHE_SB_VERSION_CDEV_V4);
sb->magic = BCACHE_MAGIC;
- sb->block_size = cpu_to_le16(block_size);
- sb->user_uuid = uuid;
+ sb->block_size = cpu_to_le16(opts.block_size);
+ sb->user_uuid = opts.uuid;
sb->nr_devices = nr_devs;
- init_layout(&sb->layout);
-
uuid_generate(sb->uuid.b);
- if (label)
- strncpy((char *) sb->label, label, sizeof(sb->label));
+ if (opts.label)
+ strncpy((char *) sb->label, opts.label, sizeof(sb->label));
- /*
- * don't have a userspace crc32c implementation handy, just always use
- * crc64
- */
- SET_BCH_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64);
- SET_BCH_SB_META_CSUM_TYPE(sb, meta_csum_type);
- SET_BCH_SB_DATA_CSUM_TYPE(sb, data_csum_type);
- SET_BCH_SB_COMPRESSION_TYPE(sb, compression_type);
+ SET_BCH_SB_CSUM_TYPE(sb, opts.meta_csum_type);
+ SET_BCH_SB_META_CSUM_TYPE(sb, opts.meta_csum_type);
+ SET_BCH_SB_DATA_CSUM_TYPE(sb, opts.data_csum_type);
+ SET_BCH_SB_COMPRESSION_TYPE(sb, opts.compression_type);
- SET_BCH_SB_BTREE_NODE_SIZE(sb, btree_node_size);
+ SET_BCH_SB_BTREE_NODE_SIZE(sb, opts.btree_node_size);
SET_BCH_SB_GC_RESERVE(sb, 8);
- SET_BCH_SB_META_REPLICAS_WANT(sb, meta_replicas);
- SET_BCH_SB_META_REPLICAS_HAVE(sb, meta_replicas);
- SET_BCH_SB_DATA_REPLICAS_WANT(sb, data_replicas);
- SET_BCH_SB_DATA_REPLICAS_HAVE(sb, data_replicas);
- SET_BCH_SB_ERROR_ACTION(sb, on_error_action);
+ SET_BCH_SB_META_REPLICAS_WANT(sb, opts.meta_replicas);
+ SET_BCH_SB_META_REPLICAS_HAVE(sb, opts.meta_replicas);
+ SET_BCH_SB_DATA_REPLICAS_WANT(sb, opts.data_replicas);
+ SET_BCH_SB_DATA_REPLICAS_HAVE(sb, opts.data_replicas);
+ SET_BCH_SB_ERROR_ACTION(sb, opts.on_error_action);
SET_BCH_SB_STR_HASH_TYPE(sb, BCH_STR_HASH_SIPHASH);
- SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(max_journal_entry_size));
+ SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(opts.max_journal_entry_size));
struct timespec now;
if (clock_gettime(CLOCK_REALTIME, &now))
@@ -172,7 +184,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
sb->time_base_lo = cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
sb->time_precision = cpu_to_le32(1);
- if (passphrase) {
+ if (opts.encrypted) {
struct bch_sb_field_crypt *crypt = vstruct_end(sb);
u64s = sizeof(struct bch_sb_field_crypt) / sizeof(u64);
@@ -181,7 +193,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
crypt->field.u64s = cpu_to_le32(u64s);
crypt->field.type = BCH_SB_FIELD_crypt;
- bch_sb_crypt_init(sb, crypt, passphrase);
+ bch_sb_crypt_init(sb, crypt, opts.passphrase);
SET_BCH_SB_ENCRYPTION_TYPE(sb, 1);
}
@@ -198,7 +210,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
uuid_generate(m->uuid.b);
m->nbuckets = cpu_to_le64(i->nbuckets);
- m->first_bucket = cpu_to_le16(i->first_bucket);
+ m->first_bucket = 0;
m->bucket_size = cpu_to_le16(i->bucket_size);
SET_BCH_MEMBER_TIER(m, i->tier);
@@ -209,42 +221,49 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
for (i = devs; i < devs + nr_devs; i++) {
sb->dev_idx = i - devs;
- static const char zeroes[BCH_SB_SECTOR << 9];
- struct nonce nonce = { 0 };
+ init_layout(&sb->layout, opts.block_size,
+ i->sb_offset, i->sb_end);
- /* Zero start of disk */
- xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
+ if (i->sb_offset == BCH_SB_SECTOR) {
+ /* Zero start of disk */
+ static const char zeroes[BCH_SB_SECTOR << 9];
- xpwrite(i->fd, &sb->layout, sizeof(sb->layout),
- BCH_SB_LAYOUT_SECTOR << 9);
-
- for (j = 0; j < sb->layout.nr_superblocks; j++) {
- sb->offset = sb->layout.sb_offset[j];
-
- sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb),
- nonce, sb);
- xpwrite(i->fd, sb, vstruct_bytes(sb),
- le64_to_cpu(sb->offset) << 9);
+ xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
}
- fsync(i->fd);
+ bcache_super_write(i->fd, sb);
close(i->fd);
}
- bcache_super_print(sb, HUMAN_READABLE);
+ return sb;
+}
+
+void bcache_super_write(int fd, struct bch_sb *sb)
+{
+ struct nonce nonce = { 0 };
+
+ for (unsigned i = 0; i < sb->layout.nr_superblocks; i++) {
+ sb->offset = sb->layout.sb_offset[i];
+
+ if (sb->offset == BCH_SB_SECTOR) {
+ /* Write backup layout */
+ xpwrite(fd, &sb->layout, sizeof(sb->layout),
+ BCH_SB_LAYOUT_SECTOR << 9);
+ }
+
+ sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb), nonce, sb);
+ xpwrite(fd, sb, vstruct_bytes(sb),
+ le64_to_cpu(sb->offset) << 9);
+ }
- free(sb);
+ fsync(fd);
}
-struct bch_sb *bcache_super_read(const char *path)
+struct bch_sb *__bcache_super_read(int fd, u64 sector)
{
struct bch_sb sb, *ret;
- int fd = open(path, O_RDONLY);
- if (fd < 0)
- die("couldn't open %s", path);
-
- xpread(fd, &sb, sizeof(sb), BCH_SB_SECTOR << 9);
+ xpread(fd, &sb, sizeof(sb), sector << 9);
if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
die("not a bcache superblock");
@@ -253,11 +272,19 @@ struct bch_sb *bcache_super_read(const char *path)
ret = malloc(bytes);
- xpread(fd, ret, bytes, BCH_SB_SECTOR << 9);
+ xpread(fd, ret, bytes, sector << 9);
return ret;
}
+struct bch_sb *bcache_super_read(const char *path)
+{
+ int fd = xopen(path, O_RDONLY);
+ struct bch_sb *sb = __bcache_super_read(fd, BCH_SB_SECTOR);
+ close(fd);
+ return sb;
+}
+
void bcache_super_print(struct bch_sb *sb, int units)
{
struct bch_sb_field_members *mi;