author    Kent Overstreet <kent.overstreet@gmail.com>  2016-10-03 19:22:17 -0800
committer Kent Overstreet <kent.overstreet@gmail.com>  2017-02-28 03:05:38 -0900
commit    a5b5eba7f788bb77cf57f9c94f3474a2d439ab0b (patch)
tree      278813d1b1a9024174531376d41a2ba04a3b27f6
parent    e4d1c93d85a5b86c04599bfc9f658308d741fd41 (diff)
New on disk format - encryption
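
The new format stores a master encryption key in a bch_sb_field_crypt superblock field, wrapped with a key derived from the user's passphrase via scrypt (the KDF parameters are kept as base-2 logs in kdf_flags). Data and metadata can then be authenticated/encrypted with the new ChaCha20/Poly1305 checksum types, and a new `bcache unlock` command derives the passphrase key and installs it in the user keyring before the filesystem is run or mounted.

The passphrase check is the only subtle part: ChaCha20 is an XOR stream cipher, so running the stored, encrypted key blob through the cipher once more with the derived key either brings back the BCH_KEY_MAGIC constant (passphrase correct) or leaves garbage — which is effectively what the bch_key_is_encrypted() test in cmd_unlock() checks. Below is a minimal standalone sketch of that check, calling libscrypt and libsodium directly instead of the derive_passphrase()/bch_chacha_encrypt_key() helpers added by this commit; the helper and struct names here are hypothetical, and it assumes libsodium's 8-byte ChaCha20 nonce rather than the 16-byte struct nonce used on disk.

/*
 * Illustrative sketch only -- not part of this commit.
 */
#include <stdint.h>
#include <string.h>
#include <libscrypt.h>
#include <sodium.h>

#define KEY_MAGIC	0x79656b2a2a686362ULL	/* "bch**key", cf. BCH_KEY_MAGIC */

struct encrypted_key {
	uint64_t	magic;
	uint64_t	key[4];			/* 256 bit master key */
};

/* Returns 0 if @passphrase unwraps @sb_key, -1 otherwise: */
static int check_passphrase(const char *passphrase,
			    struct encrypted_key *sb_key,
			    const unsigned char nonce[crypto_stream_chacha20_NONCEBYTES])
{
	static const unsigned char salt[] = "bcache";
	unsigned char derived[crypto_stream_chacha20_KEYBYTES];

	/* passphrase -> 256 bit key; N/r/p would really be read from kdf_flags: */
	if (libscrypt_scrypt((const uint8_t *) passphrase, strlen(passphrase),
			     salt, sizeof(salt),
			     SCRYPT_N, SCRYPT_r, SCRYPT_p,
			     derived, sizeof(derived)))
		return -1;

	/*
	 * ChaCha20 is an XOR stream cipher, so "encrypting" the already
	 * encrypted blob a second time with the same key/nonce decrypts it:
	 */
	crypto_stream_chacha20_xor((unsigned char *) sb_key,
				   (const unsigned char *) sb_key,
				   sizeof(*sb_key), nonce, derived);

	return sb_key->magic == KEY_MAGIC ? 0 : -1;
}
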
-rw-r--r--  .bcache_revision | 2
-rw-r--r--  Makefile | 7
-rw-r--r--  bcache-userspace-shim.c | 1
-rw-r--r--  bcache.c | 5
-rw-r--r--  cmd_debug.c | 32
-rw-r--r--  cmd_device.c | 8
-rw-r--r--  cmd_format.c | 28
-rw-r--r--  cmd_key.c | 62
-rw-r--r--  cmds.h | 1
-rw-r--r--  crypto.c | 103
-rw-r--r--  crypto.h | 13
-rw-r--r--  include/crypto/algapi.h | 184
-rw-r--r--  include/crypto/chacha20.h | 11
-rw-r--r--  include/crypto/hash.h | 115
-rw-r--r--  include/crypto/internal/hash.h | 3
-rw-r--r--  include/crypto/poly1305.h | 34
-rw-r--r--  include/crypto/sha.h | 110
-rw-r--r--  include/crypto/sha1_base.h | 107
-rw-r--r--  include/keys/user-type.h | 6
-rw-r--r--  include/linux/bcache.h | 758
-rw-r--r--  include/linux/crypto.h | 800
-rw-r--r--  include/linux/cryptohash.h | 20
-rw-r--r--  include/linux/kernel.h | 2
-rw-r--r--  include/linux/key.h | 50
-rw-r--r--  include/linux/mempool.h | 5
-rw-r--r--  include/linux/page.h | 7
-rw-r--r--  include/linux/scatterlist.h | 111
-rw-r--r--  include/linux/time64.h | 18
-rw-r--r--  include/trace/events/bcache.h | 44
-rw-r--r--  libbcache.c | 275
-rw-r--r--  libbcache.h | 7
-rw-r--r--  libbcache/acl.c | 2
-rw-r--r--  libbcache/alloc.c | 93
-rw-r--r--  libbcache/alloc_types.h | 2
-rw-r--r--  libbcache/bcache.h | 46
-rw-r--r--  libbcache/bkey.c | 172
-rw-r--r--  libbcache/bkey.h | 28
-rw-r--r--  libbcache/blockdev.c | 41
-rw-r--r--  libbcache/bset.c | 4
-rw-r--r--  libbcache/bset.h | 17
-rw-r--r--  libbcache/btree_cache.c | 2
-rw-r--r--  libbcache/btree_gc.c | 71
-rw-r--r--  libbcache/btree_gc.h | 2
-rw-r--r--  libbcache/btree_io.c | 201
-rw-r--r--  libbcache/btree_types.h | 16
-rw-r--r--  libbcache/btree_update.c | 31
-rw-r--r--  libbcache/btree_update.h | 24
-rw-r--r--  libbcache/buckets.c | 13
-rw-r--r--  libbcache/buckets.h | 2
-rw-r--r--  libbcache/chardev.c | 37
-rw-r--r--  libbcache/checksum.c | 450
-rw-r--r--  libbcache/checksum.h | 129
-rw-r--r--  libbcache/compress.c | 144
-rw-r--r--  libbcache/compress.h | 5
-rw-r--r--  libbcache/debug.c | 12
-rw-r--r--  libbcache/dirent.c | 31
-rw-r--r--  libbcache/extents.c | 443
-rw-r--r--  libbcache/extents.h | 211
-rw-r--r--  libbcache/fs-gc.c | 163
-rw-r--r--  libbcache/fs-io.c | 159
-rw-r--r--  libbcache/fs.c | 156
-rw-r--r--  libbcache/fs.h | 9
-rw-r--r--  libbcache/inode.c | 288
-rw-r--r--  libbcache/inode.h | 43
-rw-r--r--  libbcache/io.c | 116
-rw-r--r--  libbcache/io.h | 2
-rw-r--r--  libbcache/io_types.h | 9
-rw-r--r--  libbcache/journal.c | 583
-rw-r--r--  libbcache/journal.h | 29
-rw-r--r--  libbcache/journal_types.h | 6
-rw-r--r--  libbcache/migrate.c | 16
-rw-r--r--  libbcache/move.c | 27
-rw-r--r--  libbcache/movinggc.c | 2
-rw-r--r--  libbcache/notify.c | 4
-rw-r--r--  libbcache/opts.c | 19
-rw-r--r--  libbcache/opts.h | 88
-rw-r--r--  libbcache/siphash.c | 99
-rw-r--r--  libbcache/str_hash.h | 84
-rw-r--r--  libbcache/super-io.c | 798
-rw-r--r--  libbcache/super-io.h | 141
-rw-r--r--  libbcache/super.c | 945
-rw-r--r--  libbcache/super.h | 48
-rw-r--r--  libbcache/super_types.h | 2
-rw-r--r--  libbcache/sysfs.c | 110
-rw-r--r--  libbcache/tier.c | 3
-rw-r--r--  libbcache/vstructs.h | 62
-rw-r--r--  libbcache/xattr.c | 35
-rw-r--r--  linux/crypto/algapi.c | 315
-rw-r--r--  linux/crypto/api.c | 227
-rw-r--r--  linux/crypto/blkcipher.c | 47
-rw-r--r--  linux/crypto/chacha20_generic.c | 99
-rw-r--r--  linux/crypto/cipher.c | 123
-rw-r--r--  linux/crypto/internal.h | 63
-rw-r--r--  linux/crypto/poly1305_generic.c | 76
-rw-r--r--  linux/crypto/sha1_generic.c | 85
-rw-r--r--  linux/crypto/sha256_generic.c | 69
-rw-r--r--  linux/crypto/shash.c | 252
-rw-r--r--  linux/lz4hc_compress.c | 454
-rw-r--r--  linux/sha1.c | 201
99 files changed, 5438 insertions, 5777 deletions
diff --git a/.bcache_revision b/.bcache_revision
index 5caaaba2..8fb728e4 100644
--- a/.bcache_revision
+++ b/.bcache_revision
@@ -1 +1 @@
-BCACHE_REVISION=76e3b2312705df2cb5adb8834bc6df56a288932e
+BCACHE_REVISION=561f3067172cbfc63a680cfb670d558724441123
diff --git a/Makefile b/Makefile
index a3bf8d8e..bc0402c3 100644
--- a/Makefile
+++ b/Makefile
@@ -20,9 +20,10 @@ else
LDFLAGS+=-flto
endif
-PKGCONFIG_LIBS="blkid uuid liburcu"
+PKGCONFIG_LIBS="blkid uuid liburcu libsodium"
CFLAGS+=`pkg-config --cflags ${PKGCONFIG_LIBS}`
-LDLIBS+=`pkg-config --libs ${PKGCONFIG_LIBS}` -lm -lpthread -lrt
+LDLIBS+=`pkg-config --libs ${PKGCONFIG_LIBS}` \
+ -lm -lpthread -lrt -lscrypt -lkeyutils
ifeq ($(PREFIX),/usr)
ROOT_SBINDIR=/sbin
@@ -48,7 +49,9 @@ OBJS=bcache.o \
cmd_fs.o \
cmd_fsck.o \
cmd_format.o \
+ cmd_key.o \
cmd_run.o \
+ crypto.o \
libbcache.o \
qcow2.o \
tools-util.o \
diff --git a/bcache-userspace-shim.c b/bcache-userspace-shim.c
index 9be5b507..8634d8f7 100644
--- a/bcache-userspace-shim.c
+++ b/bcache-userspace-shim.c
@@ -144,6 +144,7 @@ enum fsck_err_opts fsck_err_opt;
#include "six.c"
//#include "stats.c"
#include "super.c"
+#include "super-io.c"
//#include "sysfs.c"
#include "tier.c"
#include "trace.c"
diff --git a/bcache.c b/bcache.c
index 1fb1a55e..ac9eb07e 100644
--- a/bcache.c
+++ b/bcache.c
@@ -30,6 +30,7 @@ static void usage(void)
"\n"
"Commands for formatting, startup and shutdown:\n"
" format Format a new filesystem\n"
+ " unlock Unlock an encrypted filesystem prior to running/mounting\n"
" assemble Assemble an existing multi device filesystem\n"
" incremental Incrementally assemble an existing multi device filesystem\n"
" run Start a partially assembled filesystem\n"
@@ -46,6 +47,7 @@ static void usage(void)
"\n"
"Repair:\n"
" bcache fsck Check an existing filesystem for errors\n"
+ "\n"
"Debug:\n"
" bcache dump Dump filesystem metadata to a qcow2 image\n"
" bcache list List filesystem metadata in textual form\n");
@@ -94,6 +96,9 @@ int main(int argc, char *argv[])
if (!strcmp(cmd, "fsck"))
return cmd_fsck(argc, argv);
+ if (!strcmp(cmd, "unlock"))
+ return cmd_unlock(argc, argv);
+
if (!strcmp(cmd, "dump"))
return cmd_dump(argc, argv);
if (!strcmp(cmd, "list"))
diff --git a/cmd_debug.c b/cmd_debug.c
index 0813d292..df23ae10 100644
--- a/cmd_debug.c
+++ b/cmd_debug.c
@@ -27,21 +27,27 @@ static void dump_usage(void)
"Report bugs to <linux-bcache@vger.kernel.org>");
}
-void dump_one_device(struct cache_set *c, struct cache *ca, int fd)
+static void dump_one_device(struct cache_set *c, struct cache *ca, int fd)
{
- struct cache_sb *sb = ca->disk_sb.sb;
+ struct bch_sb *sb = ca->disk_sb.sb;
sparse_data data;
unsigned i;
darray_init(data);
/* Superblock: */
- data_add(&data, SB_SECTOR << 9, __set_bytes(sb, le16_to_cpu(sb->u64s)));
+ data_add(&data, BCH_SB_LAYOUT_SECTOR << 9,
+ sizeof(struct bch_sb_layout));
+
+ for (i = 0; i < sb->layout.nr_superblocks; i++)
+ data_add(&data,
+ le64_to_cpu(sb->layout.sb_offset[i]) << 9,
+ vstruct_bytes(sb));
/* Journal: */
- for (i = 0; i < bch_nr_journal_buckets(ca->disk_sb.sb); i++)
+ for (i = 0; i < ca->journal.nr; i++)
if (ca->journal.bucket_seq[i] >= c->journal.last_seq_ondisk) {
- u64 bucket = journal_bucket(ca->disk_sb.sb, i);
+ u64 bucket = ca->journal.buckets[i];
data_add(&data,
bucket_bytes(ca) * bucket,
@@ -64,7 +70,7 @@ void dump_one_device(struct cache_set *c, struct cache *ca, int fd)
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
extent_for_each_ptr(e, ptr)
- if (ptr->dev == ca->sb.nr_this_dev)
+ if (ptr->dev == ca->dev_idx)
data_add(&data,
ptr->offset << 9,
b->written << 9);
@@ -120,13 +126,13 @@ int cmd_dump(int argc, char *argv[])
down_read(&c->gc_lock);
- for (i = 0; i < c->sb.nr_in_set; i++)
+ for (i = 0; i < c->sb.nr_devices; i++)
if (c->cache[i])
nr_devices++;
BUG_ON(!nr_devices);
- for (i = 0; i < c->sb.nr_in_set; i++) {
+ for (i = 0; i < c->sb.nr_devices; i++) {
int mode = O_WRONLY|O_CREAT|O_TRUNC;
if (!force)
@@ -155,8 +161,8 @@ int cmd_dump(int argc, char *argv[])
return 0;
}
-void list_keys(struct cache_set *c, enum btree_id btree_id,
- struct bpos start, struct bpos end, int mode)
+static void list_keys(struct cache_set *c, enum btree_id btree_id,
+ struct bpos start, struct bpos end, int mode)
{
struct btree_iter iter;
struct bkey_s_c k;
@@ -173,8 +179,8 @@ void list_keys(struct cache_set *c, enum btree_id btree_id,
bch_btree_iter_unlock(&iter);
}
-void list_btree_formats(struct cache_set *c, enum btree_id btree_id,
- struct bpos start, struct bpos end, int mode)
+static void list_btree_formats(struct cache_set *c, enum btree_id btree_id,
+ struct bpos start, struct bpos end, int mode)
{
struct btree_iter iter;
struct btree *b;
@@ -190,7 +196,7 @@ void list_btree_formats(struct cache_set *c, enum btree_id btree_id,
bch_btree_iter_unlock(&iter);
}
-struct bpos parse_pos(char *buf)
+static struct bpos parse_pos(char *buf)
{
char *s = buf;
char *inode = strsep(&s, ":");
diff --git a/cmd_device.c b/cmd_device.c
index ecb63bb4..1c5208af 100644
--- a/cmd_device.c
+++ b/cmd_device.c
@@ -103,7 +103,7 @@ int cmd_device_show(int argc, char *argv[])
struct bcache_dev devices[256];
unsigned i, j, nr_devices = 0, nr_active_tiers = 0;
- unsigned tiers[CACHE_TIERS]; /* number of devices in each tier */
+ unsigned tiers[BCH_TIER_MAX]; /* number of devices in each tier */
memset(tiers, 0, sizeof(tiers));
while ((entry = readdir(fs.sysfs))) {
@@ -133,14 +133,14 @@ int cmd_device_show(int argc, char *argv[])
close(fd);
}
- for (i = 0; i < CACHE_TIERS; i++)
+ for (i = 0; i < BCH_TIER_MAX; i++)
if (tiers[i])
nr_active_tiers++;
/* Print out devices sorted by tier: */
bool first = true;
- for (i = 0; i < CACHE_TIERS; i++) {
+ for (i = 0; i < BCH_TIER_MAX; i++) {
if (!tiers[i])
continue;
@@ -168,7 +168,7 @@ int cmd_device_show(int argc, char *argv[])
int cmd_device_show(int argc, char *argv[])
{
- struct cache_sb *sb;
+ struct bch_sb *sb;
if (argc != 2)
die("please supply a single device");
diff --git a/cmd_format.c b/cmd_format.c
index b955b416..2b1453ee 100644
--- a/cmd_format.c
+++ b/cmd_format.c
@@ -24,6 +24,7 @@
#include "cmds.h"
#include "libbcache.h"
+#include "crypto.h"
#include "opts.h"
#include "util.h"
@@ -80,6 +81,7 @@ static void usage(void)
" --metadata_checksum_type=(none|crc32c|crc64)\n"
" --data_checksum_type=(none|crc32c|crc64)\n"
" --compression_type=(none|lz4|gzip)\n"
+ " --encrypted\n"
" --error_action=(continue|readonly|panic)\n"
" Action to take on filesystem error\n"
" --max_journal_entry_size=size\n"
@@ -107,6 +109,7 @@ static void usage(void)
OPT(0, metadata_checksum_type, required_argument) \
OPT(0, data_checksum_type, required_argument) \
OPT(0, compression_type, required_argument) \
+ OPT(0, encrypted, no_argument) \
OPT('e', error_action, required_argument) \
OPT(0, max_journal_entry_size, required_argument) \
OPT('L', label, required_argument) \
@@ -164,6 +167,7 @@ int cmd_format(int argc, char *argv[])
unsigned meta_csum_type = BCH_CSUM_CRC32C;
unsigned data_csum_type = BCH_CSUM_CRC32C;
unsigned compression_type = BCH_COMPRESSION_NONE;
+ bool encrypted = false;
unsigned on_error_action = BCH_ON_ERROR_RO;
char *label = NULL;
uuid_le uuid;
@@ -208,6 +212,9 @@ int cmd_format(int argc, char *argv[])
bch_compression_types,
"compression type");
break;
+ case Opt_encrypted:
+ encrypted = true;
+ break;
case Opt_error_action:
case 'e':
on_error_action = read_string_list_or_die(optarg,
@@ -242,7 +249,7 @@ int cmd_format(int argc, char *argv[])
case Opt_tier:
case 't':
if (kstrtouint(optarg, 10, &tier) ||
- tier >= CACHE_TIERS)
+ tier >= BCH_TIER_MAX)
die("invalid tier");
break;
case Opt_discard:
@@ -270,6 +277,24 @@ int cmd_format(int argc, char *argv[])
if (uuid_is_null(uuid.b))
uuid_generate(uuid.b);
+ if (encrypted) {
+ passphrase = read_passphrase("Enter passphrase: ");
+
+ if (isatty(STDIN_FILENO)) {
+ char *pass2 =
+ read_passphrase("Enter same passphrase again: ");
+
+ if (strcmp(passphrase, pass2)) {
+ memzero_explicit(passphrase, strlen(passphrase));
+ memzero_explicit(pass2, strlen(pass2));
+ die("Passphrases do not match");
+ }
+
+ memzero_explicit(pass2, strlen(pass2));
+ free(pass2);
+ }
+ }
+
darray_foreach(dev, devices)
dev->fd = open_for_format(dev->path, force);
@@ -279,6 +304,7 @@ int cmd_format(int argc, char *argv[])
meta_csum_type,
data_csum_type,
compression_type,
+ passphrase,
1,
1,
on_error_action,
diff --git a/cmd_key.c b/cmd_key.c
new file mode 100644
index 00000000..587ecbe3
--- /dev/null
+++ b/cmd_key.c
@@ -0,0 +1,62 @@
+#include <errno.h>
+#include <unistd.h>
+#include <keyutils.h>
+#include <uuid/uuid.h>
+
+#include "cmds.h"
+#include "checksum.h"
+#include "crypto.h"
+#include "libbcache.h"
+
+int cmd_unlock(int argc, char *argv[])
+{
+ struct bch_encrypted_key sb_key;
+ struct bch_key passphrase_key;
+ struct bch_sb *sb;
+ struct bch_sb_field_crypt *crypt;
+ char *passphrase;
+ char uuid[40];
+ char description[60];
+
+ if (argc != 2)
+ die("please supply a single device");
+
+ sb = bcache_super_read(argv[1]);
+
+ crypt = bch_sb_get_crypt(sb);
+ if (!crypt)
+ die("filesystem is not encrypted");
+
+ sb_key = crypt->key;
+
+ if (!bch_key_is_encrypted(&sb_key))
+ die("filesystem does not have encryption key");
+
+ passphrase = read_passphrase("Enter passphrase: ");
+ derive_passphrase(crypt, &passphrase_key, passphrase);
+
+ /* Check if the user supplied the correct passphrase: */
+ if (bch_chacha_encrypt_key(&passphrase_key, __bch_sb_key_nonce(sb),
+ &sb_key, sizeof(sb_key)))
+ die("error encrypting key");
+
+ if (bch_key_is_encrypted(&sb_key))
+ die("incorrect passphrase");
+
+ uuid_unparse_lower(sb->user_uuid.b, uuid);
+ sprintf(description, "bcache:%s", uuid);
+
+ if (add_key("logon", description,
+ &passphrase_key, sizeof(passphrase_key),
+ KEY_SPEC_USER_KEYRING) < 0 ||
+ add_key("user", description,
+ &passphrase_key, sizeof(passphrase_key),
+ KEY_SPEC_USER_KEYRING) < 0)
+ die("add_key error: %s", strerror(errno));
+
+ memzero_explicit(&sb_key, sizeof(sb_key));
+ memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+ memzero_explicit(passphrase, strlen(passphrase));
+ free(passphrase);
+ return 0;
+}
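
Note that what gets installed in the keyring is the scrypt-derived passphrase key, not the decrypted master key, under both the "logon" type (which userspace cannot read back) and the "user" type, with description "bcache:<user UUID>". A consumer that wants the key back can search the user keyring for that description; the sketch below shows that lookup side only as an illustration — it is not part of this commit, and only the "user" key type and the description format come from the code above.

/*
 * Illustrative only; helper name is hypothetical.
 */
#include <stdio.h>
#include <keyutils.h>

static long lookup_bcache_key(const char *uuid_str)
{
	char description[60];

	snprintf(description, sizeof(description), "bcache:%s", uuid_str);

	/* search the user keyring, where cmd_unlock() added the key: */
	return keyctl_search(KEY_SPEC_USER_KEYRING, "user", description, 0);
}
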
diff --git a/cmds.h b/cmds.h
index c0c8aa56..946acfda 100644
--- a/cmds.h
+++ b/cmds.h
@@ -11,6 +11,7 @@
int cmd_format(int argc, char *argv[]);
+int cmd_unlock(int argc, char *argv[]);
int cmd_assemble(int argc, char *argv[]);
int cmd_incremental(int argc, char *argv[]);
int cmd_run(int argc, char *argv[]);
diff --git a/crypto.c b/crypto.c
new file mode 100644
index 00000000..86da70a1
--- /dev/null
+++ b/crypto.c
@@ -0,0 +1,103 @@
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <termios.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/random.h>
+#include <libscrypt.h>
+
+#include "checksum.h"
+#include "crypto.h"
+
+char *read_passphrase(const char *prompt)
+{
+ char *buf = NULL;
+ size_t buflen = 0;
+ ssize_t len;
+
+ if (isatty(STDIN_FILENO)) {
+ struct termios old, new;
+
+ fprintf(stderr, "%s", prompt);
+ fflush(stderr);
+
+ if (tcgetattr(STDIN_FILENO, &old))
+ die("error getting terminal attrs");
+
+ new = old;
+ new.c_lflag &= ~ECHO;
+ if (tcsetattr(STDIN_FILENO, TCSAFLUSH, &new))
+ die("error setting terminal attrs");
+
+ len = getline(&buf, &buflen, stdin);
+
+ tcsetattr(STDIN_FILENO, TCSAFLUSH, &old);
+ fprintf(stderr, "\n");
+ } else {
+ len = getline(&buf, &buflen, stdin);
+ }
+
+ if (len < 0)
+ die("error reading passphrase");
+ if (len && buf[len - 1] == '\n')
+ buf[len - 1] = '\0';
+
+ return buf;
+}
+
+void derive_passphrase(struct bch_sb_field_crypt *crypt,
+ struct bch_key *key,
+ const char *passphrase)
+{
+ const unsigned char salt[] = "bcache";
+ int ret;
+
+ switch (BCH_CRYPT_KDF_TYPE(crypt)) {
+ case BCH_KDF_SCRYPT:
+ ret = libscrypt_scrypt((void *) passphrase, strlen(passphrase),
+ salt, sizeof(salt),
+ 1ULL << BCH_KDF_SCRYPT_N(crypt),
+ 1ULL << BCH_KDF_SCRYPT_R(crypt),
+ 1ULL << BCH_KDF_SCRYPT_P(crypt),
+ (void *) key, sizeof(*key));
+ if (ret)
+ die("scrypt error: %i", ret);
+ break;
+ default:
+ die("unknown kdf type %llu", BCH_CRYPT_KDF_TYPE(crypt));
+ }
+}
+
+void bch_sb_crypt_init(struct bch_sb *sb,
+ struct bch_sb_field_crypt *crypt,
+ const char *passphrase)
+{
+ struct bch_key passphrase_key;
+
+ SET_BCH_CRYPT_KDF_TYPE(crypt, BCH_KDF_SCRYPT);
+ SET_BCH_KDF_SCRYPT_N(crypt, ilog2(SCRYPT_N));
+ SET_BCH_KDF_SCRYPT_R(crypt, ilog2(SCRYPT_r));
+ SET_BCH_KDF_SCRYPT_P(crypt, ilog2(SCRYPT_p));
+
+ derive_passphrase(crypt, &passphrase_key, passphrase);
+
+ crypt->key.magic = BCH_KEY_MAGIC;
+ get_random_bytes(&crypt->key.key, sizeof(crypt->key.key));
+
+ assert(!bch_key_is_encrypted(&crypt->key));
+
+ if (bch_chacha_encrypt_key(&passphrase_key, __bch_sb_key_nonce(sb),
+ &crypt->key, sizeof(crypt->key)))
+ die("error encrypting key");
+
+ assert(bch_key_is_encrypted(&crypt->key));
+
+ memzero_explicit(&passphrase_key, sizeof(passphrase_key));
+}
diff --git a/crypto.h b/crypto.h
new file mode 100644
index 00000000..643073eb
--- /dev/null
+++ b/crypto.h
@@ -0,0 +1,13 @@
+#ifndef _CRYPTO_H
+#define _CRYPTO_H
+
+#include "super-io.h"
+#include "tools-util.h"
+
+char *read_passphrase(const char *);
+void derive_passphrase(struct bch_sb_field_crypt *,
+ struct bch_key *, const char *);
+void bch_sb_crypt_init(struct bch_sb *sb, struct bch_sb_field_crypt *,
+ const char *);
+
+#endif /* _CRYPTO_H */
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 31f453ee..d8bfcc1f 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -13,200 +13,24 @@
#define _CRYPTO_ALGAPI_H
#include <linux/crypto.h>
-#include <linux/device.h>
-#include <linux/list.h>
-#include <linux/kernel.h>
-#include <linux/kthread.h>
-
-struct crypto_aead;
-struct crypto_instance;
-struct module;
-struct rtattr;
-struct seq_file;
-struct sk_buff;
struct crypto_type {
unsigned int (*ctxsize)(struct crypto_alg *alg, u32 type, u32 mask);
unsigned int (*extsize)(struct crypto_alg *alg);
int (*init)(struct crypto_tfm *tfm, u32 type, u32 mask);
int (*init_tfm)(struct crypto_tfm *tfm);
- void (*show)(struct seq_file *m, struct crypto_alg *alg);
- struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask);
- void (*free)(struct crypto_instance *inst);
-
- unsigned int type;
- unsigned int maskclear;
- unsigned int maskset;
- unsigned int tfmsize;
-};
-
-struct crypto_instance {
- struct crypto_alg alg;
-
- struct crypto_template *tmpl;
- struct hlist_node list;
-
- void *__ctx[] CRYPTO_MINALIGN_ATTR;
-};
-
-struct crypto_template {
- struct list_head list;
- struct hlist_head instances;
- struct module *module;
-
- struct crypto_instance *(*alloc)(struct rtattr **tb);
- void (*free)(struct crypto_instance *inst);
- int (*create)(struct crypto_template *tmpl, struct rtattr **tb);
-
- char name[CRYPTO_MAX_ALG_NAME];
-};
-
-struct scatter_walk {
- struct scatterlist *sg;
- unsigned int offset;
-};
-
-struct blkcipher_walk {
- union {
- struct {
- struct page *page;
- unsigned long offset;
- } phys;
-
- struct {
- u8 *page;
- u8 *addr;
- } virt;
- } src, dst;
- struct scatter_walk in;
- unsigned int nbytes;
-
- struct scatter_walk out;
- unsigned int total;
-
- void *page;
- u8 *buffer;
- u8 *iv;
- unsigned int ivsize;
-
- int flags;
- unsigned int walk_blocksize;
- unsigned int cipher_blocksize;
- unsigned int alignmask;
+ unsigned type;
+ unsigned maskclear;
+ unsigned maskset;
+ unsigned tfmsize;
};
extern const struct crypto_type crypto_blkcipher_type;
-struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb);
-int crypto_check_attr_type(struct rtattr **tb, u32 type);
-const char *crypto_attr_alg_name(struct rtattr *rta);
-struct crypto_alg *crypto_attr_alg2(struct rtattr *rta,
- const struct crypto_type *frontend,
- u32 type, u32 mask);
-
-static inline struct crypto_alg *crypto_attr_alg(struct rtattr *rta,
- u32 type, u32 mask)
-{
- return crypto_attr_alg2(rta, NULL, type, mask);
-}
-
-int crypto_attr_u32(struct rtattr *rta, u32 *num);
-
-/* These functions require the input/output to be aligned as u32. */
-void crypto_inc(u8 *a, unsigned int size);
-void crypto_xor(u8 *dst, const u8 *src, unsigned int size);
-
-int blkcipher_walk_done(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk, int err);
-int blkcipher_walk_virt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk);
-int blkcipher_walk_phys(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk);
-int blkcipher_walk_virt_block(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk,
- unsigned int blocksize);
-int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk,
- struct crypto_aead *tfm,
- unsigned int blocksize);
-
-static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm)
-{
- return PTR_ALIGN(crypto_tfm_ctx(tfm),
- crypto_tfm_alg_alignmask(tfm) + 1);
-}
-
-static inline struct crypto_instance *crypto_tfm_alg_instance(
- struct crypto_tfm *tfm)
-{
- return container_of(tfm->__crt_alg, struct crypto_instance, alg);
-}
-
-static inline void *crypto_instance_ctx(struct crypto_instance *inst)
-{
- return inst->__ctx;
-}
-
static inline void *crypto_blkcipher_ctx(struct crypto_blkcipher *tfm)
{
return crypto_tfm_ctx(&tfm->base);
}
-static inline void *crypto_blkcipher_ctx_aligned(struct crypto_blkcipher *tfm)
-{
- return crypto_tfm_ctx_aligned(&tfm->base);
-}
-
-static inline struct cipher_alg *crypto_cipher_alg(struct crypto_cipher *tfm)
-{
- return &crypto_cipher_tfm(tfm)->__crt_alg->cra_cipher;
-}
-
-static inline void blkcipher_walk_init(struct blkcipher_walk *walk,
- struct scatterlist *dst,
- struct scatterlist *src,
- unsigned int nbytes)
-{
- walk->in.sg = src;
- walk->out.sg = dst;
- walk->total = nbytes;
-}
-
-static inline struct crypto_alg *crypto_get_attr_alg(struct rtattr **tb,
- u32 type, u32 mask)
-{
- return crypto_attr_alg(tb[1], type, mask);
-}
-
-static inline int crypto_requires_sync(u32 type, u32 mask)
-{
- return (type ^ CRYPTO_ALG_ASYNC) & mask & CRYPTO_ALG_ASYNC;
-}
-
-noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size);
-
-/**
- * crypto_memneq - Compare two areas of memory without leaking
- * timing information.
- *
- * @a: One area of memory
- * @b: Another area of memory
- * @size: The size of the area.
- *
- * Returns 0 when data is equal, 1 otherwise.
- */
-static inline int crypto_memneq(const void *a, const void *b, size_t size)
-{
- return __crypto_memneq(a, b, size) != 0UL ? 1 : 0;
-}
-
-static inline void crypto_yield(u32 flags)
-{
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
- if (flags & CRYPTO_TFM_REQ_MAY_SLEEP)
- cond_resched();
-#endif
-}
-
#endif /* _CRYPTO_ALGAPI_H */
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
index 20d20f68..1cdc77ba 100644
--- a/include/crypto/chacha20.h
+++ b/include/crypto/chacha20.h
@@ -12,15 +12,4 @@
#define CHACHA20_KEY_SIZE 32
#define CHACHA20_BLOCK_SIZE 64
-struct chacha20_ctx {
- u32 key[8];
-};
-
-void chacha20_block(u32 *state, void *stream);
-void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
-int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keysize);
-int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes);
-
#endif
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 00bd4e7e..97edaa88 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -16,13 +16,6 @@
#include <linux/crypto.h>
#include <linux/string.h>
-struct hash_alg_common {
- unsigned int digestsize;
- unsigned int statesize;
-
- struct crypto_alg base;
-};
-
struct shash_desc {
struct crypto_shash *tfm;
u32 flags;
@@ -37,31 +30,21 @@ struct shash_desc {
struct shash_alg {
int (*init)(struct shash_desc *desc);
- int (*update)(struct shash_desc *desc, const u8 *data,
- unsigned int len);
+ int (*update)(struct shash_desc *desc, const u8 *data, unsigned len);
int (*final)(struct shash_desc *desc, u8 *out);
int (*finup)(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out);
+ unsigned len, u8 *out);
int (*digest)(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out);
- int (*export)(struct shash_desc *desc, void *out);
- int (*import)(struct shash_desc *desc, const void *in);
- int (*setkey)(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen);
-
- unsigned int descsize;
-
- /* These fields must match hash_alg_common. */
- unsigned int digestsize
- __attribute__ ((aligned(__alignof__(struct hash_alg_common))));
- unsigned int statesize;
+ unsigned len, u8 *out);
- struct crypto_alg base;
+ unsigned descsize;
+ unsigned digestsize;
+ struct crypto_alg base;
};
struct crypto_shash {
- unsigned int descsize;
- struct crypto_tfm base;
+ unsigned descsize;
+ struct crypto_tfm base;
};
struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
@@ -77,27 +60,6 @@ static inline void crypto_free_shash(struct crypto_shash *tfm)
crypto_destroy_tfm(tfm, crypto_shash_tfm(tfm));
}
-static inline const char *crypto_shash_alg_name(struct crypto_shash *tfm)
-{
- return crypto_tfm_alg_name(crypto_shash_tfm(tfm));
-}
-
-static inline const char *crypto_shash_driver_name(struct crypto_shash *tfm)
-{
- return crypto_tfm_alg_driver_name(crypto_shash_tfm(tfm));
-}
-
-static inline unsigned int crypto_shash_alignmask(
- struct crypto_shash *tfm)
-{
- return crypto_tfm_alg_alignmask(crypto_shash_tfm(tfm));
-}
-
-static inline unsigned int crypto_shash_blocksize(struct crypto_shash *tfm)
-{
- return crypto_tfm_alg_blocksize(crypto_shash_tfm(tfm));
-}
-
static inline struct shash_alg *__crypto_shash_alg(struct crypto_alg *alg)
{
return container_of(alg, struct shash_alg, base);
@@ -108,32 +70,12 @@ static inline struct shash_alg *crypto_shash_alg(struct crypto_shash *tfm)
return __crypto_shash_alg(crypto_shash_tfm(tfm)->__crt_alg);
}
-static inline unsigned int crypto_shash_digestsize(struct crypto_shash *tfm)
+static inline unsigned crypto_shash_digestsize(struct crypto_shash *tfm)
{
return crypto_shash_alg(tfm)->digestsize;
}
-static inline unsigned int crypto_shash_statesize(struct crypto_shash *tfm)
-{
- return crypto_shash_alg(tfm)->statesize;
-}
-
-static inline u32 crypto_shash_get_flags(struct crypto_shash *tfm)
-{
- return crypto_tfm_get_flags(crypto_shash_tfm(tfm));
-}
-
-static inline void crypto_shash_set_flags(struct crypto_shash *tfm, u32 flags)
-{
- crypto_tfm_set_flags(crypto_shash_tfm(tfm), flags);
-}
-
-static inline void crypto_shash_clear_flags(struct crypto_shash *tfm, u32 flags)
-{
- crypto_tfm_clear_flags(crypto_shash_tfm(tfm), flags);
-}
-
-static inline unsigned int crypto_shash_descsize(struct crypto_shash *tfm)
+static inline unsigned crypto_shash_descsize(struct crypto_shash *tfm)
{
return tfm->descsize;
}
@@ -143,39 +85,32 @@ static inline void *shash_desc_ctx(struct shash_desc *desc)
return desc->__ctx;
}
-int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen);
-
-int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out);
-
-static inline int crypto_shash_export(struct shash_desc *desc, void *out)
+static inline int crypto_shash_init(struct shash_desc *desc)
{
- return crypto_shash_alg(desc->tfm)->export(desc, out);
+ return crypto_shash_alg(desc->tfm)->init(desc);
}
-static inline int crypto_shash_import(struct shash_desc *desc, const void *in)
+static inline int crypto_shash_update(struct shash_desc *desc,
+ const u8 *data, unsigned len)
{
- return crypto_shash_alg(desc->tfm)->import(desc, in);
+ return crypto_shash_alg(desc->tfm)->update(desc, data, len);
}
-static inline int crypto_shash_init(struct shash_desc *desc)
+static inline int crypto_shash_final(struct shash_desc *desc, u8 *out)
{
- return crypto_shash_alg(desc->tfm)->init(desc);
+ return crypto_shash_alg(desc->tfm)->final(desc, out);
}
-int crypto_shash_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-
-int crypto_shash_final(struct shash_desc *desc, u8 *out);
-
-int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out);
+static inline int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out)
+{
+ return crypto_shash_alg(desc->tfm)->finup(desc, data, len, out);
+}
-static inline void shash_desc_zero(struct shash_desc *desc)
+static inline int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out)
{
- memzero_explicit(desc,
- sizeof(*desc) + crypto_shash_descsize(desc->tfm));
+ return crypto_shash_alg(desc->tfm)->digest(desc, data, len, out);
}
#endif /* _CRYPTO_HASH_H */
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 2d85c803..3973047b 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -5,9 +5,6 @@
#include <crypto/hash.h>
int crypto_register_shash(struct shash_alg *alg);
-int crypto_unregister_shash(struct shash_alg *alg);
-int crypto_register_shashes(struct shash_alg *algs, int count);
-int crypto_unregister_shashes(struct shash_alg *algs, int count);
static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm)
{
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
index 894df59b..9fcfbfeb 100644
--- a/include/crypto/poly1305.h
+++ b/include/crypto/poly1305.h
@@ -5,37 +5,9 @@
#ifndef _CRYPTO_POLY1305_H
#define _CRYPTO_POLY1305_H
-#include <linux/types.h>
-#include <linux/crypto.h>
+#include <sodium/crypto_onetimeauth_poly1305.h>
-#define POLY1305_BLOCK_SIZE 16
-#define POLY1305_KEY_SIZE 32
-#define POLY1305_DIGEST_SIZE 16
-
-struct poly1305_desc_ctx {
- /* key */
- u32 r[5];
- /* finalize key */
- u32 s[4];
- /* accumulator */
- u32 h[5];
- /* partial buffer */
- u8 buf[POLY1305_BLOCK_SIZE];
- /* bytes used in partial buffer */
- unsigned int buflen;
- /* r key has been set */
- bool rset;
- /* s key has been set */
- bool sset;
-};
-
-int crypto_poly1305_init(struct shash_desc *desc);
-int crypto_poly1305_setkey(struct crypto_shash *tfm,
- const u8 *key, unsigned int keylen);
-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen);
-int crypto_poly1305_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen);
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
+#define POLY1305_KEY_SIZE crypto_onetimeauth_poly1305_KEYBYTES
+#define POLY1305_DIGEST_SIZE crypto_onetimeauth_poly1305_BYTES
#endif
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
deleted file mode 100644
index c94d3eb1..00000000
--- a/include/crypto/sha.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Common values for SHA algorithms
- */
-
-#ifndef _CRYPTO_SHA_H
-#define _CRYPTO_SHA_H
-
-#include <linux/types.h>
-
-#define SHA1_DIGEST_SIZE 20
-#define SHA1_BLOCK_SIZE 64
-
-#define SHA224_DIGEST_SIZE 28
-#define SHA224_BLOCK_SIZE 64
-
-#define SHA256_DIGEST_SIZE 32
-#define SHA256_BLOCK_SIZE 64
-
-#define SHA384_DIGEST_SIZE 48
-#define SHA384_BLOCK_SIZE 128
-
-#define SHA512_DIGEST_SIZE 64
-#define SHA512_BLOCK_SIZE 128
-
-#define SHA1_H0 0x67452301UL
-#define SHA1_H1 0xefcdab89UL
-#define SHA1_H2 0x98badcfeUL
-#define SHA1_H3 0x10325476UL
-#define SHA1_H4 0xc3d2e1f0UL
-
-#define SHA224_H0 0xc1059ed8UL
-#define SHA224_H1 0x367cd507UL
-#define SHA224_H2 0x3070dd17UL
-#define SHA224_H3 0xf70e5939UL
-#define SHA224_H4 0xffc00b31UL
-#define SHA224_H5 0x68581511UL
-#define SHA224_H6 0x64f98fa7UL
-#define SHA224_H7 0xbefa4fa4UL
-
-#define SHA256_H0 0x6a09e667UL
-#define SHA256_H1 0xbb67ae85UL
-#define SHA256_H2 0x3c6ef372UL
-#define SHA256_H3 0xa54ff53aUL
-#define SHA256_H4 0x510e527fUL
-#define SHA256_H5 0x9b05688cUL
-#define SHA256_H6 0x1f83d9abUL
-#define SHA256_H7 0x5be0cd19UL
-
-#define SHA384_H0 0xcbbb9d5dc1059ed8ULL
-#define SHA384_H1 0x629a292a367cd507ULL
-#define SHA384_H2 0x9159015a3070dd17ULL
-#define SHA384_H3 0x152fecd8f70e5939ULL
-#define SHA384_H4 0x67332667ffc00b31ULL
-#define SHA384_H5 0x8eb44a8768581511ULL
-#define SHA384_H6 0xdb0c2e0d64f98fa7ULL
-#define SHA384_H7 0x47b5481dbefa4fa4ULL
-
-#define SHA512_H0 0x6a09e667f3bcc908ULL
-#define SHA512_H1 0xbb67ae8584caa73bULL
-#define SHA512_H2 0x3c6ef372fe94f82bULL
-#define SHA512_H3 0xa54ff53a5f1d36f1ULL
-#define SHA512_H4 0x510e527fade682d1ULL
-#define SHA512_H5 0x9b05688c2b3e6c1fULL
-#define SHA512_H6 0x1f83d9abfb41bd6bULL
-#define SHA512_H7 0x5be0cd19137e2179ULL
-
-extern const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE];
-
-extern const u8 sha224_zero_message_hash[SHA224_DIGEST_SIZE];
-
-extern const u8 sha256_zero_message_hash[SHA256_DIGEST_SIZE];
-
-struct sha1_state {
- u32 state[SHA1_DIGEST_SIZE / 4];
- u64 count;
- u8 buffer[SHA1_BLOCK_SIZE];
-};
-
-struct sha256_state {
- u32 state[SHA256_DIGEST_SIZE / 4];
- u64 count;
- u8 buf[SHA256_BLOCK_SIZE];
-};
-
-struct sha512_state {
- u64 state[SHA512_DIGEST_SIZE / 8];
- u64 count[2];
- u8 buf[SHA512_BLOCK_SIZE];
-};
-
-struct shash_desc;
-
-extern int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-
-extern int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *hash);
-
-extern int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-
-extern int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *hash);
-
-extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
- unsigned int len);
-
-extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *hash);
-#endif
diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h
deleted file mode 100644
index 01b002de..00000000
--- a/include/crypto/sha1_base.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * sha1_base.h - core logic for SHA-1 implementations
- *
- * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <crypto/internal/hash.h>
-#include <crypto/sha.h>
-#include <linux/byteorder.h>
-#include <linux/crypto.h>
-#include <linux/module.h>
-
-#include <asm/unaligned.h>
-
-typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks);
-
-static inline int sha1_base_init(struct shash_desc *desc)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA1_H0;
- sctx->state[1] = SHA1_H1;
- sctx->state[2] = SHA1_H2;
- sctx->state[3] = SHA1_H3;
- sctx->state[4] = SHA1_H4;
- sctx->count = 0;
-
- return 0;
-}
-
-static inline int sha1_base_do_update(struct shash_desc *desc,
- const u8 *data,
- unsigned int len,
- sha1_block_fn *block_fn)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
- sctx->count += len;
-
- if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) {
- int blocks;
-
- if (partial) {
- int p = SHA1_BLOCK_SIZE - partial;
-
- memcpy(sctx->buffer + partial, data, p);
- data += p;
- len -= p;
-
- block_fn(sctx, sctx->buffer, 1);
- }
-
- blocks = len / SHA1_BLOCK_SIZE;
- len %= SHA1_BLOCK_SIZE;
-
- if (blocks) {
- block_fn(sctx, data, blocks);
- data += blocks * SHA1_BLOCK_SIZE;
- }
- partial = 0;
- }
- if (len)
- memcpy(sctx->buffer + partial, data, len);
-
- return 0;
-}
-
-static inline int sha1_base_do_finalize(struct shash_desc *desc,
- sha1_block_fn *block_fn)
-{
- const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
- struct sha1_state *sctx = shash_desc_ctx(desc);
- __be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
- sctx->buffer[partial++] = 0x80;
- if (partial > bit_offset) {
- memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial);
- partial = 0;
-
- block_fn(sctx, sctx->buffer, 1);
- }
-
- memset(sctx->buffer + partial, 0x0, bit_offset - partial);
- *bits = cpu_to_be64(sctx->count << 3);
- block_fn(sctx, sctx->buffer, 1);
-
- return 0;
-}
-
-static inline int sha1_base_finish(struct shash_desc *desc, u8 *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- __be32 *digest = (__be32 *)out;
- int i;
-
- for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], digest++);
-
- *sctx = (struct sha1_state){};
- return 0;
-}
diff --git a/include/keys/user-type.h b/include/keys/user-type.h
new file mode 100644
index 00000000..a7a2ee45
--- /dev/null
+++ b/include/keys/user-type.h
@@ -0,0 +1,6 @@
+#ifndef _KEYS_USER_TYPE_H
+#define _KEYS_USER_TYPE_H
+
+#include <linux/key.h>
+
+#endif /* _KEYS_USER_TYPE_H */
diff --git a/include/linux/bcache.h b/include/linux/bcache.h
index f09a44a6..4179f8dd 100644
--- a/include/linux/bcache.h
+++ b/include/linux/bcache.h
@@ -102,9 +102,17 @@ struct bch_val {
__u64 __nothing[0];
};
-struct bkey {
- __u64 _data[0];
+struct bversion {
+#if defined(__LITTLE_ENDIAN)
+ __u64 lo;
+ __u32 hi;
+#elif defined(__BIG_ENDIAN)
+ __u32 hi;
+ __u64 lo;
+#endif
+} __attribute__((packed, aligned(4)));
+struct bkey {
/* Size of combined key and value, in u64s */
__u8 u64s;
@@ -125,13 +133,13 @@ struct bkey {
#if defined(__LITTLE_ENDIAN)
__u8 pad[1];
- __u32 version;
+ struct bversion version;
__u32 size; /* extent size, in sectors */
struct bpos p;
#elif defined(__BIG_ENDIAN)
struct bpos p;
__u32 size; /* extent size, in sectors */
- __u32 version;
+ struct bversion version;
__u8 pad[1];
#endif
@@ -184,7 +192,8 @@ enum bch_bkey_fields {
BKEY_FIELD_OFFSET,
BKEY_FIELD_SNAPSHOT,
BKEY_FIELD_SIZE,
- BKEY_FIELD_VERSION,
+ BKEY_FIELD_VERSION_HI,
+ BKEY_FIELD_VERSION_LO,
BKEY_NR_FIELDS,
};
@@ -200,14 +209,25 @@ enum bch_bkey_fields {
bkey_format_field(OFFSET, p.offset), \
bkey_format_field(SNAPSHOT, p.snapshot), \
bkey_format_field(SIZE, size), \
- bkey_format_field(VERSION, version), \
+ bkey_format_field(VERSION_HI, version.hi), \
+ bkey_format_field(VERSION_LO, version.lo), \
}, \
})
/* bkey with inline value */
struct bkey_i {
- struct bkey k;
- struct bch_val v;
+ __u64 _data[0];
+
+ union {
+ struct {
+ /* Size of combined key and value, in u64s */
+ __u8 u64s;
+ };
+ struct {
+ struct bkey k;
+ struct bch_val v;
+ };
+ };
};
#ifndef __cplusplus
@@ -358,20 +378,47 @@ BKEY_VAL_TYPE(cookie, KEY_TYPE_COOKIE);
* is neither checksummed nor compressed.
*/
+/* 128 bits, sufficient for cryptographic MACs: */
+struct bch_csum {
+ __le64 lo;
+ __le64 hi;
+} __attribute__((packed, aligned(8)));
+
+#define BCH_CSUM_NONE 0U
+#define BCH_CSUM_CRC32C 1U
+#define BCH_CSUM_CRC64 2U
+#define BCH_CSUM_CHACHA20_POLY1305_80 3U
+#define BCH_CSUM_CHACHA20_POLY1305_128 4U
+#define BCH_CSUM_NR 5U
+
+static inline _Bool bch_csum_type_is_encryption(unsigned type)
+{
+ switch (type) {
+ case BCH_CSUM_CHACHA20_POLY1305_80:
+ case BCH_CSUM_CHACHA20_POLY1305_128:
+ return true;
+ default:
+ return false;
+ }
+}
+
enum bch_extent_entry_type {
- BCH_EXTENT_ENTRY_crc32 = 0,
- BCH_EXTENT_ENTRY_ptr = 1,
+ BCH_EXTENT_ENTRY_ptr = 0,
+ BCH_EXTENT_ENTRY_crc32 = 1,
BCH_EXTENT_ENTRY_crc64 = 2,
+ BCH_EXTENT_ENTRY_crc128 = 3,
};
-#define BCH_EXTENT_ENTRY_MAX 3
+#define BCH_EXTENT_ENTRY_MAX 4
+/* Compressed/uncompressed size are stored biased by 1: */
struct bch_extent_crc32 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u32 type:1,
+ __u32 type:2,
+ _compressed_size:7,
+ _uncompressed_size:7,
offset:7,
- compressed_size:8,
- uncompressed_size:8,
+ _unused:1,
csum_type:4,
compression_type:4;
__u32 csum;
@@ -379,45 +426,80 @@ struct bch_extent_crc32 {
__u32 csum;
__u32 compression_type:4,
csum_type:4,
- uncompressed_size:8,
- compressed_size:8,
+ _unused:1,
offset:7,
- type:1;
+ _uncompressed_size:7,
+ _compressed_size:7,
+ type:2;
#endif
} __attribute__((packed, aligned(8)));
-#define CRC32_EXTENT_SIZE_MAX (1U << 7)
-
-/* 64k */
-#define BCH_COMPRESSED_EXTENT_MAX 128U
+#define CRC32_SIZE_MAX (1U << 7)
+#define CRC32_NONCE_MAX 0
struct bch_extent_crc64 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:3,
- offset:17,
- compressed_size:18,
- uncompressed_size:18,
+ _compressed_size:9,
+ _uncompressed_size:9,
+ offset:9,
+ nonce:10,
+ csum_type:4,
+ compression_type:4,
+ csum_hi:16;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u64 csum_hi:16,
+ compression_type:4,
+ csum_type:4,
+ nonce:10,
+ offset:9,
+ _uncompressed_size:9,
+ _compressed_size:9,
+ type:3;
+#endif
+ __u64 csum_lo;
+} __attribute__((packed, aligned(8)));
+
+#define CRC64_SIZE_MAX (1U << 9)
+#define CRC64_NONCE_MAX ((1U << 10) - 1)
+
+struct bch_extent_crc128 {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u64 type:4,
+ _compressed_size:13,
+ _uncompressed_size:13,
+ offset:13,
+ nonce:13,
csum_type:4,
compression_type:4;
#elif defined (__BIG_ENDIAN_BITFIELD)
__u64 compression_type:4,
csum_type:4,
- uncompressed_size:18,
- compressed_size:18,
- offset:17,
+ nonce:14,
+ offset:13,
+ _uncompressed_size:13,
+ _compressed_size:13,
type:3;
#endif
- __u64 csum;
+ struct bch_csum csum;
} __attribute__((packed, aligned(8)));
-#define CRC64_EXTENT_SIZE_MAX (1U << 17)
+#define CRC128_SIZE_MAX (1U << 13)
+#define CRC128_NONCE_MAX ((1U << 13) - 1)
+
+/*
+ * Max size of an extent that may require bouncing to read or write
+ * (checksummed, compressed): 64k
+ */
+#define BCH_ENCODED_EXTENT_MAX 128U
/*
* @reservation - pointer hasn't been written to, just reserved
*/
struct bch_extent_ptr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u64 type:2,
+ __u64 type:1,
+ cached:1,
erasure_coded:1,
reservation:1,
offset:44, /* 8 petabytes */
@@ -429,10 +511,25 @@ struct bch_extent_ptr {
offset:44,
reservation:1,
erasure_coded:1,
- type:2;
+ cached:1,
+ type:1;
#endif
} __attribute__((packed, aligned(8)));
+struct bch_extent_reservation {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u64 type:5,
+ unused:23,
+ replicas:4,
+ generation:32;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ __u64 generation:32,
+ replicas:4,
+ unused:23,
+ type:5;
+#endif
+};
+
union bch_extent_entry {
#if defined(__LITTLE_ENDIAN) || __BITS_PER_LONG == 64
unsigned long type;
@@ -446,6 +543,7 @@ union bch_extent_entry {
#endif
struct bch_extent_crc32 crc32;
struct bch_extent_crc64 crc64;
+ struct bch_extent_crc128 crc128;
struct bch_extent_ptr ptr;
};
@@ -473,9 +571,18 @@ struct bch_extent {
} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(extent, BCH_EXTENT);
+struct bch_reservation {
+ struct bch_val v;
+
+ __le32 generation;
+ __u8 nr_replicas;
+ __u8 pad[3];
+} __attribute__((packed, aligned(8)));
+BKEY_VAL_TYPE(reservation, BCH_RESERVATION);
+
/* Maximum size (in u64s) a single pointer could be: */
#define BKEY_EXTENT_PTR_U64s_MAX\
- ((sizeof(struct bch_extent_crc64) + \
+ ((sizeof(struct bch_extent_crc128) + \
sizeof(struct bch_extent_ptr)) / sizeof(u64))
/* Maximum possible size of an entire extent value: */
@@ -506,28 +613,26 @@ enum bch_inode_types {
struct bch_inode {
struct bch_val v;
- __le16 i_mode;
- __le16 pad;
- __le32 i_flags;
-
- /* Nanoseconds */
- __le64 i_atime;
- __le64 i_ctime;
- __le64 i_mtime;
-
- __le64 i_size;
- __le64 i_sectors;
-
- __le32 i_uid;
- __le32 i_gid;
- __le32 i_nlink;
-
- __le32 i_dev;
-
__le64 i_hash_seed;
+ __le32 i_flags;
+ __le16 i_mode;
+ __u8 fields[0];
} __attribute__((packed));
BKEY_VAL_TYPE(inode, BCH_INODE_FS);
+#define BCH_INODE_FIELDS() \
+ BCH_INODE_FIELD(i_atime, 64) \
+ BCH_INODE_FIELD(i_ctime, 64) \
+ BCH_INODE_FIELD(i_mtime, 64) \
+ BCH_INODE_FIELD(i_otime, 64) \
+ BCH_INODE_FIELD(i_size, 64) \
+ BCH_INODE_FIELD(i_sectors, 64) \
+ BCH_INODE_FIELD(i_uid, 32) \
+ BCH_INODE_FIELD(i_gid, 32) \
+ BCH_INODE_FIELD(i_nlink, 32) \
+ BCH_INODE_FIELD(i_generation, 32) \
+ BCH_INODE_FIELD(i_dev, 32)
+
enum {
/*
* User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL
@@ -544,9 +649,9 @@ enum {
/* not implemented yet: */
__BCH_INODE_HAS_XATTRS = 7, /* has xattrs in xattr btree */
-};
-LE32_BITMASK(INODE_STR_HASH_TYPE, struct bch_inode, i_flags, 28, 32);
+ /* bits 20+ reserved for packed fields below: */
+};
#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC)
#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE)
@@ -557,6 +662,9 @@ LE32_BITMASK(INODE_STR_HASH_TYPE, struct bch_inode, i_flags, 28, 32);
#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY)
#define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS)
+LE32_BITMASK(INODE_STR_HASH, struct bch_inode, i_flags, 20, 24);
+LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, i_flags, 24, 32);
+
struct bch_inode_blockdev {
struct bch_val v;
@@ -574,6 +682,7 @@ BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV);
/* Thin provisioned volume, or cache for another block device? */
LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1)
+
/* Dirents */
/*
@@ -639,6 +748,7 @@ BKEY_VAL_TYPE(xattr, BCH_XATTR);
* Version 4: Backing device with data offset
* Version 5: All the incompat changes
* Version 6: Cache device UUIDs all in superblock, another incompat bset change
+ * Version 7: Encryption (expanded checksum fields), other random things
*/
#define BCACHE_SB_VERSION_CDEV_V0 0
#define BCACHE_SB_VERSION_BDEV 1
@@ -646,16 +756,15 @@ BKEY_VAL_TYPE(xattr, BCH_XATTR);
#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4
#define BCACHE_SB_VERSION_CDEV_V2 5
#define BCACHE_SB_VERSION_CDEV_V3 6
-#define BCACHE_SB_VERSION_CDEV 6
-#define BCACHE_SB_MAX_VERSION 6
+#define BCACHE_SB_VERSION_CDEV_V4 7
+#define BCACHE_SB_VERSION_CDEV 7
+#define BCACHE_SB_MAX_VERSION 7
-#define SB_SECTOR 8
-#define SB_LABEL_SIZE 32
-#define MAX_CACHES_PER_SET 64
-
-#define BDEV_DATA_START_DEFAULT 16 /* sectors */
+#define BCH_SB_SECTOR 8
+#define BCH_SB_LABEL_SIZE 32
+#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */
-struct cache_member {
+struct bch_member {
uuid_le uuid;
__le64 nbuckets; /* device size */
__le16 first_bucket; /* index of first bucket used */
@@ -663,164 +772,257 @@ struct cache_member {
__le32 pad;
__le64 last_mount; /* time_t */
- __le64 f1;
- __le64 f2;
+ __le64 flags[2];
};
-LE64_BITMASK(CACHE_STATE, struct cache_member, f1, 0, 4)
-#define CACHE_ACTIVE 0U
-#define CACHE_RO 1U
-#define CACHE_FAILED 2U
-#define CACHE_SPARE 3U
-#define CACHE_STATE_NR 4U
+LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags[0], 0, 4)
+LE64_BITMASK(BCH_MEMBER_TIER, struct bch_member, flags[0], 4, 8)
+LE64_BITMASK(BCH_MEMBER_HAS_METADATA, struct bch_member, flags[0], 8, 9)
+LE64_BITMASK(BCH_MEMBER_HAS_DATA, struct bch_member, flags[0], 9, 10)
+LE64_BITMASK(BCH_MEMBER_REPLACEMENT, struct bch_member, flags[0], 10, 14)
+LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags[0], 14, 15);
-LE64_BITMASK(CACHE_TIER, struct cache_member, f1, 4, 8)
-#define CACHE_TIERS 4U
+#if 0
+LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
+LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
+#endif
-LE64_BITMASK(CACHE_REPLICATION_SET, struct cache_member, f1, 8, 16)
+enum bch_member_state {
+ BCH_MEMBER_STATE_ACTIVE = 0,
+ BCH_MEMBER_STATE_RO = 1,
+ BCH_MEMBER_STATE_FAILED = 2,
+ BCH_MEMBER_STATE_SPARE = 3,
+ BCH_MEMBER_STATE_NR = 4,
+};
-LE64_BITMASK(CACHE_HAS_METADATA, struct cache_member, f1, 24, 25)
-LE64_BITMASK(CACHE_HAS_DATA, struct cache_member, f1, 25, 26)
+#define BCH_TIER_MAX 4U
-LE64_BITMASK(CACHE_REPLACEMENT, struct cache_member, f1, 26, 30)
-#define CACHE_REPLACEMENT_LRU 0U
-#define CACHE_REPLACEMENT_FIFO 1U
-#define CACHE_REPLACEMENT_RANDOM 2U
-#define CACHE_REPLACEMENT_NR 3U
+enum cache_replacement {
+ CACHE_REPLACEMENT_LRU = 0,
+ CACHE_REPLACEMENT_FIFO = 1,
+ CACHE_REPLACEMENT_RANDOM = 2,
+ CACHE_REPLACEMENT_NR = 3,
+};
-LE64_BITMASK(CACHE_DISCARD, struct cache_member, f1, 30, 31);
+struct bch_sb_layout {
+ uuid_le magic; /* bcache superblock UUID */
+ __u8 layout_type;
+ __u8 sb_max_size_bits; /* base 2 of 512 byte sectors */
+ __u8 nr_superblocks;
+ __u8 pad[5];
+ __u64 sb_offset[61];
+} __attribute__((packed));
-LE64_BITMASK(CACHE_NR_READ_ERRORS, struct cache_member, f2, 0, 20);
-LE64_BITMASK(CACHE_NR_WRITE_ERRORS, struct cache_member, f2, 20, 40);
+#define BCH_SB_LAYOUT_SECTOR 7
-struct cache_sb {
- __le64 csum;
- __le64 offset; /* sector where this sb was written */
- __le64 version; /* of on disk format */
+struct bch_sb_field {
+ __u64 _data[0];
+ __le32 u64s;
+ __le32 type;
+};
- uuid_le magic; /* bcache superblock UUID */
+enum bch_sb_field_types {
+ BCH_SB_FIELD_journal = 0,
+ BCH_SB_FIELD_members = 1,
+ BCH_SB_FIELD_crypt = 2,
+ BCH_SB_FIELD_NR = 3,
+};
- /* Identifies this disk within the cache set: */
- uuid_le disk_uuid;
+struct bch_sb_field_journal {
+ struct bch_sb_field field;
+ __le64 buckets[0];
+};
- /*
- * Internal cache set UUID - xored with various magic numbers and thus
- * must never change:
- */
- union {
- uuid_le set_uuid;
- __le64 set_magic;
- };
+struct bch_sb_field_members {
+ struct bch_sb_field field;
+ struct bch_member members[0];
+};
+
+/* Crypto: */
- __u8 label[SB_LABEL_SIZE];
+struct nonce {
+ __le32 d[4];
+};
+
+struct bch_key {
+ __le64 key[4];
+};
+
+#define BCH_KEY_MAGIC \
+ (((u64) 'b' << 0)|((u64) 'c' << 8)| \
+ ((u64) 'h' << 16)|((u64) '*' << 24)| \
+ ((u64) '*' << 32)|((u64) 'k' << 40)| \
+ ((u64) 'e' << 48)|((u64) 'y' << 56))
+
+struct bch_encrypted_key {
+ __le64 magic;
+ struct bch_key key;
+};
+
+/*
+ * If this field is present in the superblock, it stores an encryption key which
+ * is used to encrypt all other data/metadata. The key will normally be encrypted
+ * with the key userspace provides, but if encryption has been turned off we'll
+ * just store the master key unencrypted in the superblock so we can access the
+ * previously encrypted data.
+ */
+struct bch_sb_field_crypt {
+ struct bch_sb_field field;
__le64 flags;
+ __le64 kdf_flags;
+ struct bch_encrypted_key key;
+};
- /* Incremented each time superblock is written: */
- __le64 seq;
+LE64_BITMASK(BCH_CRYPT_KDF_TYPE, struct bch_sb_field_crypt, flags, 0, 4);
- /*
- * User visible UUID for identifying the cache set the user is allowed
- * to change:
- */
- uuid_le user_uuid;
+enum bch_kdf_types {
+ BCH_KDF_SCRYPT = 0,
+ BCH_KDF_NR = 1,
+};
- __le64 flags2;
- __le64 pad1[5];
+/* stored as base 2 log of scrypt params: */
+LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16);
+LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
+LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
- /* Number of cache_member entries: */
- __u8 nr_in_set;
+/*
+ * @offset - sector where this sb was written
+ * @version - on disk format version
+ * @magic - identifies as a bcache superblock (BCACHE_MAGIC)
+ * @seq - incremented each time superblock is written
+ * @uuid - used for generating various magic numbers and identifying
+ * member devices, never changes
+ * @user_uuid - user visible UUID, may be changed
+ * @label - filesystem label
+ * @seq - identifies most recent superblock, incremented each time
+ * superblock is written
+ * @features - enabled incompatible features
+ */
+struct bch_sb {
+ struct bch_csum csum;
+ __le64 version;
+ uuid_le magic;
+ uuid_le uuid;
+ uuid_le user_uuid;
+ __u8 label[BCH_SB_LABEL_SIZE];
+ __le64 offset;
+ __le64 seq;
- /*
- * Index of this device - for PTR_DEV(), and also this device's
- * slot in the cache_member array:
- */
- __u8 nr_this_dev;
- __le16 pad2[3];
+ __le16 block_size;
+ __u8 dev_idx;
+ __u8 nr_devices;
+ __le32 u64s;
- __le16 block_size; /* sectors */
- __le16 pad3[6];
+ __le64 time_base_lo;
+ __le32 time_base_hi;
+ __le32 time_precision;
+
+ __le64 flags[8];
+ __le64 features[2];
+ __le64 compat[2];
- __le16 u64s; /* size of variable length portion */
+ struct bch_sb_layout layout;
union {
- struct cache_member members[0];
- /*
- * Journal buckets also in the variable length portion, after
- * the member info:
- */
- __le64 _data[0];
+ struct bch_sb_field start[0];
+ __le64 _data[0];
};
-};
+} __attribute__((packed, aligned(8)));
-/* XXX: rename CACHE_SET -> BCH_FS or something? */
+/*
+ * Flags:
+ * BCH_SB_INITIALIZED - set on first mount
+ * BCH_SB_CLEAN - did we shut down cleanly? Just a hint, doesn't affect
+ * behaviour of mount/recovery path:
+ * BCH_SB_INODE_32BIT - limit inode numbers to 32 bits
+ * BCH_SB_128_BIT_MACS - 128 bit macs instead of 80
+ * BCH_SB_ENCRYPTION_TYPE - if nonzero encryption is enabled; overrides
+ * DATA/META_CSUM_TYPE. Also indicates encryption
+ * algorithm in use, if/when we get more than one
+ */
-LE64_BITMASK(CACHE_SET_SYNC, struct cache_sb, flags, 0, 1);
+LE64_BITMASK(BCH_SB_INITIALIZED, struct bch_sb, flags[0], 0, 1);
+LE64_BITMASK(BCH_SB_CLEAN, struct bch_sb, flags[0], 1, 2);
+LE64_BITMASK(BCH_SB_CSUM_TYPE, struct bch_sb, flags[0], 2, 8);
+LE64_BITMASK(BCH_SB_ERROR_ACTION, struct bch_sb, flags[0], 8, 12);
-LE64_BITMASK(CACHE_SET_ERROR_ACTION, struct cache_sb, flags, 1, 4);
-#define BCH_ON_ERROR_CONTINUE 0U
-#define BCH_ON_ERROR_RO 1U
-#define BCH_ON_ERROR_PANIC 2U
-#define BCH_NR_ERROR_ACTIONS 3U
+LE64_BITMASK(BCH_SB_BTREE_NODE_SIZE, struct bch_sb, flags[0], 12, 28);
-LE64_BITMASK(CACHE_SET_META_REPLICAS_WANT,struct cache_sb, flags, 4, 8);
-LE64_BITMASK(CACHE_SET_DATA_REPLICAS_WANT,struct cache_sb, flags, 8, 12);
+LE64_BITMASK(BCH_SB_GC_RESERVE, struct bch_sb, flags[0], 28, 33);
+LE64_BITMASK(BCH_SB_ROOT_RESERVE, struct bch_sb, flags[0], 33, 40);
-#define BCH_REPLICAS_MAX 4U
+LE64_BITMASK(BCH_SB_META_CSUM_TYPE, struct bch_sb, flags[0], 40, 44);
+LE64_BITMASK(BCH_SB_DATA_CSUM_TYPE, struct bch_sb, flags[0], 44, 48);
-LE64_BITMASK(CACHE_SB_CSUM_TYPE, struct cache_sb, flags, 12, 16);
+LE64_BITMASK(BCH_SB_META_REPLICAS_WANT, struct bch_sb, flags[0], 48, 52);
+LE64_BITMASK(BCH_SB_DATA_REPLICAS_WANT, struct bch_sb, flags[0], 52, 56);
-LE64_BITMASK(CACHE_SET_META_PREFERRED_CSUM_TYPE,struct cache_sb, flags, 16, 20);
-#define BCH_CSUM_NONE 0U
-#define BCH_CSUM_CRC32C 1U
-#define BCH_CSUM_CRC64 2U
-#define BCH_CSUM_NR 3U
+LE64_BITMASK(BCH_SB_META_REPLICAS_HAVE, struct bch_sb, flags[0], 56, 60);
+LE64_BITMASK(BCH_SB_DATA_REPLICAS_HAVE, struct bch_sb, flags[0], 60, 64);
-LE64_BITMASK(CACHE_SET_BTREE_NODE_SIZE, struct cache_sb, flags, 20, 36);
+LE64_BITMASK(BCH_SB_STR_HASH_TYPE, struct bch_sb, flags[1], 0, 4);
+LE64_BITMASK(BCH_SB_COMPRESSION_TYPE, struct bch_sb, flags[1], 4, 8);
+LE64_BITMASK(BCH_SB_INODE_32BIT, struct bch_sb, flags[1], 8, 9);
-LE64_BITMASK(CACHE_SET_META_REPLICAS_HAVE,struct cache_sb, flags, 36, 40);
-LE64_BITMASK(CACHE_SET_DATA_REPLICAS_HAVE,struct cache_sb, flags, 40, 44);
+LE64_BITMASK(BCH_SB_128_BIT_MACS, struct bch_sb, flags[1], 9, 10);
+LE64_BITMASK(BCH_SB_ENCRYPTION_TYPE, struct bch_sb, flags[1], 10, 14);
+LE64_BITMASK(BCH_SB_JOURNAL_ENTRY_SIZE, struct bch_sb, flags[1], 14, 20);
-LE64_BITMASK(CACHE_SET_STR_HASH_TYPE,struct cache_sb, flags, 44, 48);
-enum bch_str_hash_type {
- BCH_STR_HASH_CRC32C = 0,
- BCH_STR_HASH_CRC64 = 1,
- BCH_STR_HASH_SIPHASH = 2,
- BCH_STR_HASH_SHA1 = 3,
+/* Features: */
+enum bch_sb_features {
+ BCH_FEATURE_LZ4 = 0,
+ BCH_FEATURE_GZIP = 1,
};
-#define BCH_STR_HASH_NR 4
+/* options: */
-LE64_BITMASK(CACHE_SET_DATA_PREFERRED_CSUM_TYPE, struct cache_sb, flags, 48, 52);
+#define BCH_REPLICAS_MAX 4U
-LE64_BITMASK(CACHE_SET_COMPRESSION_TYPE, struct cache_sb, flags, 52, 56);
-enum {
- BCH_COMPRESSION_NONE = 0,
- BCH_COMPRESSION_LZ4 = 1,
- BCH_COMPRESSION_GZIP = 2,
+#if 0
+#define BCH_ERROR_ACTIONS() \
+ x(BCH_ON_ERROR_CONTINUE, 0, "continue") \
+ x(BCH_ON_ERROR_RO, 1, "remount-ro") \
+ x(BCH_ON_ERROR_PANIC, 2, "panic") \
+ x(BCH_NR_ERROR_ACTIONS, 3, NULL)
+
+enum bch_error_actions {
+#define x(_opt, _nr, _str) _opt = _nr,
+ BCH_ERROR_ACTIONS()
+#undef x
};
+#endif
-#define BCH_COMPRESSION_NR 3U
-
-/* Limit inode numbers to 32 bits: */
-LE64_BITMASK(CACHE_INODE_32BIT, struct cache_sb, flags, 56, 57);
-
-LE64_BITMASK(CACHE_SET_GC_RESERVE, struct cache_sb, flags, 57, 63);
-
-LE64_BITMASK(CACHE_SET_ROOT_RESERVE, struct cache_sb, flags2, 0, 6);
+enum bch_error_actions {
+ BCH_ON_ERROR_CONTINUE = 0,
+ BCH_ON_ERROR_RO = 1,
+ BCH_ON_ERROR_PANIC = 2,
+ BCH_NR_ERROR_ACTIONS = 3,
+};
-/*
- * Did we shut down cleanly? Just a hint, doesn't affect behaviour of
- * mount/recovery path:
- */
-LE64_BITMASK(CACHE_SET_CLEAN, struct cache_sb, flags2, 6, 7);
+enum bch_csum_opts {
+ BCH_CSUM_OPT_NONE = 0,
+ BCH_CSUM_OPT_CRC32C = 1,
+ BCH_CSUM_OPT_CRC64 = 2,
+ BCH_CSUM_OPT_NR = 3,
+};
-LE64_BITMASK(CACHE_SET_JOURNAL_ENTRY_SIZE, struct cache_sb, flags2, 7, 15);
+enum bch_str_hash_opts {
+ BCH_STR_HASH_CRC32C = 0,
+ BCH_STR_HASH_CRC64 = 1,
+ BCH_STR_HASH_SIPHASH = 2,
+ BCH_STR_HASH_NR = 3,
+};
-/* options: */
+enum bch_compression_opts {
+ BCH_COMPRESSION_NONE = 0,
+ BCH_COMPRESSION_LZ4 = 1,
+ BCH_COMPRESSION_GZIP = 2,
+ BCH_COMPRESSION_NR = 3,
+};
/**
- * CACHE_SET_OPT(name, choices, min, max, sb_option, sysfs_writeable)
+ * BCH_OPT(name, choices, min, max, sb_option, sysfs_writeable)
*
* @name - name of mount option, sysfs attribute, and struct cache_set_opts
* member
@@ -838,56 +1040,60 @@ LE64_BITMASK(CACHE_SET_JOURNAL_ENTRY_SIZE, struct cache_sb, flags2, 7, 15);
* @sysfs_writeable - if true, option will be modifiable at runtime via sysfs
*/
-#define CACHE_SET_SB_OPTS() \
- CACHE_SET_OPT(errors, \
- bch_error_actions, \
- 0, BCH_NR_ERROR_ACTIONS, \
- CACHE_SET_ERROR_ACTION, \
- true) \
- CACHE_SET_OPT(metadata_replicas, \
- bch_uint_opt, \
- 0, BCH_REPLICAS_MAX, \
- CACHE_SET_META_REPLICAS_WANT, \
- false) \
- CACHE_SET_OPT(data_replicas, \
- bch_uint_opt, \
- 0, BCH_REPLICAS_MAX, \
- CACHE_SET_DATA_REPLICAS_WANT, \
- false) \
- CACHE_SET_OPT(metadata_checksum, \
- bch_csum_types, \
- 0, BCH_CSUM_NR, \
- CACHE_SET_META_PREFERRED_CSUM_TYPE, \
- true) \
- CACHE_SET_OPT(data_checksum, \
- bch_csum_types, \
- 0, BCH_CSUM_NR, \
- CACHE_SET_DATA_PREFERRED_CSUM_TYPE, \
- true) \
- CACHE_SET_OPT(compression, \
- bch_compression_types, \
- 0, BCH_COMPRESSION_NR, \
- CACHE_SET_COMPRESSION_TYPE, \
- true) \
- CACHE_SET_OPT(str_hash, \
- bch_str_hash_types, \
- 0, BCH_STR_HASH_NR, \
- CACHE_SET_STR_HASH_TYPE, \
- true) \
- CACHE_SET_OPT(inodes_32bit, \
- bch_bool_opt, 0, 2, \
- CACHE_INODE_32BIT, \
- true) \
- CACHE_SET_OPT(gc_reserve_percent, \
- bch_uint_opt, \
- 5, 21, \
- CACHE_SET_GC_RESERVE, \
- false) \
- CACHE_SET_OPT(root_reserve_percent, \
- bch_uint_opt, \
- 0, 21, \
- CACHE_SET_ROOT_RESERVE, \
- false)
+#define BCH_SB_OPTS() \
+ BCH_OPT(errors, \
+ bch_error_actions, \
+ 0, BCH_NR_ERROR_ACTIONS, \
+ BCH_SB_ERROR_ACTION, \
+ true) \
+ BCH_OPT(metadata_replicas, \
+ bch_uint_opt, \
+ 0, BCH_REPLICAS_MAX, \
+ BCH_SB_META_REPLICAS_WANT, \
+ false) \
+ BCH_OPT(data_replicas, \
+ bch_uint_opt, \
+ 0, BCH_REPLICAS_MAX, \
+ BCH_SB_DATA_REPLICAS_WANT, \
+ false) \
+ BCH_OPT(metadata_checksum, \
+ bch_csum_types, \
+ 0, BCH_CSUM_OPT_NR, \
+ BCH_SB_META_CSUM_TYPE, \
+ true) \
+ BCH_OPT(data_checksum, \
+ bch_csum_types, \
+ 0, BCH_CSUM_OPT_NR, \
+ BCH_SB_DATA_CSUM_TYPE, \
+ true) \
+ BCH_OPT(compression, \
+ bch_compression_types, \
+ 0, BCH_COMPRESSION_NR, \
+ BCH_SB_COMPRESSION_TYPE, \
+ true) \
+ BCH_OPT(str_hash, \
+ bch_str_hash_types, \
+ 0, BCH_STR_HASH_NR, \
+ BCH_SB_STR_HASH_TYPE, \
+ true) \
+ BCH_OPT(inodes_32bit, \
+ bch_bool_opt, 0, 2, \
+ BCH_SB_INODE_32BIT, \
+ true) \
+ BCH_OPT(gc_reserve_percent, \
+ bch_uint_opt, \
+ 5, 21, \
+ BCH_SB_GC_RESERVE, \
+ false) \
+ BCH_OPT(root_reserve_percent, \
+ bch_uint_opt, \
+ 0, 100, \
+ BCH_SB_ROOT_RESERVE, \
+ false) \
+ BCH_OPT(wide_macs, \
+ bch_bool_opt, 0, 2, \
+ BCH_SB_128_BIT_MACS, \
+ true)
/* backing device specific stuff: */
@@ -908,7 +1114,7 @@ struct backingdev_sb {
uuid_le set_uuid;
__le64 set_magic;
};
- __u8 label[SB_LABEL_SIZE];
+ __u8 label[BCH_SB_LABEL_SIZE];
__le64 flags;
@@ -947,15 +1153,7 @@ LE64_BITMASK(BDEV_STATE, struct backingdev_sb, flags, 61, 63);
#define BDEV_STATE_DIRTY 2U
#define BDEV_STATE_STALE 3U
-static inline unsigned bch_journal_buckets_offset(struct cache_sb *sb)
-{
- return sb->nr_in_set * (sizeof(struct cache_member) / sizeof(__u64));
-}
-
-static inline unsigned bch_nr_journal_buckets(struct cache_sb *sb)
-{
- return __le16_to_cpu(sb->u64s) - bch_journal_buckets_offset(sb);
-}
+#define BDEV_DATA_START_DEFAULT 16 /* sectors */
static inline _Bool __SB_IS_BDEV(__u64 version)
{
@@ -963,7 +1161,7 @@ static inline _Bool __SB_IS_BDEV(__u64 version)
|| version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET;
}
-static inline _Bool SB_IS_BDEV(const struct cache_sb *sb)
+static inline _Bool SB_IS_BDEV(const struct bch_sb *sb)
{
return __SB_IS_BDEV(sb->version);
}
@@ -981,29 +1179,33 @@ static inline _Bool SB_IS_BDEV(const struct cache_sb *sb)
#define BCACHE_STATFS_MAGIC 0xca451a4e
-#define BCACHE_SB_MAGIC 0xca451a4ef67385c6ULL
-#define BCACHE_SB_MAGIC2 0x816dba487ff56582ULL
-#define JSET_MAGIC 0x245235c1a3625032ULL
-#define PSET_MAGIC 0x6750e15f87337f91ULL
-#define BSET_MAGIC 0x90135c78b99e07f5ULL
+#define JSET_MAGIC __cpu_to_le64(0x245235c1a3625032ULL)
+#define PSET_MAGIC __cpu_to_le64(0x6750e15f87337f91ULL)
+#define BSET_MAGIC __cpu_to_le64(0x90135c78b99e07f5ULL)
-static inline __u64 jset_magic(struct cache_sb *sb)
+static inline __le64 __bch_sb_magic(struct bch_sb *sb)
{
- return __le64_to_cpu(sb->set_magic) ^ JSET_MAGIC;
+ __le64 ret;
+ memcpy(&ret, &sb->uuid, sizeof(ret));
+ return ret;
}
-static inline __u64 pset_magic(struct cache_sb *sb)
+static inline __u64 __jset_magic(struct bch_sb *sb)
{
- return __le64_to_cpu(sb->set_magic) ^ PSET_MAGIC;
+ return __le64_to_cpu(__bch_sb_magic(sb) ^ JSET_MAGIC);
}
-static inline __u64 bset_magic(struct cache_sb *sb)
+static inline __u64 __pset_magic(struct bch_sb *sb)
{
- return __le64_to_cpu(sb->set_magic) ^ BSET_MAGIC;
+ return __le64_to_cpu(__bch_sb_magic(sb) ^ PSET_MAGIC);
}
-/* Journal */
+static inline __u64 __bset_magic(struct bch_sb *sb)
+{
+ return __le64_to_cpu(__bch_sb_magic(sb) ^ BSET_MAGIC);
+}
+/* Journal */
#define BCACHE_JSET_VERSION_UUIDv1 1
#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */
@@ -1054,24 +1256,29 @@ enum {
* version is for on disk format changes.
*/
struct jset {
- __le64 csum;
+ struct bch_csum csum;
+
__le64 magic;
+ __le64 seq;
__le32 version;
__le32 flags;
- /* Sequence number of oldest dirty journal entry */
- __le64 seq;
- __le64 last_seq;
+ __le32 u64s; /* size of d[] in u64s */
+
+ __u8 encrypted_start[0];
__le16 read_clock;
__le16 write_clock;
- __le32 u64s; /* size of d[] in u64s */
+
+ /* Sequence number of oldest dirty journal entry */
+ __le64 last_seq;
+
union {
struct jset_entry start[0];
__u64 _data[0];
};
-};
+} __attribute__((packed));
LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
@@ -1081,10 +1288,14 @@ LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
/* Bucket prios/gens */
struct prio_set {
- __le64 csum;
+ struct bch_csum csum;
+
__le64 magic;
- __le32 version;
- __le32 flags;
+ __le32 nonce[3];
+ __le16 version;
+ __le16 flags;
+
+ __u8 encrypted_start[0];
__le64 next_bucket;
@@ -1093,7 +1304,7 @@ struct prio_set {
__le16 write_prio;
__u8 gen;
} __attribute__((packed)) data[];
-};
+} __attribute__((packed));
LE32_BITMASK(PSET_CSUM_TYPE, struct prio_set, flags, 0, 4);
@@ -1155,28 +1366,49 @@ struct bset {
LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4);
-/* Only used in first bset */
-LE32_BITMASK(BSET_BTREE_LEVEL, struct bset, flags, 4, 8);
-
-LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 8, 9);
+LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 4, 5);
LE32_BITMASK(BSET_SEPARATE_WHITEOUTS,
- struct bset, flags, 9, 10);
+ struct bset, flags, 5, 6);
struct btree_node {
- __le64 csum;
+ struct bch_csum csum;
__le64 magic;
+ /* this flags field is encrypted, unlike bset->flags: */
+ __le64 flags;
+
/* Closed interval: */
struct bpos min_key;
struct bpos max_key;
+ struct bch_extent_ptr ptr;
struct bkey_format format;
+ union {
struct bset keys;
+ struct {
+ __u8 pad[22];
+ __le16 u64s;
+ __u64 _data[0];
+
+ };
+ };
} __attribute__((packed));
+LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4);
+LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8);
+
struct btree_node_entry {
- __le64 csum;
+ struct bch_csum csum;
+
+ union {
struct bset keys;
+ struct {
+ __u8 pad[22];
+ __le16 u64s;
+ __u64 _data[0];
+
+ };
+ };
} __attribute__((packed));
/* OBSOLETE */
@@ -1237,7 +1469,7 @@ struct jset_v0 {
__u16 btree_level;
__u16 pad[3];
- __u64 prio_bucket[MAX_CACHES_PER_SET];
+ __u64 prio_bucket[64];
union {
struct bkey start[0];
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index cb9ad24f..0dbeaaed 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -24,299 +24,81 @@
#include <linux/slab.h>
#include <linux/string.h>
-/*
- * Autoloaded crypto modules should only use a prefixed name to avoid allowing
- * arbitrary modules to be loaded. Loading from userspace may still need the
- * unprefixed names, so retains those aliases as well.
- * This uses __MODULE_INFO directly instead of MODULE_ALIAS because pre-4.3
- * gcc (e.g. avr32 toolchain) uses __LINE__ for uniqueness, and this macro
- * expands twice on the same line. Instead, use a separate base name for the
- * alias.
- */
-#define MODULE_ALIAS_CRYPTO(name) \
- __MODULE_INFO(alias, alias_userspace, name); \
- __MODULE_INFO(alias, alias_crypto, "crypto-" name)
-
-/*
- * Algorithm masks and types.
- */
#define CRYPTO_ALG_TYPE_MASK 0x0000000f
-#define CRYPTO_ALG_TYPE_CIPHER 0x00000001
-#define CRYPTO_ALG_TYPE_AEAD 0x00000003
#define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004
-#define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005
-#define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005
-#define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006
-#define CRYPTO_ALG_TYPE_KPP 0x00000008
-#define CRYPTO_ALG_TYPE_RNG 0x0000000c
-#define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d
-#define CRYPTO_ALG_TYPE_DIGEST 0x0000000e
-#define CRYPTO_ALG_TYPE_HASH 0x0000000e
#define CRYPTO_ALG_TYPE_SHASH 0x0000000e
-#define CRYPTO_ALG_TYPE_AHASH 0x0000000f
-
-#define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e
-#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e
#define CRYPTO_ALG_TYPE_BLKCIPHER_MASK 0x0000000c
-
#define CRYPTO_ALG_ASYNC 0x00000080
-/*
- * Set this bit if and only if the algorithm requires another algorithm of
- * the same type to handle corner cases.
- */
-#define CRYPTO_ALG_NEED_FALLBACK 0x00000100
-
-/*
- * This bit is set for symmetric key ciphers that have already been wrapped
- * with a generic IV generator to prevent them from being wrapped again.
- */
-#define CRYPTO_ALG_GENIV 0x00000200
-
-/*
- * Set if the algorithm is an instance that is build from templates.
- */
-#define CRYPTO_ALG_INSTANCE 0x00000800
-
-/* Set this bit if the algorithm provided is hardware accelerated but
- * not available to userspace via instruction set or so.
- */
-#define CRYPTO_ALG_KERN_DRIVER_ONLY 0x00001000
-
-/*
- * Mark a cipher as a service implementation only usable by another
- * cipher and never by a normal user of the kernel crypto API
- */
-#define CRYPTO_ALG_INTERNAL 0x00002000
-
-/*
- * Transform masks and values (for crt_flags).
- */
-#define CRYPTO_TFM_REQ_MASK 0x000fff00
-#define CRYPTO_TFM_RES_MASK 0xfff00000
-
-#define CRYPTO_TFM_REQ_WEAK_KEY 0x00000100
-#define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200
-#define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400
-#define CRYPTO_TFM_RES_WEAK_KEY 0x00100000
-#define CRYPTO_TFM_RES_BAD_KEY_LEN 0x00200000
-#define CRYPTO_TFM_RES_BAD_KEY_SCHED 0x00400000
-#define CRYPTO_TFM_RES_BAD_BLOCK_LEN 0x00800000
-#define CRYPTO_TFM_RES_BAD_FLAGS 0x01000000
-
-/*
- * Miscellaneous stuff.
- */
#define CRYPTO_MAX_ALG_NAME 64
-/*
- * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual
- * declaration) is used to ensure that the crypto_tfm context structure is
- * aligned correctly for the given architecture so that there are no alignment
- * faults for C data types. In particular, this is required on platforms such
- * as arm where pointers are 32-bit aligned but there are data types such as
- * u64 which require 64-bit alignment.
- */
#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN
-
#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN)))
struct scatterlist;
struct crypto_blkcipher;
struct crypto_tfm;
struct crypto_type;
-struct skcipher_givcrypt_request;
struct blkcipher_desc {
- struct crypto_blkcipher *tfm;
- void *info;
- u32 flags;
-};
-
-struct cipher_desc {
- struct crypto_tfm *tfm;
- void (*crfn)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
- unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst,
- const u8 *src, unsigned int nbytes);
- void *info;
+ struct crypto_blkcipher *tfm;
+ void *info;
+ u32 flags;
};
struct blkcipher_alg {
int (*setkey)(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen);
+ unsigned keylen);
int (*encrypt)(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes);
+ unsigned nbytes);
int (*decrypt)(struct blkcipher_desc *desc,
struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes);
-
- const char *geniv;
-
- unsigned int min_keysize;
- unsigned int max_keysize;
- unsigned int ivsize;
-};
-
-struct cipher_alg {
- unsigned int cia_min_keysize;
- unsigned int cia_max_keysize;
- int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen);
- void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
- void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-};
-
-struct compress_alg {
- int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src,
- unsigned int slen, u8 *dst, unsigned int *dlen);
- int (*coa_decompress)(struct crypto_tfm *tfm, const u8 *src,
- unsigned int slen, u8 *dst, unsigned int *dlen);
+ unsigned nbytes);
};
-
#define cra_blkcipher cra_u.blkcipher
-#define cra_cipher cra_u.cipher
-#define cra_compress cra_u.compress
struct crypto_alg {
- struct list_head cra_list;
- struct list_head cra_users;
-
- u32 cra_flags;
- unsigned int cra_blocksize;
- unsigned int cra_ctxsize;
- unsigned int cra_alignmask;
-
- int cra_priority;
- atomic_t cra_refcnt;
+ struct list_head cra_list;
+ struct list_head cra_users;
- char cra_name[CRYPTO_MAX_ALG_NAME];
- char cra_driver_name[CRYPTO_MAX_ALG_NAME];
+ u32 cra_flags;
+ unsigned cra_ctxsize;
+ char cra_name[CRYPTO_MAX_ALG_NAME];
const struct crypto_type *cra_type;
union {
struct blkcipher_alg blkcipher;
- struct cipher_alg cipher;
- struct compress_alg compress;
} cra_u;
int (*cra_init)(struct crypto_tfm *tfm);
void (*cra_exit)(struct crypto_tfm *tfm);
- void (*cra_destroy)(struct crypto_alg *alg);
-
- struct module *cra_module;
} CRYPTO_MINALIGN_ATTR;
-/*
- * Algorithm registration interface.
- */
int crypto_register_alg(struct crypto_alg *alg);
-int crypto_unregister_alg(struct crypto_alg *alg);
-int crypto_register_algs(struct crypto_alg *algs, int count);
-int crypto_unregister_algs(struct crypto_alg *algs, int count);
-
-/*
- * Algorithm query interface.
- */
-int crypto_has_alg(const char *name, u32 type, u32 mask);
-
-/*
- * Transforms: user-instantiated objects which encapsulate algorithms
- * and core processing logic. Managed via crypto_alloc_*() and
- * crypto_free_*(), as well as the various helpers below.
- */
struct blkcipher_tfm {
- void *iv;
int (*setkey)(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen);
+ unsigned keylen);
int (*encrypt)(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes);
+ struct scatterlist *src, unsigned nbytes);
int (*decrypt)(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes);
+ struct scatterlist *src, unsigned nbytes);
};
-struct cipher_tfm {
- int (*cit_setkey)(struct crypto_tfm *tfm,
- const u8 *key, unsigned int keylen);
- void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
- void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-};
-
-struct compress_tfm {
- int (*cot_compress)(struct crypto_tfm *tfm,
- const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen);
- int (*cot_decompress)(struct crypto_tfm *tfm,
- const u8 *src, unsigned int slen,
- u8 *dst, unsigned int *dlen);
-};
-
-#define crt_blkcipher crt_u.blkcipher
-#define crt_cipher crt_u.cipher
-#define crt_compress crt_u.compress
-
struct crypto_tfm {
+ u32 crt_flags;
- u32 crt_flags;
-
- union {
- struct blkcipher_tfm blkcipher;
- struct cipher_tfm cipher;
- struct compress_tfm compress;
- } crt_u;
+ struct blkcipher_tfm crt_blkcipher;
void (*exit)(struct crypto_tfm *tfm);
- struct crypto_alg *__crt_alg;
-
- void *__crt_ctx[] CRYPTO_MINALIGN_ATTR;
-};
-
-struct crypto_blkcipher {
- struct crypto_tfm base;
-};
-
-struct crypto_cipher {
- struct crypto_tfm base;
-};
-
-struct crypto_comp {
- struct crypto_tfm base;
+ struct crypto_alg *__crt_alg;
+ void *__crt_ctx[] CRYPTO_MINALIGN_ATTR;
};
-enum {
- CRYPTOA_UNSPEC,
- CRYPTOA_ALG,
- CRYPTOA_TYPE,
- CRYPTOA_U32,
- __CRYPTOA_MAX,
-};
-
-#define CRYPTOA_MAX (__CRYPTOA_MAX - 1)
-
-/* Maximum number of (rtattr) parameters for each template. */
-#define CRYPTO_MAX_ATTRS 32
-
-struct crypto_attr_alg {
- char name[CRYPTO_MAX_ALG_NAME];
-};
-
-struct crypto_attr_type {
- u32 type;
- u32 mask;
-};
-
-struct crypto_attr_u32 {
- u32 num;
-};
-
-/*
- * Transform user interface.
- */
-
struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask);
void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm);
@@ -325,110 +107,19 @@ static inline void crypto_free_tfm(struct crypto_tfm *tfm)
return crypto_destroy_tfm(tfm, tfm);
}
-int alg_test(const char *driver, const char *alg, u32 type, u32 mask);
-
-/*
- * Transform helpers which query the underlying algorithm.
- */
-static inline const char *crypto_tfm_alg_name(struct crypto_tfm *tfm)
-{
- return tfm->__crt_alg->cra_name;
-}
-
-static inline const char *crypto_tfm_alg_driver_name(struct crypto_tfm *tfm)
-{
- return tfm->__crt_alg->cra_driver_name;
-}
-
-static inline int crypto_tfm_alg_priority(struct crypto_tfm *tfm)
-{
- return tfm->__crt_alg->cra_priority;
-}
-
static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm)
{
return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK;
}
-static inline unsigned int crypto_tfm_alg_blocksize(struct crypto_tfm *tfm)
-{
- return tfm->__crt_alg->cra_blocksize;
-}
-
-static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm)
-{
- return tfm->__crt_alg->cra_alignmask;
-}
-
-static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm)
-{
- return tfm->crt_flags;
-}
-
-static inline void crypto_tfm_set_flags(struct crypto_tfm *tfm, u32 flags)
-{
- tfm->crt_flags |= flags;
-}
-
-static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags)
-{
- tfm->crt_flags &= ~flags;
-}
-
static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
{
return tfm->__crt_ctx;
}
-static inline unsigned int crypto_tfm_ctx_alignment(void)
-{
- struct crypto_tfm *tfm;
- return __alignof__(tfm->__crt_ctx);
-}
-
-static inline u32 crypto_skcipher_type(u32 type)
-{
- type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
- type |= CRYPTO_ALG_TYPE_BLKCIPHER;
- return type;
-}
-
-static inline u32 crypto_skcipher_mask(u32 mask)
-{
- mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
- mask |= CRYPTO_ALG_TYPE_BLKCIPHER_MASK;
- return mask;
-}
-
-/**
- * DOC: Synchronous Block Cipher API
- *
- * The synchronous block cipher API is used with the ciphers of type
- * CRYPTO_ALG_TYPE_BLKCIPHER (listed as type "blkcipher" in /proc/crypto)
- *
- * Synchronous calls, have a context in the tfm. But since a single tfm can be
- * used in multiple calls and in parallel, this info should not be changeable
- * (unless a lock is used). This applies, for example, to the symmetric key.
- * However, the IV is changeable, so there is an iv field in blkcipher_tfm
- * structure for synchronous blkcipher api. So, its the only state info that can
- * be kept for synchronous calls without using a big lock across a tfm.
- *
- * The block cipher API allows the use of a complete cipher, i.e. a cipher
- * consisting of a template (a block chaining mode) and a single block cipher
- * primitive (e.g. AES).
- *
- * The plaintext data buffer and the ciphertext data buffer are pointed to
- * by using scatter/gather lists. The cipher operation is performed
- * on all segments of the provided scatter/gather lists.
- *
- * The kernel crypto API supports a cipher operation "in-place" which means that
- * the caller may provide the same scatter/gather list for the plaintext and
- * cipher text. After the completion of the cipher operation, the plaintext
- * data is replaced with the ciphertext data in case of an encryption and vice
- * versa for a decryption. The caller must ensure that the scatter/gather lists
- * for the output data point to sufficiently large buffers, i.e. multiples of
- * the block size of the cipher.
- */
+struct crypto_blkcipher {
+ struct crypto_tfm base;
+};
static inline struct crypto_blkcipher *__crypto_blkcipher_cast(
struct crypto_tfm *tfm)
@@ -443,20 +134,6 @@ static inline struct crypto_blkcipher *crypto_blkcipher_cast(
return __crypto_blkcipher_cast(tfm);
}
-/**
- * crypto_alloc_blkcipher() - allocate synchronous block cipher handle
- * @alg_name: is the cra_name / name or cra_driver_name / driver name of the
- * blkcipher cipher
- * @type: specifies the type of the cipher
- * @mask: specifies the mask for the cipher
- *
- * Allocate a cipher handle for a block cipher. The returned struct
- * crypto_blkcipher is the cipher handle that is required for any subsequent
- * API invocation for that block cipher.
- *
- * Return: allocated cipher handle in case of success; IS_ERR() is true in case
- * of an error, PTR_ERR() returns the error code.
- */
static inline struct crypto_blkcipher *crypto_alloc_blkcipher(
const char *alg_name, u32 type, u32 mask)
{
@@ -467,455 +144,30 @@ static inline struct crypto_blkcipher *crypto_alloc_blkcipher(
return __crypto_blkcipher_cast(crypto_alloc_base(alg_name, type, mask));
}
-static inline struct crypto_tfm *crypto_blkcipher_tfm(
- struct crypto_blkcipher *tfm)
-{
- return &tfm->base;
-}
-
-/**
- * crypto_free_blkcipher() - zeroize and free the block cipher handle
- * @tfm: cipher handle to be freed
- */
static inline void crypto_free_blkcipher(struct crypto_blkcipher *tfm)
{
- crypto_free_tfm(crypto_blkcipher_tfm(tfm));
-}
-
-/**
- * crypto_has_blkcipher() - Search for the availability of a block cipher
- * @alg_name: is the cra_name / name or cra_driver_name / driver name of the
- * block cipher
- * @type: specifies the type of the cipher
- * @mask: specifies the mask for the cipher
- *
- * Return: true when the block cipher is known to the kernel crypto API; false
- * otherwise
- */
-static inline int crypto_has_blkcipher(const char *alg_name, u32 type, u32 mask)
-{
- type &= ~CRYPTO_ALG_TYPE_MASK;
- type |= CRYPTO_ALG_TYPE_BLKCIPHER;
- mask |= CRYPTO_ALG_TYPE_MASK;
-
- return crypto_has_alg(alg_name, type, mask);
-}
-
-/**
- * crypto_blkcipher_name() - return the name / cra_name from the cipher handle
- * @tfm: cipher handle
- *
- * Return: The character string holding the name of the cipher
- */
-static inline const char *crypto_blkcipher_name(struct crypto_blkcipher *tfm)
-{
- return crypto_tfm_alg_name(crypto_blkcipher_tfm(tfm));
+ crypto_free_tfm(&tfm->base);
}
static inline struct blkcipher_tfm *crypto_blkcipher_crt(
struct crypto_blkcipher *tfm)
{
- return &crypto_blkcipher_tfm(tfm)->crt_blkcipher;
+ return &tfm->base.crt_blkcipher;
}
-static inline struct blkcipher_alg *crypto_blkcipher_alg(
- struct crypto_blkcipher *tfm)
-{
- return &crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher;
-}
-
-/**
- * crypto_blkcipher_ivsize() - obtain IV size
- * @tfm: cipher handle
- *
- * The size of the IV for the block cipher referenced by the cipher handle is
- * returned. This IV size may be zero if the cipher does not need an IV.
- *
- * Return: IV size in bytes
- */
-static inline unsigned int crypto_blkcipher_ivsize(struct crypto_blkcipher *tfm)
-{
- return crypto_blkcipher_alg(tfm)->ivsize;
-}
-
-/**
- * crypto_blkcipher_blocksize() - obtain block size of cipher
- * @tfm: cipher handle
- *
- * The block size for the block cipher referenced with the cipher handle is
- * returned. The caller may use that information to allocate appropriate
- * memory for the data returned by the encryption or decryption operation.
- *
- * Return: block size of cipher
- */
-static inline unsigned int crypto_blkcipher_blocksize(
- struct crypto_blkcipher *tfm)
-{
- return crypto_tfm_alg_blocksize(crypto_blkcipher_tfm(tfm));
-}
-
-static inline unsigned int crypto_blkcipher_alignmask(
- struct crypto_blkcipher *tfm)
-{
- return crypto_tfm_alg_alignmask(crypto_blkcipher_tfm(tfm));
-}
-
-static inline u32 crypto_blkcipher_get_flags(struct crypto_blkcipher *tfm)
-{
- return crypto_tfm_get_flags(crypto_blkcipher_tfm(tfm));
-}
-
-static inline void crypto_blkcipher_set_flags(struct crypto_blkcipher *tfm,
- u32 flags)
-{
- crypto_tfm_set_flags(crypto_blkcipher_tfm(tfm), flags);
-}
-
-static inline void crypto_blkcipher_clear_flags(struct crypto_blkcipher *tfm,
- u32 flags)
-{
- crypto_tfm_clear_flags(crypto_blkcipher_tfm(tfm), flags);
-}
-
-/**
- * crypto_blkcipher_setkey() - set key for cipher
- * @tfm: cipher handle
- * @key: buffer holding the key
- * @keylen: length of the key in bytes
- *
- * The caller provided key is set for the block cipher referenced by the cipher
- * handle.
- *
- * Note, the key length determines the cipher type. Many block ciphers implement
- * different cipher modes depending on the key size, such as AES-128 vs AES-192
- * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128
- * is performed.
- *
- * Return: 0 if the setting of the key was successful; < 0 if an error occurred
- */
static inline int crypto_blkcipher_setkey(struct crypto_blkcipher *tfm,
- const u8 *key, unsigned int keylen)
+ const u8 *key, unsigned keylen)
{
- return crypto_blkcipher_crt(tfm)->setkey(crypto_blkcipher_tfm(tfm),
- key, keylen);
+ return crypto_blkcipher_crt(tfm)->setkey(&tfm->base, key, keylen);
}
-/**
- * crypto_blkcipher_encrypt() - encrypt plaintext
- * @desc: reference to the block cipher handle with meta data
- * @dst: scatter/gather list that is filled by the cipher operation with the
- * ciphertext
- * @src: scatter/gather list that holds the plaintext
- * @nbytes: number of bytes of the plaintext to encrypt.
- *
- * Encrypt plaintext data using the IV set by the caller with a preceding
- * call of crypto_blkcipher_set_iv.
- *
- * The blkcipher_desc data structure must be filled by the caller and can
- * reside on the stack. The caller must fill desc as follows: desc.tfm is filled
- * with the block cipher handle; desc.flags is filled with either
- * CRYPTO_TFM_REQ_MAY_SLEEP or 0.
- *
- * Return: 0 if the cipher operation was successful; < 0 if an error occurred
- */
-static inline int crypto_blkcipher_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src,
- unsigned int nbytes)
-{
- desc->info = crypto_blkcipher_crt(desc->tfm)->iv;
- return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes);
-}
-
-/**
- * crypto_blkcipher_encrypt_iv() - encrypt plaintext with dedicated IV
- * @desc: reference to the block cipher handle with meta data
- * @dst: scatter/gather list that is filled by the cipher operation with the
- * ciphertext
- * @src: scatter/gather list that holds the plaintext
- * @nbytes: number of bytes of the plaintext to encrypt.
- *
- * Encrypt plaintext data with the use of an IV that is solely used for this
- * cipher operation. Any previously set IV is not used.
- *
- * The blkcipher_desc data structure must be filled by the caller and can
- * reside on the stack. The caller must fill desc as follows: desc.tfm is filled
- * with the block cipher handle; desc.info is filled with the IV to be used for
- * the current operation; desc.flags is filled with either
- * CRYPTO_TFM_REQ_MAY_SLEEP or 0.
- *
- * Return: 0 if the cipher operation was successful; < 0 if an error occurred
- */
static inline int crypto_blkcipher_encrypt_iv(struct blkcipher_desc *desc,
struct scatterlist *dst,
struct scatterlist *src,
- unsigned int nbytes)
+ unsigned nbytes)
{
return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes);
}
-/**
- * crypto_blkcipher_decrypt() - decrypt ciphertext
- * @desc: reference to the block cipher handle with meta data
- * @dst: scatter/gather list that is filled by the cipher operation with the
- * plaintext
- * @src: scatter/gather list that holds the ciphertext
- * @nbytes: number of bytes of the ciphertext to decrypt.
- *
- * Decrypt ciphertext data using the IV set by the caller with a preceding
- * call of crypto_blkcipher_set_iv.
- *
- * The blkcipher_desc data structure must be filled by the caller as documented
- * for the crypto_blkcipher_encrypt call above.
- *
- * Return: 0 if the cipher operation was successful; < 0 if an error occurred
- *
- */
-static inline int crypto_blkcipher_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src,
- unsigned int nbytes)
-{
- desc->info = crypto_blkcipher_crt(desc->tfm)->iv;
- return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes);
-}
-
-/**
- * crypto_blkcipher_decrypt_iv() - decrypt ciphertext with dedicated IV
- * @desc: reference to the block cipher handle with meta data
- * @dst: scatter/gather list that is filled by the cipher operation with the
- * plaintext
- * @src: scatter/gather list that holds the ciphertext
- * @nbytes: number of bytes of the ciphertext to decrypt.
- *
- * Decrypt ciphertext data with the use of an IV that is solely used for this
- * cipher operation. Any previously set IV is not used.
- *
- * The blkcipher_desc data structure must be filled by the caller as documented
- * for the crypto_blkcipher_encrypt_iv call above.
- *
- * Return: 0 if the cipher operation was successful; < 0 if an error occurred
- */
-static inline int crypto_blkcipher_decrypt_iv(struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src,
- unsigned int nbytes)
-{
- return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes);
-}
-
-/**
- * crypto_blkcipher_set_iv() - set IV for cipher
- * @tfm: cipher handle
- * @src: buffer holding the IV
- * @len: length of the IV in bytes
- *
- * The caller provided IV is set for the block cipher referenced by the cipher
- * handle.
- */
-static inline void crypto_blkcipher_set_iv(struct crypto_blkcipher *tfm,
- const u8 *src, unsigned int len)
-{
- memcpy(crypto_blkcipher_crt(tfm)->iv, src, len);
-}
-
-/**
- * crypto_blkcipher_get_iv() - obtain IV from cipher
- * @tfm: cipher handle
- * @dst: buffer filled with the IV
- * @len: length of the buffer dst
- *
- * The caller can obtain the IV set for the block cipher referenced by the
- * cipher handle and store it into the user-provided buffer. If the buffer
- * has an insufficient space, the IV is truncated to fit the buffer.
- */
-static inline void crypto_blkcipher_get_iv(struct crypto_blkcipher *tfm,
- u8 *dst, unsigned int len)
-{
- memcpy(dst, crypto_blkcipher_crt(tfm)->iv, len);
-}
-
-/**
- * DOC: Single Block Cipher API
- *
- * The single block cipher API is used with the ciphers of type
- * CRYPTO_ALG_TYPE_CIPHER (listed as type "cipher" in /proc/crypto).
- *
- * Using the single block cipher API calls, operations with the basic cipher
- * primitive can be implemented. These cipher primitives exclude any block
- * chaining operations including IV handling.
- *
- * The purpose of this single block cipher API is to support the implementation
- * of templates or other concepts that only need to perform the cipher operation
- * on one block at a time. Templates invoke the underlying cipher primitive
- * block-wise and process either the input or the output data of these cipher
- * operations.
- */
-
-static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm)
-{
- return (struct crypto_cipher *)tfm;
-}
-
-static inline struct crypto_cipher *crypto_cipher_cast(struct crypto_tfm *tfm)
-{
- BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER);
- return __crypto_cipher_cast(tfm);
-}
-
-/**
- * crypto_alloc_cipher() - allocate single block cipher handle
- * @alg_name: is the cra_name / name or cra_driver_name / driver name of the
- * single block cipher
- * @type: specifies the type of the cipher
- * @mask: specifies the mask for the cipher
- *
- * Allocate a cipher handle for a single block cipher. The returned struct
- * crypto_cipher is the cipher handle that is required for any subsequent API
- * invocation for that single block cipher.
- *
- * Return: allocated cipher handle in case of success; IS_ERR() is true in case
- * of an error, PTR_ERR() returns the error code.
- */
-static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name,
- u32 type, u32 mask)
-{
- type &= ~CRYPTO_ALG_TYPE_MASK;
- type |= CRYPTO_ALG_TYPE_CIPHER;
- mask |= CRYPTO_ALG_TYPE_MASK;
-
- return __crypto_cipher_cast(crypto_alloc_base(alg_name, type, mask));
-}
-
-static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm)
-{
- return &tfm->base;
-}
-
-/**
- * crypto_free_cipher() - zeroize and free the single block cipher handle
- * @tfm: cipher handle to be freed
- */
-static inline void crypto_free_cipher(struct crypto_cipher *tfm)
-{
- crypto_free_tfm(crypto_cipher_tfm(tfm));
-}
-
-/**
- * crypto_has_cipher() - Search for the availability of a single block cipher
- * @alg_name: is the cra_name / name or cra_driver_name / driver name of the
- * single block cipher
- * @type: specifies the type of the cipher
- * @mask: specifies the mask for the cipher
- *
- * Return: true when the single block cipher is known to the kernel crypto API;
- * false otherwise
- */
-static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask)
-{
- type &= ~CRYPTO_ALG_TYPE_MASK;
- type |= CRYPTO_ALG_TYPE_CIPHER;
- mask |= CRYPTO_ALG_TYPE_MASK;
-
- return crypto_has_alg(alg_name, type, mask);
-}
-
-static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm)
-{
- return &crypto_cipher_tfm(tfm)->crt_cipher;
-}
-
-/**
- * crypto_cipher_blocksize() - obtain block size for cipher
- * @tfm: cipher handle
- *
- * The block size for the single block cipher referenced with the cipher handle
- * tfm is returned. The caller may use that information to allocate appropriate
- * memory for the data returned by the encryption or decryption operation
- *
- * Return: block size of cipher
- */
-static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm)
-{
- return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm));
-}
-
-static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm)
-{
- return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm));
-}
-
-static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm)
-{
- return crypto_tfm_get_flags(crypto_cipher_tfm(tfm));
-}
-
-static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm,
- u32 flags)
-{
- crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags);
-}
-
-static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm,
- u32 flags)
-{
- crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags);
-}
-
-/**
- * crypto_cipher_setkey() - set key for cipher
- * @tfm: cipher handle
- * @key: buffer holding the key
- * @keylen: length of the key in bytes
- *
- * The caller provided key is set for the single block cipher referenced by the
- * cipher handle.
- *
- * Note, the key length determines the cipher type. Many block ciphers implement
- * different cipher modes depending on the key size, such as AES-128 vs AES-192
- * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128
- * is performed.
- *
- * Return: 0 if the setting of the key was successful; < 0 if an error occurred
- */
-static inline int crypto_cipher_setkey(struct crypto_cipher *tfm,
- const u8 *key, unsigned int keylen)
-{
- return crypto_cipher_crt(tfm)->cit_setkey(crypto_cipher_tfm(tfm),
- key, keylen);
-}
-
-/**
- * crypto_cipher_encrypt_one() - encrypt one block of plaintext
- * @tfm: cipher handle
- * @dst: points to the buffer that will be filled with the ciphertext
- * @src: buffer holding the plaintext to be encrypted
- *
- * Invoke the encryption operation of one block. The caller must ensure that
- * the plaintext and ciphertext buffers are at least one block in size.
- */
-static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm,
- u8 *dst, const u8 *src)
-{
- crypto_cipher_crt(tfm)->cit_encrypt_one(crypto_cipher_tfm(tfm),
- dst, src);
-}
-
-/**
- * crypto_cipher_decrypt_one() - decrypt one block of ciphertext
- * @tfm: cipher handle
- * @dst: points to the buffer that will be filled with the plaintext
- * @src: buffer holding the ciphertext to be decrypted
- *
- * Invoke the decryption operation of one block. The caller must ensure that
- * the plaintext and ciphertext buffers are at least one block in size.
- */
-static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm,
- u8 *dst, const u8 *src)
-{
- crypto_cipher_crt(tfm)->cit_decrypt_one(crypto_cipher_tfm(tfm),
- dst, src);
-}
-
#endif /* _LINUX_CRYPTO_H */
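
What survives of crypto.h is the minimum needed to drive a synchronous blkcipher from the userspace shim: allocate a tfm, set a key, and encrypt or decrypt scatterlists with an explicit per-operation IV. A minimal caller, assuming the ChaCha20 blkcipher added elsewhere in this series registers under the name "chacha20" with a 32-byte key, would look roughly like this (sketch only; error handling trimmed):

#include <linux/crypto.h>
#include <linux/scatterlist.h>

static int example_encrypt_in_place(void *buf, unsigned len,
				    const u8 *key, u8 *iv)
{
	struct crypto_blkcipher *tfm =
		crypto_alloc_blkcipher("chacha20", 0, 0);
	struct blkcipher_desc desc;
	struct scatterlist sg;
	int ret;

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	crypto_blkcipher_setkey(tfm, key, 32);	/* assumed 32-byte key */

	desc.tfm	= tfm;
	desc.info	= iv;			/* IV passed per operation */
	desc.flags	= 0;

	sg_init_one(&sg, buf, len);		/* src == dst: in-place */
	ret = crypto_blkcipher_encrypt_iv(&desc, &sg, &sg, len);

	crypto_free_blkcipher(tfm);
	return ret;
}
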
diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h
deleted file mode 100644
index 8dfcb83b..00000000
--- a/include/linux/cryptohash.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef __CRYPTOHASH_H
-#define __CRYPTOHASH_H
-
-#include <linux/types.h>
-
-#define SHA_DIGEST_WORDS 5
-#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8)
-#define SHA_WORKSPACE_WORDS 16
-
-void sha_init(__u32 *buf);
-void sha_transform(__u32 *digest, const char *data, __u32 *W);
-
-#define MD5_DIGEST_WORDS 4
-#define MD5_MESSAGE_BYTES 64
-
-void md5_transform(__u32 *hash, __u32 const *in);
-
-__u32 half_md4_transform(__u32 buf[4], __u32 const in[8]);
-
-#endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2233350b..ac72858b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -207,6 +207,4 @@ int __must_check kstrtoint(const char *s, unsigned int base, int *res);
BUILD_BUG_ON_ZERO((perms) & 2) + \
(perms))
-#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
-
#endif
diff --git a/include/linux/key.h b/include/linux/key.h
new file mode 100644
index 00000000..adc12a9e
--- /dev/null
+++ b/include/linux/key.h
@@ -0,0 +1,50 @@
+#ifndef _LINUX_KEY_H
+#define _LINUX_KEY_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/rcupdate.h>
+#include <linux/sysctl.h>
+#include <linux/rwsem.h>
+#include <linux/atomic.h>
+
+#include <keyutils.h>
+
+struct key;
+
+struct user_key_payload {
+ size_t datalen; /* length of this data */
+ char data[0]; /* actual data */
+};
+
+struct key {
+ atomic_t usage; /* number of references */
+ key_serial_t serial; /* key serial number */
+ struct rw_semaphore sem; /* change vs change sem */
+ struct user_key_payload payload;
+};
+
+static inline const struct user_key_payload *user_key_payload(const struct key *key)
+{
+ return &key->payload;
+}
+
+static inline void key_put(struct key *key)
+{
+ if (atomic_dec_and_test(&key->usage))
+ free(key);
+}
+
+static inline struct key *__key_get(struct key *key)
+{
+ atomic_inc(&key->usage);
+ return key;
+}
+
+static inline struct key *key_get(struct key *key)
+{
+ return key ? __key_get(key) : key;
+}
+
+#endif /* _LINUX_KEY_H */
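
struct key here is cut down to a refcount plus an inline user_key_payload, just enough for passing filesystem encryption keys around in userspace. A sketch of the intended lifecycle (the allocator below is hypothetical and assumes the shim provides atomic_set(); the real lookup presumably goes through keyutils in crypto.c):

#include <linux/key.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical helper, not part of the patch: wrap raw key material in a
 * refcounted struct key whose payload is allocated inline. */
static struct key *example_key_alloc(const void *data, size_t datalen)
{
	struct key *key = malloc(sizeof(*key) + datalen);

	if (!key)
		return NULL;

	atomic_set(&key->usage, 1);
	key->payload.datalen = datalen;
	memcpy(key->payload.data, data, datalen);
	return key;
}

/* Consumers take references with key_get(), read the secret through
 * user_key_payload(key)->data, and drop them with key_put(), which frees the
 * key once the last reference goes away. */
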
diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index c2789f93..ddf6f941 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -14,6 +14,11 @@ typedef struct mempool_s {
size_t elem_size;
} mempool_t;
+static inline bool mempool_initialized(mempool_t *pool)
+{
+ return true;
+}
+
extern int mempool_resize(mempool_t *pool, int new_min_nr);
static inline void mempool_free(void *element, mempool_t *pool)
diff --git a/include/linux/page.h b/include/linux/page.h
index c99d9de3..8d6413ce 100644
--- a/include/linux/page.h
+++ b/include/linux/page.h
@@ -5,8 +5,11 @@
struct page;
-#define virt_to_page(kaddr) ((struct page *) (kaddr))
-#define page_address(kaddr) ((void *) (kaddr))
+#define virt_to_page(p) \
+ ((struct page *) (((unsigned long) (p)) & PAGE_MASK))
+#define offset_in_page(p) ((unsigned long) (p) & ~PAGE_MASK)
+
+#define page_address(p) ((void *) (p))
#define kmap_atomic(page) page_address(page)
#define kunmap_atomic(addr) do {} while (0)
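
The point of the new definitions is that splitting a pointer now round-trips: virt_to_page() keeps the bits above the page boundary, offset_in_page() (moved here from kernel.h) keeps the bits below it, and page_address() remains the identity, so the sg_set_buf()/sg_virt() pair in the scatterlist shim recovers the original pointer. A quick sanity check (sketch):

#include <linux/page.h>
#include <linux/bug.h>

static void example_page_roundtrip(void *p)
{
	struct page *page	= virt_to_page(p);
	unsigned long offset	= offset_in_page(p);

	/* (p & PAGE_MASK) + (p & ~PAGE_MASK) == p */
	BUG_ON((void *) ((char *) page_address(page) + offset) != p);
}
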
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
new file mode 100644
index 00000000..04bf59df
--- /dev/null
+++ b/include/linux/scatterlist.h
@@ -0,0 +1,111 @@
+#ifndef _LINUX_SCATTERLIST_H
+#define _LINUX_SCATTERLIST_H
+
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+
+struct scatterlist {
+ unsigned long page_link;
+ unsigned int offset;
+ unsigned int length;
+};
+
+#define sg_is_chain(sg) ((sg)->page_link & 0x01)
+#define sg_is_last(sg) ((sg)->page_link & 0x02)
+#define sg_chain_ptr(sg) \
+ ((struct scatterlist *) ((sg)->page_link & ~0x03))
+
+static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+{
+ unsigned long page_link = sg->page_link & 0x3;
+
+ /*
+ * In order for the low bit stealing approach to work, pages
+ * must be aligned at a 32-bit boundary as a minimum.
+ */
+ BUG_ON((unsigned long) page & 0x03);
+ sg->page_link = page_link | (unsigned long) page;
+}
+
+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ sg_assign_page(sg, page);
+ sg->offset = offset;
+ sg->length = len;
+}
+
+static inline struct page *sg_page(struct scatterlist *sg)
+{
+ return (struct page *)((sg)->page_link & ~0x3);
+}
+
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
+ unsigned int buflen)
+{
+ sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
+static inline struct scatterlist *sg_next(struct scatterlist *sg)
+{
+ if (sg_is_last(sg))
+ return NULL;
+
+ sg++;
+ if (unlikely(sg_is_chain(sg)))
+ sg = sg_chain_ptr(sg);
+
+ return sg;
+}
+
+#define for_each_sg(sglist, sg, nr, __i) \
+ for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
+
+static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
+ struct scatterlist *sgl)
+{
+ /*
+ * offset and length are unused for chain entry. Clear them.
+ */
+ prv[prv_nents - 1].offset = 0;
+ prv[prv_nents - 1].length = 0;
+
+ /*
+ * Set lowest bit to indicate a link pointer, and make sure to clear
+ * the termination bit if it happens to be set.
+ */
+ prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
+}
+
+static inline void sg_mark_end(struct scatterlist *sg)
+{
+ sg->page_link |= 0x02;
+ sg->page_link &= ~0x01;
+}
+
+static inline void sg_unmark_end(struct scatterlist *sg)
+{
+ sg->page_link &= ~0x02;
+}
+
+static inline void *sg_virt(struct scatterlist *sg)
+{
+ return page_address(sg_page(sg)) + sg->offset;
+}
+
+static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+ memset(sgl, 0, sizeof(*sgl) * nents);
+ sg_mark_end(&sgl[nents - 1]);
+}
+
+static inline void sg_init_one(struct scatterlist *sg, const void *buf,
+ unsigned int buflen)
+{
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, buf, buflen);
+}
+
+#endif /* _LINUX_SCATTERLIST_H */
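
Only the pieces of the scatterlist API the crypto code needs are carried over; for the common single-buffer case, usage collapses to something like the sketch below (with this shim, sg_virt() hands back the original pointer):

#include <linux/scatterlist.h>

static void example_sg_walk(void *buf, unsigned len)
{
	struct scatterlist sg, *s;
	unsigned i;

	sg_init_one(&sg, buf, len);	/* one entry, marked as the end */

	for_each_sg(&sg, s, 1, i) {
		void *p		= sg_virt(s);	/* == buf with this shim */
		unsigned n	= s->length;	/* == len */

		(void) p;
		(void) n;
	}
}
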
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 2e1ad82e..2d9f8291 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -38,6 +38,19 @@ struct itimerspec64 {
#define KTIME_MAX ((s64)~((u64)1 << 63))
#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+static inline struct timespec ns_to_timespec(const u64 nsec)
+{
+ return (struct timespec) {
+ .tv_sec = nsec / NSEC_PER_SEC,
+ .tv_nsec = nsec % NSEC_PER_SEC,
+ };
+}
+
+static inline s64 timespec_to_ns(const struct timespec *ts)
+{
+ return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
#if __BITS_PER_LONG == 64
static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64)
@@ -61,11 +74,6 @@ static inline struct timespec64 timespec_to_timespec64(const struct timespec ts)
# define ns_to_timespec64 ns_to_timespec
# define timespec64_add_ns timespec_add_ns
-static inline s64 timespec_to_ns(const struct timespec *ts)
-{
- return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
-}
-
#else
static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64)
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index d4968c54..01e4b79d 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -185,7 +185,7 @@ TRACE_EVENT(bcache_write,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->inode = inode;
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
@@ -215,7 +215,7 @@ TRACE_EVENT(bcache_write_throttle,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->inode = inode;
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
@@ -245,7 +245,7 @@ DECLARE_EVENT_CLASS(page_alloc_fail,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->size = size;
),
@@ -263,7 +263,7 @@ DECLARE_EVENT_CLASS(cache_set,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
),
TP_printk("%pU", __entry->uuid)
@@ -285,7 +285,7 @@ TRACE_EVENT(bcache_journal_next_bucket,
),
TP_fast_assign(
- memcpy(__entry->uuid, ca->disk_sb.sb->disk_uuid.b, 16);
+ memcpy(__entry->uuid, ca->uuid.b, 16);
__entry->cur_idx = cur_idx;
__entry->last_idx = last_idx;
),
@@ -304,7 +304,7 @@ TRACE_EVENT(bcache_journal_write_oldest,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->seq = seq;
),
@@ -322,7 +322,7 @@ TRACE_EVENT(bcache_journal_write_oldest_done,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->seq = seq;
__entry->written = written;
),
@@ -368,7 +368,7 @@ DECLARE_EVENT_CLASS(cache,
),
TP_fast_assign(
- memcpy(__entry->uuid, ca->disk_sb.sb->disk_uuid.b, 16);
+ memcpy(__entry->uuid, ca->uuid.b, 16);
__entry->tier = ca->mi.tier;
),
@@ -418,7 +418,7 @@ DECLARE_EVENT_CLASS(btree_node,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->bucket = PTR_BUCKET_NR_TRACE(c, &b->key, 0);
__entry->level = b->level;
__entry->id = b->btree_id;
@@ -471,7 +471,7 @@ TRACE_EVENT(bcache_btree_node_alloc_fail,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->id = id;
),
@@ -514,7 +514,7 @@ TRACE_EVENT(bcache_mca_scan,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->touched = touched;
__entry->freed = freed;
__entry->can_free = can_free;
@@ -535,7 +535,7 @@ DECLARE_EVENT_CLASS(mca_cannibalize_lock,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
),
TP_printk("%pU", __entry->uuid)
@@ -675,7 +675,7 @@ TRACE_EVENT(bcache_btree_gc_coalesce_fail,
TP_fast_assign(
__entry->reason = reason;
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->disk_sb->user_uuid.b, 16);
),
TP_printk("%pU: %u", __entry->uuid, __entry->reason)
@@ -696,7 +696,7 @@ TRACE_EVENT(bcache_btree_node_alloc_replacement,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->old_bucket = PTR_BUCKET_NR_TRACE(c,
&old->key, 0);
__entry->bucket = PTR_BUCKET_NR_TRACE(c, &b->key, 0);
@@ -778,7 +778,7 @@ TRACE_EVENT(bcache_mark_bucket,
),
TP_fast_assign(
- memcpy(__entry->uuid, ca->disk_sb.sb->disk_uuid.b, 16);
+ memcpy(__entry->uuid, ca->uuid.b, 16);
__entry->inode = k->p.inode;
__entry->offset = k->p.offset;
__entry->sectors = sectors;
@@ -804,7 +804,7 @@ TRACE_EVENT(bcache_alloc_batch,
),
TP_fast_assign(
- memcpy(__entry->uuid, ca->disk_sb.sb->disk_uuid.b, 16);
+ memcpy(__entry->uuid, ca->uuid.b, 16);
__entry->free = free;
__entry->total = total;
),
@@ -824,7 +824,7 @@ TRACE_EVENT(bcache_btree_reserve_get_fail,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->required = required;
__entry->cl = cl;
),
@@ -879,7 +879,7 @@ DECLARE_EVENT_CLASS(cache_bucket_alloc,
),
TP_fast_assign(
- memcpy(__entry->uuid, ca->disk_sb.sb->disk_uuid.b, 16);
+ memcpy(__entry->uuid, ca->uuid.b, 16);
__entry->reserve = reserve;
),
@@ -908,7 +908,7 @@ DECLARE_EVENT_CLASS(cache_set_bucket_alloc,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->reserve = reserve;
__entry->cl = cl;
),
@@ -933,7 +933,7 @@ DECLARE_EVENT_CLASS(open_bucket_alloc,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->cl = cl;
),
@@ -1054,7 +1054,7 @@ TRACE_EVENT(bcache_moving_gc_end,
),
TP_fast_assign(
- memcpy(__entry->uuid, ca->disk_sb.sb->disk_uuid.b, 16);
+ memcpy(__entry->uuid, ca->uuid.b, 16);
__entry->sectors_moved = sectors_moved;
__entry->keys_moved = keys_moved;
__entry->buckets_moved = buckets_moved;
@@ -1114,7 +1114,7 @@ TRACE_EVENT(bcache_tiering_end,
),
TP_fast_assign(
- memcpy(__entry->uuid, c->disk_sb.user_uuid.b, 16);
+ memcpy(__entry->uuid, c->sb.user_uuid.b, 16);
__entry->sectors_moved = sectors_moved;
__entry->keys_moved = keys_moved;
),
diff --git a/libbcache.c b/libbcache.c
index 802d3b4c..cc294bd4 100644
--- a/libbcache.c
+++ b/libbcache.c
@@ -7,6 +7,7 @@
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <time.h>
#include <unistd.h>
#include <uuid/uuid.h>
@@ -14,30 +15,17 @@
#include "linux/bcache.h"
#include "libbcache.h"
#include "checksum.h"
+#include "crypto.h"
#include "opts.h"
+#include "super-io.h"
+
+#define NSEC_PER_SEC 1000000000L
#define BCH_MIN_NR_NBUCKETS (1 << 10)
/* first bucket should start 1 mb in, in sectors: */
#define FIRST_BUCKET_OFFSET (1 << 11)
-void __do_write_sb(int fd, void *sb, size_t bytes)
-{
- char zeroes[SB_SECTOR << 9] = {0};
-
- /* Zero start of disk */
- xpwrite(fd, zeroes, SB_SECTOR << 9, 0);
-
- /* Write superblock */
- xpwrite(fd, sb, bytes, SB_SECTOR << 9);
-
- fsync(fd);
- close(fd);
-}
-
-#define do_write_sb(_fd, _sb) \
- __do_write_sb(_fd, _sb, ((void *) __bset_bkey_last(_sb)) - (void *) _sb);
-
/* minimum size filesystem we can create, given a bucket size: */
static u64 min_size(unsigned bucket_size)
{
@@ -45,12 +33,26 @@ static u64 min_size(unsigned bucket_size)
BCH_MIN_NR_NBUCKETS) * bucket_size;
}
+static void init_layout(struct bch_sb_layout *l)
+{
+ memset(l, 0, sizeof(*l));
+
+ l->magic = BCACHE_MAGIC;
+ l->layout_type = 0;
+ l->nr_superblocks = 2;
+ l->sb_max_size_bits = 7;
+ l->sb_offset[0] = cpu_to_le64(BCH_SB_SECTOR);
+ l->sb_offset[1] = cpu_to_le64(BCH_SB_SECTOR +
+ (1 << l->sb_max_size_bits));
+}
+
void bcache_format(struct dev_opts *devs, size_t nr_devs,
unsigned block_size,
unsigned btree_node_size,
unsigned meta_csum_type,
unsigned data_csum_type,
unsigned compression_type,
+ const char *passphrase,
unsigned meta_replicas,
unsigned data_replicas,
unsigned on_error_action,
@@ -58,8 +60,10 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
char *label,
uuid_le uuid)
{
- struct cache_sb *sb;
+ struct bch_sb *sb;
struct dev_opts *i;
+ struct bch_sb_field_members *mi;
+ unsigned u64s, j;
/* calculate block size: */
if (!block_size)
@@ -124,16 +128,20 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
max_journal_entry_size = roundup_pow_of_two(max_journal_entry_size);
- sb = calloc(1, sizeof(*sb) + sizeof(struct cache_member) * nr_devs);
+ sb = calloc(1, sizeof(*sb) +
+ sizeof(struct bch_sb_field_members) +
+ sizeof(struct bch_member) * nr_devs +
+ sizeof(struct bch_sb_field_crypt));
- sb->offset = __cpu_to_le64(SB_SECTOR);
- sb->version = __cpu_to_le64(BCACHE_SB_VERSION_CDEV_V3);
+ sb->version = cpu_to_le64(BCACHE_SB_VERSION_CDEV_V4);
sb->magic = BCACHE_MAGIC;
- sb->block_size = __cpu_to_le16(block_size);
+ sb->block_size = cpu_to_le16(block_size);
sb->user_uuid = uuid;
- sb->nr_in_set = nr_devs;
+ sb->nr_devices = nr_devs;
+
+ init_layout(&sb->layout);
- uuid_generate(sb->set_uuid.b);
+ uuid_generate(sb->uuid.b);
if (label)
strncpy((char *) sb->label, label, sizeof(sb->label));
@@ -142,44 +150,85 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
* don't have a userspace crc32c implementation handy, just always use
* crc64
*/
- SET_CACHE_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64);
- SET_CACHE_SET_META_PREFERRED_CSUM_TYPE(sb, meta_csum_type);
- SET_CACHE_SET_DATA_PREFERRED_CSUM_TYPE(sb, data_csum_type);
- SET_CACHE_SET_COMPRESSION_TYPE(sb, compression_type);
-
- SET_CACHE_SET_BTREE_NODE_SIZE(sb, btree_node_size);
- SET_CACHE_SET_META_REPLICAS_WANT(sb, meta_replicas);
- SET_CACHE_SET_META_REPLICAS_HAVE(sb, meta_replicas);
- SET_CACHE_SET_DATA_REPLICAS_WANT(sb, data_replicas);
- SET_CACHE_SET_DATA_REPLICAS_HAVE(sb, data_replicas);
- SET_CACHE_SET_ERROR_ACTION(sb, on_error_action);
- SET_CACHE_SET_STR_HASH_TYPE(sb, BCH_STR_HASH_SIPHASH);
- SET_CACHE_SET_JOURNAL_ENTRY_SIZE(sb, ilog2(max_journal_entry_size));
+ SET_BCH_SB_CSUM_TYPE(sb, BCH_CSUM_CRC64);
+ SET_BCH_SB_META_CSUM_TYPE(sb, meta_csum_type);
+ SET_BCH_SB_DATA_CSUM_TYPE(sb, data_csum_type);
+ SET_BCH_SB_COMPRESSION_TYPE(sb, compression_type);
+
+ SET_BCH_SB_BTREE_NODE_SIZE(sb, btree_node_size);
+ SET_BCH_SB_GC_RESERVE(sb, 8);
+ SET_BCH_SB_META_REPLICAS_WANT(sb, meta_replicas);
+ SET_BCH_SB_META_REPLICAS_HAVE(sb, meta_replicas);
+ SET_BCH_SB_DATA_REPLICAS_WANT(sb, data_replicas);
+ SET_BCH_SB_DATA_REPLICAS_HAVE(sb, data_replicas);
+ SET_BCH_SB_ERROR_ACTION(sb, on_error_action);
+ SET_BCH_SB_STR_HASH_TYPE(sb, BCH_STR_HASH_SIPHASH);
+ SET_BCH_SB_JOURNAL_ENTRY_SIZE(sb, ilog2(max_journal_entry_size));
+
+ struct timespec now;
+ if (clock_gettime(CLOCK_REALTIME, &now))
+ die("error getting current time: %s", strerror(errno));
+
+ sb->time_base_lo = cpu_to_le64(now.tv_sec * NSEC_PER_SEC + now.tv_nsec);
+ sb->time_precision = cpu_to_le32(1);
+
+ if (passphrase) {
+ struct bch_sb_field_crypt *crypt = vstruct_end(sb);
+
+ u64s = sizeof(struct bch_sb_field_crypt) / sizeof(u64);
+
+ le32_add_cpu(&sb->u64s, u64s);
+ crypt->field.u64s = cpu_to_le32(u64s);
+ crypt->field.type = BCH_SB_FIELD_crypt;
+
+ bch_sb_crypt_init(sb, crypt, passphrase);
+ SET_BCH_SB_ENCRYPTION_TYPE(sb, 1);
+ }
+
+ mi = vstruct_end(sb);
+ u64s = (sizeof(struct bch_sb_field_members) +
+ sizeof(struct bch_member) * nr_devs) / sizeof(u64);
+
+ le32_add_cpu(&sb->u64s, u64s);
+ mi->field.u64s = cpu_to_le32(u64s);
+ mi->field.type = BCH_SB_FIELD_members;
for (i = devs; i < devs + nr_devs; i++) {
- struct cache_member *m = sb->members + (i - devs);
+ struct bch_member *m = mi->members + (i - devs);
uuid_generate(m->uuid.b);
- m->nbuckets = __cpu_to_le64(i->nbuckets);
- m->first_bucket = __cpu_to_le16(i->first_bucket);
- m->bucket_size = __cpu_to_le16(i->bucket_size);
+ m->nbuckets = cpu_to_le64(i->nbuckets);
+ m->first_bucket = cpu_to_le16(i->first_bucket);
+ m->bucket_size = cpu_to_le16(i->bucket_size);
- SET_CACHE_TIER(m, i->tier);
- SET_CACHE_REPLACEMENT(m, CACHE_REPLACEMENT_LRU);
- SET_CACHE_DISCARD(m, i->discard);
+ SET_BCH_MEMBER_TIER(m, i->tier);
+ SET_BCH_MEMBER_REPLACEMENT(m, CACHE_REPLACEMENT_LRU);
+ SET_BCH_MEMBER_DISCARD(m, i->discard);
}
- sb->u64s = __cpu_to_le16(bch_journal_buckets_offset(sb));
-
for (i = devs; i < devs + nr_devs; i++) {
- struct cache_member *m = sb->members + (i - devs);
+ sb->dev_idx = i - devs;
+
+ static const char zeroes[BCH_SB_SECTOR << 9];
+ struct nonce nonce = { 0 };
+
+ /* Zero start of disk */
+ xpwrite(i->fd, zeroes, BCH_SB_SECTOR << 9, 0);
+
+ xpwrite(i->fd, &sb->layout, sizeof(sb->layout),
+ BCH_SB_LAYOUT_SECTOR << 9);
- sb->disk_uuid = m->uuid;
- sb->nr_this_dev = i - devs;
- sb->csum = __cpu_to_le64(__csum_set(sb, __le16_to_cpu(sb->u64s),
- CACHE_SB_CSUM_TYPE(sb)));
+ for (j = 0; j < sb->layout.nr_superblocks; j++) {
+ sb->offset = sb->layout.sb_offset[j];
- do_write_sb(i->fd, sb);
+ sb->csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb),
+ nonce, sb);
+ xpwrite(i->fd, sb, vstruct_bytes(sb),
+ le64_to_cpu(sb->offset) << 9);
+ }
+
+ fsync(i->fd);
+ close(i->fd);
}
bcache_super_print(sb, HUMAN_READABLE);
@@ -187,16 +236,39 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
free(sb);
}
-void bcache_super_print(struct cache_sb *sb, int units)
+struct bch_sb *bcache_super_read(const char *path)
{
- unsigned i;
+ struct bch_sb sb, *ret;
+
+ int fd = open(path, O_RDONLY);
+ if (fd < 0)
+ die("couldn't open %s", path);
+
+ xpread(fd, &sb, sizeof(sb), BCH_SB_SECTOR << 9);
+
+ if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
+ die("not a bcache superblock");
+
+ size_t bytes = vstruct_bytes(&sb);
+
+ ret = malloc(bytes);
+
+ xpread(fd, ret, bytes, BCH_SB_SECTOR << 9);
+
+ return ret;
+}
+
+void bcache_super_print(struct bch_sb *sb, int units)
+{
+ struct bch_sb_field_members *mi;
char user_uuid_str[40], internal_uuid_str[40], member_uuid_str[40];
- char label[SB_LABEL_SIZE + 1];
+ char label[BCH_SB_LABEL_SIZE + 1];
+ unsigned i;
memset(label, 0, sizeof(label));
memcpy(label, sb->label, sizeof(sb->label));
uuid_unparse(sb->user_uuid.b, user_uuid_str);
- uuid_unparse(sb->set_uuid.b, internal_uuid_str);
+ uuid_unparse(sb->uuid.b, internal_uuid_str);
printf("External UUID: %s\n"
"Internal UUID: %s\n"
@@ -226,44 +298,50 @@ void bcache_super_print(struct cache_sb *sb, int units)
label,
le64_to_cpu(sb->version),
pr_units(le16_to_cpu(sb->block_size), units),
- pr_units(CACHE_SET_BTREE_NODE_SIZE(sb), units),
- pr_units(1U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb), units),
+ pr_units(BCH_SB_BTREE_NODE_SIZE(sb), units),
+ pr_units(1U << BCH_SB_JOURNAL_ENTRY_SIZE(sb), units),
- CACHE_SET_ERROR_ACTION(sb) < BCH_NR_ERROR_ACTIONS
- ? bch_error_actions[CACHE_SET_ERROR_ACTION(sb)]
+ BCH_SB_ERROR_ACTION(sb) < BCH_NR_ERROR_ACTIONS
+ ? bch_error_actions[BCH_SB_ERROR_ACTION(sb)]
: "unknown",
- CACHE_SET_CLEAN(sb),
+ BCH_SB_CLEAN(sb),
- CACHE_SET_META_REPLICAS_HAVE(sb),
- CACHE_SET_META_REPLICAS_WANT(sb),
- CACHE_SET_DATA_REPLICAS_HAVE(sb),
- CACHE_SET_DATA_REPLICAS_WANT(sb),
+ BCH_SB_META_REPLICAS_HAVE(sb),
+ BCH_SB_META_REPLICAS_WANT(sb),
+ BCH_SB_DATA_REPLICAS_HAVE(sb),
+ BCH_SB_DATA_REPLICAS_WANT(sb),
- CACHE_SET_META_PREFERRED_CSUM_TYPE(sb) < BCH_CSUM_NR
- ? bch_csum_types[CACHE_SET_META_PREFERRED_CSUM_TYPE(sb)]
+ BCH_SB_META_CSUM_TYPE(sb) < BCH_CSUM_NR
+ ? bch_csum_types[BCH_SB_META_CSUM_TYPE(sb)]
: "unknown",
- CACHE_SET_DATA_PREFERRED_CSUM_TYPE(sb) < BCH_CSUM_NR
- ? bch_csum_types[CACHE_SET_DATA_PREFERRED_CSUM_TYPE(sb)]
+ BCH_SB_DATA_CSUM_TYPE(sb) < BCH_CSUM_NR
+ ? bch_csum_types[BCH_SB_DATA_CSUM_TYPE(sb)]
: "unknown",
- CACHE_SET_COMPRESSION_TYPE(sb) < BCH_COMPRESSION_NR
- ? bch_compression_types[CACHE_SET_COMPRESSION_TYPE(sb)]
+ BCH_SB_COMPRESSION_TYPE(sb) < BCH_COMPRESSION_NR
+ ? bch_compression_types[BCH_SB_COMPRESSION_TYPE(sb)]
: "unknown",
- CACHE_SET_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR
- ? bch_str_hash_types[CACHE_SET_STR_HASH_TYPE(sb)]
+ BCH_SB_STR_HASH_TYPE(sb) < BCH_STR_HASH_NR
+ ? bch_str_hash_types[BCH_SB_STR_HASH_TYPE(sb)]
: "unknown",
- CACHE_INODE_32BIT(sb),
- CACHE_SET_GC_RESERVE(sb),
- CACHE_SET_ROOT_RESERVE(sb),
+ BCH_SB_INODE_32BIT(sb),
+ BCH_SB_GC_RESERVE(sb),
+ BCH_SB_ROOT_RESERVE(sb),
- sb->nr_in_set);
+ sb->nr_devices);
- for (i = 0; i < sb->nr_in_set; i++) {
- struct cache_member *m = sb->members + i;
+ mi = bch_sb_get_members(sb);
+ if (!mi) {
+ printf("Member info section missing\n");
+ return;
+ }
+
+ for (i = 0; i < sb->nr_devices; i++) {
+ struct bch_member *m = mi->members + i;
time_t last_mount = le64_to_cpu(m->last_mount);
uuid_unparse(m->uuid.b, member_uuid_str);
@@ -290,41 +368,18 @@ void bcache_super_print(struct cache_sb *sb, int units)
le64_to_cpu(m->nbuckets),
last_mount ? ctime(&last_mount) : "(never)",
- CACHE_STATE(m) < CACHE_STATE_NR
- ? bch_cache_state[CACHE_STATE(m)]
+ BCH_MEMBER_STATE(m) < BCH_MEMBER_STATE_NR
+ ? bch_cache_state[BCH_MEMBER_STATE(m)]
: "unknown",
- CACHE_TIER(m),
- CACHE_HAS_METADATA(m),
- CACHE_HAS_DATA(m),
+ BCH_MEMBER_TIER(m),
+ BCH_MEMBER_HAS_METADATA(m),
+ BCH_MEMBER_HAS_DATA(m),
- CACHE_REPLACEMENT(m) < CACHE_REPLACEMENT_NR
- ? bch_cache_replacement_policies[CACHE_REPLACEMENT(m)]
+ BCH_MEMBER_REPLACEMENT(m) < CACHE_REPLACEMENT_NR
+ ? bch_cache_replacement_policies[BCH_MEMBER_REPLACEMENT(m)]
: "unknown",
- CACHE_DISCARD(m));
+ BCH_MEMBER_DISCARD(m));
}
}
-
-struct cache_sb *bcache_super_read(const char *path)
-{
- struct cache_sb sb, *ret;
- size_t bytes;
-
- int fd = open(path, O_RDONLY);
- if (fd < 0)
- die("couldn't open %s", path);
-
- xpread(fd, &sb, sizeof(sb), SB_SECTOR << 9);
-
- if (memcmp(&sb.magic, &BCACHE_MAGIC, sizeof(sb.magic)))
- die("not a bcache superblock");
-
- bytes = sizeof(sb) + le16_to_cpu(sb.u64s) * sizeof(u64);
-
- ret = calloc(1, bytes);
-
- xpread(fd, ret, bytes, SB_SECTOR << 9);
-
- return ret;
-}
diff --git a/libbcache.h b/libbcache.h
index 07329cd1..6ec3f42d 100644
--- a/libbcache.h
+++ b/libbcache.h
@@ -2,6 +2,8 @@
#define _LIBBCACHE_H
#include <linux/uuid.h>
+#include "tools-util.h"
+#include "vstructs.h"
#include "stdbool.h"
#include "tools-util.h"
@@ -34,6 +36,7 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
unsigned meta_csum_type,
unsigned data_csum_type,
unsigned compression_type,
+ const char *passphrase,
unsigned meta_replicas,
unsigned data_replicas,
unsigned on_error_action,
@@ -41,8 +44,8 @@ void bcache_format(struct dev_opts *devs, size_t nr_devs,
char *label,
uuid_le uuid);
-void bcache_super_print(struct cache_sb *, int);
+struct bch_sb *bcache_super_read(const char *);
-struct cache_sb *bcache_super_read(const char *);
+void bcache_super_print(struct bch_sb *, int);
#endif /* _LIBBCACHE_H */
diff --git a/libbcache/acl.c b/libbcache/acl.c
index 64d56165..468d98da 100644
--- a/libbcache/acl.c
+++ b/libbcache/acl.c
@@ -187,7 +187,7 @@ int bch_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (ret < 0)
return ret;
else {
- inode->i_ctime = CURRENT_TIME_SEC;
+ inode->i_ctime = current_fs_time(inode->i_sb);
mark_inode_dirty(inode);
if (ret == 0)
acl = NULL;
diff --git a/libbcache/alloc.c b/libbcache/alloc.c
index 4fe08b57..cd22c381 100644
--- a/libbcache/alloc.c
+++ b/libbcache/alloc.c
@@ -64,7 +64,7 @@
#include "extents.h"
#include "io.h"
#include "journal.h"
-#include "super.h"
+#include "super-io.h"
#include <linux/blkdev.h>
#include <linux/kthread.h>
@@ -105,7 +105,7 @@ void bch_cache_group_add_cache(struct cache_group *grp, struct cache *ca)
if (rcu_access_pointer(grp->d[i].dev) == ca)
goto out;
- BUG_ON(grp->nr_devices >= MAX_CACHES_PER_SET);
+ BUG_ON(grp->nr_devices >= BCH_SB_MEMBERS_MAX);
rcu_assign_pointer(grp->d[grp->nr_devices++].dev, ca);
out:
@@ -124,9 +124,9 @@ static void pd_controllers_update(struct work_struct *work)
int i;
/* All units are in bytes */
- u64 tier_size[CACHE_TIERS];
- u64 tier_free[CACHE_TIERS];
- u64 tier_dirty[CACHE_TIERS];
+ u64 tier_size[BCH_TIER_MAX];
+ u64 tier_free[BCH_TIER_MAX];
+ u64 tier_dirty[BCH_TIER_MAX];
u64 tier0_can_free = 0;
memset(tier_size, 0, sizeof(tier_size));
@@ -134,7 +134,7 @@ static void pd_controllers_update(struct work_struct *work)
memset(tier_dirty, 0, sizeof(tier_dirty));
rcu_read_lock();
- for (i = CACHE_TIERS - 1; i >= 0; --i)
+ for (i = BCH_TIER_MAX - 1; i >= 0; --i)
group_for_each_cache_rcu(ca, &c->cache_tiers[i], iter) {
struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
unsigned bucket_bits = ca->bucket_bits + 9;
@@ -246,6 +246,16 @@ static int prio_io(struct cache *ca, uint64_t bucket, int op)
return submit_bio_wait(ca->bio_prio);
}
+static struct nonce prio_nonce(struct prio_set *p)
+{
+ return (struct nonce) {{
+ [0] = 0,
+ [1] = p->nonce[0],
+ [2] = p->nonce[1],
+ [3] = p->nonce[2]^BCH_NONCE_PRIO,
+ }};
+}
+
static int bch_prio_write(struct cache *ca)
{
struct cache_set *c = ca->set;
@@ -279,12 +289,8 @@ static int bch_prio_write(struct cache *ca)
}
p->next_bucket = cpu_to_le64(ca->prio_buckets[i + 1]);
- p->magic = cpu_to_le64(pset_magic(&c->disk_sb));
-
- SET_PSET_CSUM_TYPE(p, c->opts.metadata_checksum);
- p->csum = cpu_to_le64(bch_checksum(PSET_CSUM_TYPE(p),
- &p->magic,
- bucket_bytes(ca) - 8));
+ p->magic = cpu_to_le64(pset_magic(c));
+ get_random_bytes(&p->nonce, sizeof(p->nonce));
spin_lock(&ca->prio_buckets_lock);
r = bch_bucket_alloc(ca, RESERVE_PRIO);
@@ -298,6 +304,19 @@ static int bch_prio_write(struct cache *ca)
bch_mark_metadata_bucket(ca, ca->buckets + r, false);
spin_unlock(&ca->prio_buckets_lock);
+ SET_PSET_CSUM_TYPE(p, bch_meta_checksum_type(c));
+
+ bch_encrypt(c, PSET_CSUM_TYPE(p),
+ prio_nonce(p),
+ p->encrypted_start,
+ bucket_bytes(ca) -
+ offsetof(struct prio_set, encrypted_start));
+
+ p->csum = bch_checksum(c, PSET_CSUM_TYPE(p),
+ prio_nonce(p),
+ (void *) p + sizeof(p->csum),
+ bucket_bytes(ca) - sizeof(p->csum));
+
ret = prio_io(ca, r, REQ_OP_WRITE);
if (cache_fatal_io_err_on(ret, ca,
"prio write to bucket %zu", r) ||
@@ -306,9 +325,9 @@ static int bch_prio_write(struct cache *ca)
}
spin_lock(&j->lock);
- j->prio_buckets[ca->sb.nr_this_dev] = cpu_to_le64(ca->prio_buckets[0]);
+ j->prio_buckets[ca->dev_idx] = cpu_to_le64(ca->prio_buckets[0]);
j->nr_prio_buckets = max_t(unsigned,
- ca->sb.nr_this_dev + 1,
+ ca->dev_idx + 1,
j->nr_prio_buckets);
spin_unlock(&j->lock);
@@ -320,7 +339,7 @@ static int bch_prio_write(struct cache *ca)
return ret;
need_new_journal_entry = j->buf[res.idx].nr_prio_buckets <
- ca->sb.nr_this_dev + 1;
+ ca->dev_idx + 1;
bch_journal_res_put(j, &res);
ret = bch_journal_flush_seq(j, res.seq);
@@ -355,13 +374,14 @@ int bch_prio_read(struct cache *ca)
struct prio_set *p = ca->disk_buckets;
struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
struct bucket_mark new;
+ struct bch_csum csum;
unsigned bucket_nr = 0;
u64 bucket, expect, got;
size_t b;
int ret = 0;
spin_lock(&c->journal.lock);
- bucket = le64_to_cpu(c->journal.prio_buckets[ca->sb.nr_this_dev]);
+ bucket = le64_to_cpu(c->journal.prio_buckets[ca->dev_idx]);
spin_unlock(&c->journal.lock);
/*
@@ -387,18 +407,28 @@ int bch_prio_read(struct cache *ca)
return -EIO;
got = le64_to_cpu(p->magic);
- expect = pset_magic(&c->disk_sb);
+ expect = pset_magic(c);
unfixable_fsck_err_on(got != expect, c,
"bad magic (got %llu expect %llu) while reading prios from bucket %llu",
got, expect, bucket);
- got = le64_to_cpu(p->csum);
- expect = bch_checksum(PSET_CSUM_TYPE(p),
- &p->magic,
- bucket_bytes(ca) - 8);
- unfixable_fsck_err_on(got != expect, c,
- "bad checksum (got %llu expect %llu) while reading prios from bucket %llu",
- got, expect, bucket);
+ unfixable_fsck_err_on(PSET_CSUM_TYPE(p) >= BCH_CSUM_NR, c,
+ "prio bucket with unknown csum type %llu bucket %lluu",
+ PSET_CSUM_TYPE(p), bucket);
+
+ csum = bch_checksum(c, PSET_CSUM_TYPE(p),
+ prio_nonce(p),
+ (void *) p + sizeof(p->csum),
+ bucket_bytes(ca) - sizeof(p->csum));
+ unfixable_fsck_err_on(bch_crc_cmp(csum, p->csum), c,
+ "bad checksum reading prios from bucket %llu",
+ bucket);
+
+ bch_encrypt(c, PSET_CSUM_TYPE(p),
+ prio_nonce(p),
+ p->encrypted_start,
+ bucket_bytes(ca) -
+ offsetof(struct prio_set, encrypted_start));
bucket = le64_to_cpu(p->next_bucket);
d = p->data;
@@ -1029,7 +1059,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
spin_lock(&devs->lock);
for (i = 0; i < devs->nr_devices; i++)
- available += !test_bit(devs->d[i].dev->sb.nr_this_dev,
+ available += !test_bit(devs->d[i].dev->dev_idx,
caches_used);
recalc_alloc_group_weights(c, devs);
@@ -1054,7 +1084,7 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
ca = devs->d[i].dev;
- if (test_bit(ca->sb.nr_this_dev, caches_used))
+ if (test_bit(ca->dev_idx, caches_used))
continue;
if (fail_idx == -1 &&
@@ -1082,11 +1112,11 @@ static enum bucket_alloc_ret bch_bucket_alloc_group(struct cache_set *c,
ob->ptrs[0] = (struct bch_extent_ptr) {
.gen = ca->buckets[bucket].mark.gen,
.offset = bucket_to_sector(ca, bucket),
- .dev = ca->sb.nr_this_dev,
+ .dev = ca->dev_idx,
};
ob->ptr_offset[0] = 0;
- __set_bit(ca->sb.nr_this_dev, caches_used);
+ __set_bit(ca->dev_idx, caches_used);
available--;
devs->cur_device = i;
}
@@ -1334,7 +1364,7 @@ static int open_bucket_add_buckets(struct cache_set *c,
enum alloc_reserve reserve,
struct closure *cl)
{
- long caches_used[BITS_TO_LONGS(MAX_CACHES_PER_SET)];
+ long caches_used[BITS_TO_LONGS(BCH_SB_MEMBERS_MAX)];
int i, dst;
/*
@@ -1475,6 +1505,7 @@ void bch_alloc_sectors_append_ptrs(struct cache_set *c, struct bkey_i_extent *e,
EBUG_ON(bch_extent_has_device(extent_i_to_s_c(e), ob->ptrs[i].dev));
tmp = ob->ptrs[i];
+ tmp.cached = bkey_extent_is_cached(&e->k);
tmp.offset += ob->ptr_offset[i];
extent_ptr_append(e, tmp);
@@ -1657,7 +1688,7 @@ static void bch_stop_write_point(struct cache *ca,
return;
for (ptr = ob->ptrs; ptr < ob->ptrs + ob->nr_ptrs; ptr++)
- if (ptr->dev == ca->sb.nr_this_dev)
+ if (ptr->dev == ca->dev_idx)
goto found;
mutex_unlock(&ob->lock);
@@ -1682,7 +1713,7 @@ static bool bch_dev_has_open_write_point(struct cache *ca)
if (atomic_read(&ob->pin)) {
mutex_lock(&ob->lock);
for (ptr = ob->ptrs; ptr < ob->ptrs + ob->nr_ptrs; ptr++)
- if (ptr->dev == ca->sb.nr_this_dev) {
+ if (ptr->dev == ca->dev_idx) {
mutex_unlock(&ob->lock);
return true;
}
diff --git a/libbcache/alloc_types.h b/libbcache/alloc_types.h
index 337b6e46..fbe8b75c 100644
--- a/libbcache/alloc_types.h
+++ b/libbcache/alloc_types.h
@@ -56,7 +56,7 @@ struct cache_group {
struct {
u64 weight;
struct cache *dev;
- } d[MAX_CACHES_PER_SET];
+ } d[BCH_SB_MEMBERS_MAX];
};
/* Enough for 16 cache devices, 2 tiers and some left over for pipelining */
diff --git a/libbcache/bcache.h b/libbcache/bcache.h
index 309d3728..8a0262fb 100644
--- a/libbcache/bcache.h
+++ b/libbcache/bcache.h
@@ -314,6 +314,8 @@ do { \
struct btree;
struct cache;
+struct crypto_blkcipher;
+struct crypto_ahash;
enum gc_phase {
GC_PHASE_PENDING_DELETE = BTREE_ID_NR + 1,
@@ -332,7 +334,6 @@ struct cache_member_cpu {
u16 bucket_size; /* sectors */
u8 state;
u8 tier;
- u8 replication_set;
u8 has_metadata;
u8 has_data;
u8 replacement;
@@ -342,7 +343,7 @@ struct cache_member_cpu {
struct cache_member_rcu {
struct rcu_head rcu;
- unsigned nr_in_set;
+ unsigned nr_devices;
struct cache_member_cpu m[];
};
@@ -363,14 +364,13 @@ struct cache {
struct cache_group self;
+ u8 dev_idx;
/*
* Cached version of this device's member info from superblock
- * Committed by write_super()
+ * Committed by bch_write_super() -> bch_cache_set_mi_update()
*/
- struct {
- u8 nr_this_dev;
- } sb;
struct cache_member_cpu mi;
+ uuid_le uuid;
struct bcache_superblock disk_sb;
@@ -518,36 +518,45 @@ struct cache_set {
struct percpu_ref writes;
struct work_struct read_only_work;
- struct cache __rcu *cache[MAX_CACHES_PER_SET];
-
- struct mutex mi_lock;
- struct cache_member_rcu __rcu *members;
- struct cache_member *disk_mi; /* protected by register_lock */
+ struct cache __rcu *cache[BCH_SB_MEMBERS_MAX];
struct cache_set_opts opts;
/*
* Cached copy in native endianness:
- * Set by cache_sb_to_cache_set:
+ * Set by bch_cache_set_mi_update():
*/
+ struct cache_member_rcu __rcu *members;
+
+ /* Updated by bch_sb_update():*/
struct {
+ uuid_le uuid;
+ uuid_le user_uuid;
+
u16 block_size;
u16 btree_node_size;
- u8 nr_in_set;
+ u8 nr_devices;
u8 clean;
u8 meta_replicas_have;
u8 data_replicas_have;
u8 str_hash_type;
+ u8 encryption_type;
+
+ u64 time_base_lo;
+ u32 time_base_hi;
+ u32 time_precision;
} sb;
- struct cache_sb disk_sb;
+ struct bch_sb *disk_sb;
+ unsigned disk_sb_order;
+
unsigned short block_bits; /* ilog2(block_size) */
struct closure sb_write;
- struct semaphore sb_write_mutex;
+ struct mutex sb_lock;
struct backing_dev_info bdi;
@@ -631,7 +640,7 @@ struct cache_set {
* allocate from:
*/
struct cache_group cache_all;
- struct cache_group cache_tiers[CACHE_TIERS];
+ struct cache_group cache_tiers[BCH_TIER_MAX];
u64 capacity; /* sectors */
@@ -724,6 +733,11 @@ struct cache_set {
struct bio_decompress_worker __percpu
*bio_decompress_worker;
+ struct crypto_blkcipher *chacha20;
+ struct crypto_shash *poly1305;
+
+ atomic64_t key_version;
+
/* For punting bio submissions to workqueue, io.c */
struct bio_list bio_submit_list;
struct work_struct bio_submit_work;
diff --git a/libbcache/bkey.c b/libbcache/bkey.c
index 64d2c845..374237e2 100644
--- a/libbcache/bkey.c
+++ b/libbcache/bkey.c
@@ -81,9 +81,9 @@ int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
- p("u64s %u type %u %llu:%llu snap %u len %u ver %u",
+ p("u64s %u type %u %llu:%llu snap %u len %u ver %llu",
k->u64s, k->type, k->p.inode, k->p.offset,
- k->p.snapshot, k->size, k->version);
+ k->p.snapshot, k->size, k->version.lo);
BUG_ON(bkey_packed(k));
@@ -258,13 +258,21 @@ bool bch_bkey_transform(const struct bkey_format *out_f,
return true;
}
+#define bkey_fields() \
+ x(BKEY_FIELD_INODE, p.inode) \
+ x(BKEY_FIELD_OFFSET, p.offset) \
+ x(BKEY_FIELD_SNAPSHOT, p.snapshot) \
+ x(BKEY_FIELD_SIZE, size) \
+ x(BKEY_FIELD_VERSION_HI, version.hi) \
+ x(BKEY_FIELD_VERSION_LO, version.lo)
+
struct bkey __bkey_unpack_key(const struct bkey_format *format,
const struct bkey_packed *in)
{
struct unpack_state state = unpack_state_init(format, in);
struct bkey out;
- EBUG_ON(format->nr_fields != 5);
+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
EBUG_ON(in->u64s < format->key_u64s);
EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
EBUG_ON(in->u64s - format->key_u64s + BKEY_U64s > U8_MAX);
@@ -274,11 +282,10 @@ struct bkey __bkey_unpack_key(const struct bkey_format *format,
out.needs_whiteout = in->needs_whiteout;
out.type = in->type;
out.pad[0] = 0;
- out.p.inode = get_inc_field(&state, BKEY_FIELD_INODE);
- out.p.offset = get_inc_field(&state, BKEY_FIELD_OFFSET);
- out.p.snapshot = get_inc_field(&state, BKEY_FIELD_SNAPSHOT);
- out.size = get_inc_field(&state, BKEY_FIELD_SIZE);
- out.version = get_inc_field(&state, BKEY_FIELD_VERSION);
+
+#define x(id, field) out.field = get_inc_field(&state, id);
+ bkey_fields()
+#undef x
return out;
}
@@ -290,7 +297,7 @@ struct bpos __bkey_unpack_pos(const struct bkey_format *format,
struct unpack_state state = unpack_state_init(format, in);
struct bpos out;
- EBUG_ON(format->nr_fields != 5);
+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
EBUG_ON(in->u64s < format->key_u64s);
EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
@@ -311,17 +318,14 @@ bool bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
struct pack_state state = pack_state_init(format, out);
EBUG_ON((void *) in == (void *) out);
- EBUG_ON(format->nr_fields != 5);
+ EBUG_ON(format->nr_fields != BKEY_NR_FIELDS);
EBUG_ON(in->format != KEY_FORMAT_CURRENT);
out->_data[0] = 0;
- if (!set_inc_field(&state, BKEY_FIELD_INODE, in->p.inode) ||
- !set_inc_field(&state, BKEY_FIELD_OFFSET, in->p.offset) ||
- !set_inc_field(&state, BKEY_FIELD_SNAPSHOT, in->p.snapshot) ||
- !set_inc_field(&state, BKEY_FIELD_SIZE, in->size) ||
- !set_inc_field(&state, BKEY_FIELD_VERSION, in->version))
- return false;
+#define x(id, field) if (!set_inc_field(&state, id, in->field)) return false;
+ bkey_fields()
+#undef x
/*
* Extents - we have to guarantee that if an extent is packed, a trimmed
@@ -340,47 +344,6 @@ bool bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
return true;
}
-/*
- * Alternate implementations using bch_bkey_transform_key() - unfortunately, too
- * slow
- */
-#if 0
-struct bkey __bkey_unpack_key(const struct bkey_format *format,
- const struct bkey_packed *in)
-{
- struct bkey out;
- bool s;
-
- EBUG_ON(format->nr_fields != 5);
- EBUG_ON(in->u64s < format->key_u64s);
- EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
-
- s = bch_bkey_transform_key(&bch_bkey_format_current, (void *) &out,
- format, in);
- EBUG_ON(!s);
-
- out.format = KEY_FORMAT_CURRENT;
-
- return out;
-}
-
-bool bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
- const struct bkey_format *format)
-{
- EBUG_ON(format->nr_fields != 5);
- EBUG_ON(in->format != KEY_FORMAT_CURRENT);
-
- if (!bch_bkey_transform_key(format, out,
- &bch_bkey_format_current, (void *) in))
- return false;
-
- out->format = KEY_FORMAT_LOCAL_BTREE;
-
- bch_bkey_pack_verify(out, in, format);
- return true;
-}
-#endif
-
/**
* bkey_unpack -- unpack the key and the value
*/
@@ -588,12 +551,10 @@ static void __bkey_format_add(struct bkey_format_state *s,
*/
void bch_bkey_format_add_key(struct bkey_format_state *s, const struct bkey *k)
{
- __bkey_format_add(s, BKEY_FIELD_INODE, k->p.inode);
- __bkey_format_add(s, BKEY_FIELD_OFFSET, k->p.offset);
+#define x(id, field) __bkey_format_add(s, id, k->field);
+ bkey_fields()
+#undef x
__bkey_format_add(s, BKEY_FIELD_OFFSET, bkey_start_offset(k));
- __bkey_format_add(s, BKEY_FIELD_SNAPSHOT, k->p.snapshot);
- __bkey_format_add(s, BKEY_FIELD_SIZE, k->size);
- __bkey_format_add(s, BKEY_FIELD_VERSION, k->version);
}
void bch_bkey_format_add_pos(struct bkey_format_state *s, struct bpos p)
@@ -636,6 +597,12 @@ struct bkey_format bch_bkey_format_done(struct bkey_format_state *s)
bits += ret.bits_per_field[i];
}
+ /* allow for extent merging: */
+ if (ret.bits_per_field[BKEY_FIELD_SIZE]) {
+ ret.bits_per_field[BKEY_FIELD_SIZE] += 4;
+ bits += 4;
+ }
+
ret.key_u64s = DIV_ROUND_UP(bits, 64);
/* if we have enough spare bits, round fields up to nearest byte */
@@ -1014,25 +981,13 @@ int bch_compile_bkey_format(const struct bkey_format *format, void *_out)
/* mov [rdi], eax */
I2(0x89, 0x07);
- out = compile_bkey_field(format, out, BKEY_FIELD_INODE,
- offsetof(struct bkey, p.inode), 8,
- &eax_zeroed);
-
- out = compile_bkey_field(format, out, BKEY_FIELD_OFFSET,
- offsetof(struct bkey, p.offset), 8,
- &eax_zeroed);
-
- out = compile_bkey_field(format, out, BKEY_FIELD_SNAPSHOT,
- offsetof(struct bkey, p.snapshot), 4,
- &eax_zeroed);
-
- out = compile_bkey_field(format, out, BKEY_FIELD_SIZE,
- offsetof(struct bkey, size), 4,
- &eax_zeroed);
-
- out = compile_bkey_field(format, out, BKEY_FIELD_VERSION,
- offsetof(struct bkey, version), 4,
+#define x(id, field) \
+ out = compile_bkey_field(format, out, id, \
+ offsetof(struct bkey, field), \
+ sizeof(((struct bkey *) NULL)->field), \
&eax_zeroed);
+ bkey_fields()
+#undef x
/* retq */
I1(0xc3);
@@ -1078,43 +1033,6 @@ static inline int __bkey_cmp_bits(const u64 *l, const u64 *r,
}
#endif
-/*
- * Would like to use this if we can make __bkey_cmp_bits() fast enough, it'll be
- * a decent reduction in code size
- */
-#if 0
-static int bkey_cmp_verify(const struct bkey *l, const struct bkey *r)
-{
- if (l->p.inode != r->p.inode)
- return l->p.inode < r->p.inode ? -1 : 1;
-
- if (l->p.offset != r->p.offset)
- return l->p.offset < r->p.offset ? -1 : 1;
-
- if (l->p.snapshot != r->p.snapshot)
- return l->p.snapshot < r->p.snapshot ? -1 : 1;
-
- return 0;
-}
-
-int bkey_cmp(const struct bkey *l, const struct bkey *r)
-{
- int ret;
-
- EBUG_ON(bkey_packed(l) || bkey_packed(r));
-
- ret = __bkey_cmp_bits((sizeof(l->inode) +
- sizeof(l->offset) +
- sizeof(l->snapshot)) * BITS_PER_BYTE,
- __high_word(BKEY_U64s, l),
- __high_word(BKEY_U64s, r));
-
- BUG_ON(ret != bkey_cmp_verify(l, r));
-
- return ret;
-}
-#endif
-
__pure
int __bkey_cmp_packed_format_checked(const struct bkey_packed *l,
const struct bkey_packed *r,
@@ -1214,7 +1132,7 @@ void bkey_pack_test(void)
struct bkey_format test_format = {
.key_u64s = 2,
- .nr_fields = 5,
+ .nr_fields = BKEY_NR_FIELDS,
.bits_per_field = {
13,
64,
@@ -1230,21 +1148,9 @@ void bkey_pack_test(void)
u64 a, v = get_inc_field(&in_s, i);
switch (i) {
- case 0:
- a = t.p.inode;
- break;
- case 1:
- a = t.p.offset;
- break;
- case 2:
- a = t.p.snapshot;
- break;
- case 3:
- a = t.size;
- break;
- case 4:
- a = t.version;
- break;
+#define x(id, field) case id: a = t.field; break;
+ bkey_fields()
+#undef x
default:
BUG();
}
diff --git a/libbcache/bkey.h b/libbcache/bkey.h
index 3e29cdde..0893134f 100644
--- a/libbcache/bkey.h
+++ b/libbcache/bkey.h
@@ -5,6 +5,7 @@
#include <linux/bcache.h>
#include "util.h"
+#include "vstructs.h"
void bch_to_binary(char *, const u64 *, unsigned);
int bch_bkey_to_text(char *, size_t, const struct bkey *);
@@ -28,15 +29,7 @@ struct bkey_s {
};
};
-#define bkey_next(_k) \
-({ \
- BUILD_BUG_ON(!type_is(_k, struct bkey *) && \
- !type_is(_k, struct bkey_i *) && \
- !type_is(_k, struct bkey_packed *)); \
- \
- ((typeof(_k)) __bkey_idx(((struct bkey *) (_k)), \
- ((struct bkey *) (_k))->u64s)); \
-})
+#define bkey_next(_k) vstruct_next(_k)
static inline unsigned bkey_val_u64s(const struct bkey *k)
{
@@ -218,6 +211,22 @@ static inline struct bpos bpos_min(struct bpos l, struct bpos r)
void bch_bpos_swab(struct bpos *);
void bch_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
+static __always_inline int bversion_cmp(struct bversion l, struct bversion r)
+{
+ if (l.hi != r.hi)
+ return l.hi < r.hi ? -1 : 1;
+ if (l.lo != r.lo)
+ return l.lo < r.lo ? -1 : 1;
+ return 0;
+}
+
+#define ZERO_VERSION ((struct bversion) { .hi = 0, .lo = 0 })
+
+static __always_inline int bversion_zero(struct bversion v)
+{
+ return !bversion_cmp(v, ZERO_VERSION);
+}
+
#ifdef CONFIG_BCACHE_DEBUG
/* statement expressions confusing unlikely()? */
#define bkey_packed(_k) \
@@ -555,6 +564,7 @@ static inline void __bch_extent_assert(u8 type, u8 nr)
}
__BKEY_VAL_ACCESSORS(extent, BCH_EXTENT, __bch_extent_assert);
+BKEY_VAL_ACCESSORS(reservation, BCH_RESERVATION);
BKEY_VAL_ACCESSORS(inode, BCH_INODE_FS);
BKEY_VAL_ACCESSORS(inode_blockdev, BCH_INODE_BLOCKDEV);
diff --git a/libbcache/blockdev.c b/libbcache/blockdev.c
index cd231f5e..d3a373c2 100644
--- a/libbcache/blockdev.c
+++ b/libbcache/blockdev.c
@@ -2,11 +2,12 @@
#include "bcache.h"
#include "blockdev.h"
#include "btree_iter.h"
+#include "btree_update.h"
#include "checksum.h"
#include "error.h"
#include "inode.h"
#include "request.h"
-#include "super.h"
+#include "super-io.h"
#include "writeback.h"
#include <linux/kthread.h>
@@ -42,15 +43,22 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
down(&dc->sb_write_mutex);
closure_init(cl, parent);
+ sb->csum = csum_vstruct(NULL, BCH_CSUM_CRC64,
+ (struct nonce) { 0 }, sb).lo;
+
bio_reset(bio);
- bio->bi_end_io = write_bdev_super_endio;
- bio->bi_private = dc;
+ bio->bi_bdev = dc->disk_sb.bdev;
+ bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
+ bio->bi_iter.bi_size =
+ roundup(vstruct_bytes(sb),
+ bdev_logical_block_size(dc->disk_sb.bdev));
+ bio->bi_end_io = write_bdev_super_endio;
+ bio->bi_private = dc;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FUA|REQ_META);
+ bch_bio_map(bio, sb);
closure_get(cl);
- sb->csum = cpu_to_le64(__csum_set(sb, 0, BCH_CSUM_CRC64));
- __write_super(dc->disk.c, (void *) &dc->disk_sb);
-
closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
}
@@ -263,7 +271,7 @@ static void calc_cached_dev_sectors(struct cache_set *c)
void bch_cached_dev_run(struct cached_dev *dc)
{
struct bcache_device *d = &dc->disk;
- char buf[SB_LABEL_SIZE + 1];
+ char buf[BCH_SB_LABEL_SIZE + 1];
char *env[] = {
"DRIVER=bcache",
kasprintf(GFP_KERNEL, "CACHED_UUID=%pU",
@@ -272,8 +280,8 @@ void bch_cached_dev_run(struct cached_dev *dc)
NULL,
};
- memcpy(buf, dc->disk_sb.sb->label, SB_LABEL_SIZE);
- buf[SB_LABEL_SIZE] = '\0';
+ memcpy(buf, dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
+ buf[BCH_SB_LABEL_SIZE] = '\0';
env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
if (atomic_xchg(&dc->running, 1)) {
@@ -370,8 +378,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
bdevname(dc->disk_sb.bdev, buf);
if (memcmp(&dc->disk_sb.sb->set_uuid,
- &c->disk_sb.set_uuid,
- sizeof(c->disk_sb.set_uuid)))
+ &c->sb.uuid,
+ sizeof(c->sb.uuid)))
return -ENOENT;
if (dc->disk.c) {
@@ -424,7 +432,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
SET_CACHED_DEV(&dc->disk.inode.v, true);
dc->disk.inode.v.i_uuid = dc->disk_sb.sb->disk_uuid;
memcpy(dc->disk.inode.v.i_label,
- dc->disk_sb.sb->label, SB_LABEL_SIZE);
+ dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
dc->disk.inode.v.i_ctime = rtime;
dc->disk.inode.v.i_mtime = rtime;
@@ -438,14 +446,15 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
pr_info("attached inode %llu", bcache_dev_inum(&dc->disk));
- dc->disk_sb.sb->set_uuid = c->disk_sb.set_uuid;
+ dc->disk_sb.sb->set_uuid = c->sb.uuid;
SET_BDEV_STATE(dc->disk_sb.sb, BDEV_STATE_CLEAN);
bch_write_bdev_super(dc, &cl);
closure_sync(&cl);
} else {
dc->disk.inode.v.i_mtime = rtime;
- bch_inode_update(c, &dc->disk.inode.k_i, NULL);
+ bch_btree_update(c, BTREE_ID_INODES,
+ &dc->disk.inode.k_i, NULL);
}
/* Count dirty sectors before attaching */
@@ -479,7 +488,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
pr_info("Caching %s as %s on set %pU",
bdevname(dc->disk_sb.bdev, buf), dc->disk.disk->disk_name,
- dc->disk.c->disk_sb.set_uuid.b);
+ dc->disk.c->sb.uuid.b);
return 0;
}
@@ -517,7 +526,7 @@ static void cached_dev_free(struct closure *cl)
mutex_unlock(&bch_register_lock);
- free_super((void *) &dc->disk_sb);
+ bch_free_super((void *) &dc->disk_sb);
kobject_put(&dc->disk.kobj);
}
diff --git a/libbcache/bset.c b/libbcache/bset.c
index 34880952..a88d8017 100644
--- a/libbcache/bset.c
+++ b/libbcache/bset.c
@@ -59,7 +59,7 @@ void bch_dump_bset(struct btree *b, struct bset *i, unsigned set)
return;
for (_k = i->start, k = bkey_unpack_key(b, _k);
- _k < bset_bkey_last(i);
+ _k < vstruct_last(i);
_k = _n, k = n) {
_n = bkey_next(_k);
@@ -67,7 +67,7 @@ void bch_dump_bset(struct btree *b, struct bset *i, unsigned set)
printk(KERN_ERR "block %u key %zi/%u: %s\n", set,
_k->_data - i->_data, i->u64s, buf);
- if (_n == bset_bkey_last(i))
+ if (_n == vstruct_last(i))
continue;
n = bkey_unpack_key(b, _n);
diff --git a/libbcache/bset.h b/libbcache/bset.h
index f03e6b86..70868c51 100644
--- a/libbcache/bset.h
+++ b/libbcache/bset.h
@@ -9,6 +9,7 @@
#include "bkey_methods.h"
#include "btree_types.h"
#include "util.h" /* for time_stats */
+#include "vstructs.h"
/*
* BKEYS:
@@ -302,15 +303,6 @@ static inline void btree_node_set_format(struct btree *b,
bch_bset_set_no_aux_tree(b, b->set);
}
-#define __set_bytes(_i, _u64s) (sizeof(*(_i)) + (_u64s) * sizeof(u64))
-#define set_bytes(_i) __set_bytes(_i, (_i)->u64s)
-
-#define __set_blocks(_i, _u64s, _block_bytes) \
- DIV_ROUND_UP((size_t) __set_bytes((_i), (_u64s)), (_block_bytes))
-
-#define set_blocks(_i, _block_bytes) \
- __set_blocks((_i), (_i)->u64s, (_block_bytes))
-
static inline struct bset *bset_next_set(struct btree *b,
unsigned block_bytes)
{
@@ -318,7 +310,7 @@ static inline struct bset *bset_next_set(struct btree *b,
EBUG_ON(!is_power_of_2(block_bytes));
- return ((void *) i) + round_up(set_bytes(i), block_bytes);
+ return ((void *) i) + round_up(vstruct_bytes(i), block_bytes);
}
void bch_btree_keys_free(struct btree *);
@@ -387,11 +379,6 @@ static inline bool btree_iter_pos_cmp_p_or_unp(const struct btree *b,
(cmp == 0 && !strictly_greater && !bkey_deleted(k));
}
-static inline struct bkey_packed *bset_bkey_idx(struct bset *i, unsigned idx)
-{
- return bkey_idx(i, idx);
-}
-
struct bset_tree *bch_bkey_to_bset(struct btree *, struct bkey_packed *);
struct bkey_packed *bkey_prev_all(struct btree *, struct bset_tree *,
struct bkey_packed *);
diff --git a/libbcache/btree_cache.c b/libbcache/btree_cache.c
index ca6064af..4d5efdbd 100644
--- a/libbcache/btree_cache.c
+++ b/libbcache/btree_cache.c
@@ -695,7 +695,7 @@ retry:
EBUG_ON(!b->written);
EBUG_ON(b->btree_id != iter->btree_id ||
- BSET_BTREE_LEVEL(&b->data->keys) != level ||
+ BTREE_NODE_LEVEL(b->data) != level ||
bkey_cmp(b->data->max_key, k->k.p));
return b;
diff --git a/libbcache/btree_gc.c b/libbcache/btree_gc.c
index 84171875..5c77b267 100644
--- a/libbcache/btree_gc.c
+++ b/libbcache/btree_gc.c
@@ -18,6 +18,7 @@
#include "journal.h"
#include "keylist.h"
#include "move.h"
+#include "super-io.h"
#include "writeback.h"
#include <linux/slab.h>
@@ -118,8 +119,8 @@ u8 bch_btree_key_recalc_oldest_gen(struct cache_set *c, struct bkey_s_c k)
/*
* For runtime mark and sweep:
*/
-u8 __bch_btree_mark_key(struct cache_set *c, enum bkey_type type,
- struct bkey_s_c k)
+static u8 bch_btree_mark_key(struct cache_set *c, enum bkey_type type,
+ struct bkey_s_c k)
{
switch (type) {
case BKEY_TYPE_BTREE:
@@ -133,10 +134,14 @@ u8 __bch_btree_mark_key(struct cache_set *c, enum bkey_type type,
}
}
-static u8 btree_mark_key(struct cache_set *c, struct btree *b,
- struct bkey_s_c k)
+u8 bch_btree_mark_key_initial(struct cache_set *c, enum bkey_type type,
+ struct bkey_s_c k)
{
- return __bch_btree_mark_key(c, btree_node_type(b), k);
+ atomic64_set(&c->key_version,
+ max_t(u64, k.k->version.lo,
+ atomic64_read(&c->key_version)));
+
+ return bch_btree_mark_key(c, type, k);
}
static bool btree_gc_mark_node(struct cache_set *c, struct btree *b)
@@ -151,7 +156,8 @@ static bool btree_gc_mark_node(struct cache_set *c, struct btree *b)
btree_node_is_extents(b),
&unpacked) {
bkey_debugcheck(c, b, k);
- stale = max(stale, btree_mark_key(c, b, k));
+ stale = max(stale, bch_btree_mark_key(c,
+ btree_node_type(b), k));
}
if (btree_gc_rewrite_disabled(c))
@@ -218,7 +224,7 @@ static int bch_gc_btree(struct cache_set *c, enum btree_id btree_id)
mutex_lock(&c->btree_root_lock);
b = c->btree_roots[btree_id].b;
- __bch_btree_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key));
+ bch_btree_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key));
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
mutex_unlock(&c->btree_root_lock);
@@ -265,22 +271,21 @@ static void bch_mark_allocator_buckets(struct cache_set *c)
static void bch_mark_metadata(struct cache_set *c)
{
struct cache *ca;
- unsigned i;
+ unsigned i, j;
+ u64 b;
for_each_cache(ca, c, i) {
- unsigned j;
- u64 *i;
-
- for (j = 0; j < bch_nr_journal_buckets(ca->disk_sb.sb); j++)
- bch_mark_metadata_bucket(ca,
- &ca->buckets[journal_bucket(ca->disk_sb.sb, j)],
- true);
+ for (j = 0; j < ca->journal.nr; j++) {
+ b = ca->journal.buckets[j];
+ bch_mark_metadata_bucket(ca, ca->buckets + b, true);
+ }
spin_lock(&ca->prio_buckets_lock);
- for (i = ca->prio_buckets;
- i < ca->prio_buckets + prio_buckets(ca) * 2; i++)
- bch_mark_metadata_bucket(ca, &ca->buckets[*i], true);
+ for (j = 0; j < prio_buckets(ca) * 2; j++) {
+ b = ca->prio_buckets[j];
+ bch_mark_metadata_bucket(ca, ca->buckets + b, true);
+ }
spin_unlock(&ca->prio_buckets_lock);
}
@@ -476,9 +481,8 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
/* Check if all keys in @old_nodes could fit in one fewer node */
if (nr_old_nodes <= 1 ||
- __set_blocks(old_nodes[0]->data,
- DIV_ROUND_UP(u64s, nr_old_nodes - 1),
- block_bytes(c)) > blocks)
+ __vstruct_blocks(struct btree_node, c->block_bits,
+ DIV_ROUND_UP(u64s, nr_old_nodes - 1)) > blocks)
return;
res = bch_btree_reserve_get(c, parent, nr_old_nodes,
@@ -542,9 +546,9 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
u64s = 0;
for (k = s2->start;
- k < bset_bkey_last(s2) &&
- __set_blocks(n1->data, le16_to_cpu(s1->u64s) + u64s + k->u64s,
- block_bytes(c)) <= blocks;
+ k < vstruct_last(s2) &&
+ vstruct_blocks_plus(n1->data, c->block_bits,
+ u64s + k->u64s) <= blocks;
k = bkey_next(k)) {
last = k;
u64s += k->u64s;
@@ -554,7 +558,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
/* n2 fits entirely in n1 */
n1->key.k.p = n1->data->max_key = n2->data->max_key;
- memcpy_u64s(bset_bkey_last(s1),
+ memcpy_u64s(vstruct_last(s1),
s2->start,
le16_to_cpu(s2->u64s));
le16_add_cpu(&s1->u64s, le16_to_cpu(s2->u64s));
@@ -578,12 +582,12 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
btree_type_successor(iter->btree_id,
n1->data->max_key);
- memcpy_u64s(bset_bkey_last(s1),
+ memcpy_u64s(vstruct_last(s1),
s2->start, u64s);
le16_add_cpu(&s1->u64s, u64s);
memmove(s2->start,
- bset_bkey_idx(s2, u64s),
+ vstruct_idx(s2, u64s),
(le16_to_cpu(s2->u64s) - u64s) * sizeof(u64));
s2->u64s = cpu_to_le16(le16_to_cpu(s2->u64s) - u64s);
@@ -866,7 +870,7 @@ static void bch_initial_gc_btree(struct cache_set *c, enum btree_id id)
for_each_btree_node_key_unpack(b, k, &node_iter,
btree_node_is_extents(b),
&unpacked)
- btree_mark_key(c, b, k);
+ bch_btree_mark_key_initial(c, btree_node_type(b), k);
}
bch_btree_iter_cond_resched(&iter);
@@ -874,8 +878,8 @@ static void bch_initial_gc_btree(struct cache_set *c, enum btree_id id)
bch_btree_iter_unlock(&iter);
- __bch_btree_mark_key(c, BKEY_TYPE_BTREE,
- bkey_i_to_s_c(&c->btree_roots[id].b->key));
+ bch_btree_mark_key(c, BKEY_TYPE_BTREE,
+ bkey_i_to_s_c(&c->btree_roots[id].b->key));
}
int bch_initial_gc(struct cache_set *c, struct list_head *journal)
@@ -889,6 +893,13 @@ int bch_initial_gc(struct cache_set *c, struct list_head *journal)
bch_journal_mark(c, journal);
}
+ /*
+ * Skip past versions that might have possibly been used (as nonces),
+ * but hadn't had their pointers written:
+ */
+ if (c->sb.encryption_type)
+ atomic64_add(1 << 16, &c->key_version);
+
bch_mark_metadata(c);
gc_pos_set(c, gc_phase(GC_PHASE_DONE));
diff --git a/libbcache/btree_gc.h b/libbcache/btree_gc.h
index 91d31c05..0607187f 100644
--- a/libbcache/btree_gc.h
+++ b/libbcache/btree_gc.h
@@ -11,7 +11,7 @@ void bch_gc_thread_stop(struct cache_set *);
int bch_gc_thread_start(struct cache_set *);
int bch_initial_gc(struct cache_set *, struct list_head *);
u8 bch_btree_key_recalc_oldest_gen(struct cache_set *, struct bkey_s_c);
-u8 __bch_btree_mark_key(struct cache_set *, enum bkey_type,
+u8 bch_btree_mark_key_initial(struct cache_set *, enum bkey_type,
struct bkey_s_c);
/*
diff --git a/libbcache/btree_io.c b/libbcache/btree_io.c
index 4c295af1..e772c6ad 100644
--- a/libbcache/btree_io.c
+++ b/libbcache/btree_io.c
@@ -13,6 +13,7 @@
#include "extents.h"
#include "io.h"
#include "journal.h"
+#include "super-io.h"
#include <trace/events/bcache.h>
@@ -39,7 +40,7 @@ static void clear_needs_whiteout(struct bset *i)
{
struct bkey_packed *k;
- for (k = i->start; k != bset_bkey_last(i); k = bkey_next(k))
+ for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
k->needs_whiteout = false;
}
@@ -47,7 +48,7 @@ static void set_needs_whiteout(struct bset *i)
{
struct bkey_packed *k;
- for (k = i->start; k != bset_bkey_last(i); k = bkey_next(k))
+ for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
k->needs_whiteout = true;
}
@@ -341,7 +342,7 @@ bool __bch_compact_whiteouts(struct cache_set *c, struct btree *b,
compacting = true;
u_start = u_pos;
start = i->start;
- end = bset_bkey_last(i);
+ end = vstruct_last(i);
if (src != dst) {
memmove(dst, src, sizeof(*src));
@@ -574,7 +575,7 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
order = sorting_entire_node
? btree_page_order(c)
- : get_order(__set_bytes(b->data, u64s));
+ : get_order(__vstruct_bytes(struct btree_node, u64s));
out = btree_bounce_alloc(c, order, &used_mempool);
@@ -589,8 +590,7 @@ static void btree_node_sort(struct cache_set *c, struct btree *b,
out->keys.u64s = cpu_to_le16(u64s);
- BUG_ON((void *) bset_bkey_last(&out->keys) >
- (void *) out + (PAGE_SIZE << order));
+ BUG_ON(vstruct_end(&out->keys) > (void *) out + (PAGE_SIZE << order));
if (sorting_entire_node)
bch_time_stats_update(&c->btree_sort_time, start_time);
@@ -654,7 +654,7 @@ static struct btree_nr_keys sort_repack(struct bset *dst,
bool filter_whiteouts)
{
struct bkey_format *in_f = &src->format;
- struct bkey_packed *in, *out = bset_bkey_last(dst);
+ struct bkey_packed *in, *out = vstruct_last(dst);
struct btree_nr_keys nr;
memset(&nr, 0, sizeof(nr));
@@ -723,7 +723,7 @@ static struct btree_nr_keys sort_repack_merge(struct cache_set *c,
btree_keys_account_key_add(&nr, 0, prev);
prev = bkey_next(prev);
} else {
- prev = bset_bkey_last(dst);
+ prev = vstruct_last(dst);
}
bkey_copy(prev, &tmp.k);
@@ -734,7 +734,7 @@ static struct btree_nr_keys sort_repack_merge(struct cache_set *c,
btree_keys_account_key_add(&nr, 0, prev);
out = bkey_next(prev);
} else {
- out = bset_bkey_last(dst);
+ out = vstruct_last(dst);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
@@ -854,22 +854,23 @@ void bch_btree_init_next(struct cache_set *c, struct btree *b,
bch_btree_iter_reinit_node(iter, b);
}
-/*
- * We seed the checksum with the entire first pointer (dev, gen and offset),
- * since for btree nodes we have to store the checksum with the data instead of
- * the pointer - this helps guard against reading a valid btree node that is not
- * the node we actually wanted:
- */
-#define btree_csum_set(_b, _i) \
-({ \
- void *_data = (void *) (_i) + 8; \
- void *_end = bset_bkey_last(&(_i)->keys); \
- \
- bch_checksum_update(BSET_CSUM_TYPE(&(_i)->keys), \
- bkey_i_to_extent_c(&(_b)->key)->v._data[0], \
- _data, \
- _end - _data) ^ 0xffffffffffffffffULL; \
-})
+static struct nonce btree_nonce(struct btree *b,
+ struct bset *i,
+ unsigned offset)
+{
+ return (struct nonce) {{
+ [0] = cpu_to_le32(offset),
+ [1] = ((__le32 *) &i->seq)[0],
+ [2] = ((__le32 *) &i->seq)[1],
+ [3] = ((__le32 *) &i->journal_seq)[0]^BCH_NONCE_BTREE,
+ }};
+}
+
+static void bset_encrypt(struct cache_set *c, struct bset *i, struct nonce nonce)
+{
+ bch_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
+ vstruct_end(i) - (void *) i->_data);
+}
#define btree_node_error(b, c, ptr, fmt, ...) \
cache_set_inconsistent(c, \
@@ -877,7 +878,7 @@ void bch_btree_init_next(struct cache_set *c, struct btree *b,
(b)->btree_id, (b)->level, btree_node_root(c, b) \
? btree_node_root(c, b)->level : -1, \
PTR_BUCKET_NR(ca, ptr), (b)->written, \
- (i)->u64s, ##__VA_ARGS__)
+ le16_to_cpu((i)->u64s), ##__VA_ARGS__)
static const char *validate_bset(struct cache_set *c, struct btree *b,
struct cache *ca,
@@ -886,6 +887,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
unsigned *whiteout_u64s)
{
struct bkey_packed *k, *prev = NULL;
+ struct bpos prev_pos = POS_MIN;
bool seen_non_whiteout = false;
if (le16_to_cpu(i->version) != BCACHE_BSET_VERSION)
@@ -903,7 +905,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
}
for (k = i->start;
- k != bset_bkey_last(i);) {
+ k != vstruct_last(i);) {
struct bkey_s_c u;
struct bkey tmp;
const char *invalid;
@@ -911,13 +913,13 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
if (!k->u64s) {
btree_node_error(b, c, ptr,
"KEY_U64s 0: %zu bytes of metadata lost",
- (void *) bset_bkey_last(i) - (void *) k);
+ vstruct_end(i) - (void *) k);
i->u64s = cpu_to_le16((u64 *) k - i->_data);
break;
}
- if (bkey_next(k) > bset_bkey_last(i)) {
+ if (bkey_next(k) > vstruct_last(i)) {
btree_node_error(b, c, ptr,
"key extends past end of bset");
@@ -931,7 +933,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
memmove_u64s_down(k, bkey_next(k),
- (u64 *) bset_bkey_last(i) - (u64 *) k);
+ (u64 *) vstruct_end(i) - (u64 *) k);
continue;
}
@@ -951,7 +953,7 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
memmove_u64s_down(k, bkey_next(k),
- (u64 *) bset_bkey_last(i) - (u64 *) k);
+ (u64 *) vstruct_end(i) - (u64 *) k);
continue;
}
@@ -963,22 +965,40 @@ static const char *validate_bset(struct cache_set *c, struct btree *b,
if (!seen_non_whiteout &&
(!bkey_whiteout(k) ||
- (prev && bkey_cmp_left_packed_byval(b, prev,
- bkey_start_pos(u.k)) > 0))) {
+ (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0))) {
*whiteout_u64s = k->_data - i->_data;
seen_non_whiteout = true;
+ } else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) {
+ btree_node_error(b, c, ptr,
+ "keys out of order: %llu:%llu > %llu:%llu",
+ prev_pos.inode,
+ prev_pos.offset,
+ u.k->p.inode,
+ bkey_start_offset(u.k));
+ /* XXX: repair this */
}
+ prev_pos = u.k->p;
prev = k;
k = bkey_next(k);
}
SET_BSET_BIG_ENDIAN(i, CPU_BIG_ENDIAN);
-
- b->written += sectors;
return NULL;
}
+static bool extent_contains_ptr(struct bkey_s_c_extent e,
+ struct bch_extent_ptr match)
+{
+ const struct bch_extent_ptr *ptr;
+
+ extent_for_each_ptr(e, ptr)
+ if (!memcmp(ptr, &match, sizeof(*ptr)))
+ return true;
+
+ return false;
+}
+
void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
struct cache *ca,
const struct bch_extent_ptr *ptr)
@@ -990,6 +1010,8 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
bool used_mempool;
unsigned u64s;
const char *err;
+ struct bch_csum csum;
+ struct nonce nonce;
int ret;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
@@ -1005,40 +1027,62 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
if (!b->written) {
i = &b->data->keys;
+ err = "bad magic";
+ if (le64_to_cpu(b->data->magic) != bset_magic(c))
+ goto err;
+
+ err = "bad btree header";
+ if (!b->data->keys.seq)
+ goto err;
+
err = "unknown checksum type";
- if (BSET_CSUM_TYPE(i) >= BCH_CSUM_NR)
+ if (!bch_checksum_type_valid(c, BSET_CSUM_TYPE(i)))
goto err;
/* XXX: retry checksum errors */
+ nonce = btree_nonce(b, i, b->written << 9);
+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, b->data);
+
err = "bad checksum";
- if (le64_to_cpu(b->data->csum) !=
- btree_csum_set(b, b->data))
+ if (bch_crc_cmp(csum, b->data->csum))
goto err;
- sectors = __set_blocks(b->data,
- le16_to_cpu(b->data->keys.u64s),
- block_bytes(c)) << c->block_bits;
+ bch_encrypt(c, BSET_CSUM_TYPE(i), nonce,
+ &b->data->flags,
+ (void *) &b->data->keys -
+ (void *) &b->data->flags);
+ nonce = nonce_add(nonce,
+ round_up((void *) &b->data->keys -
+ (void *) &b->data->flags,
+ CHACHA20_BLOCK_SIZE));
+ bset_encrypt(c, i, nonce);
- err = "bad magic";
- if (le64_to_cpu(b->data->magic) != bset_magic(&c->disk_sb))
- goto err;
-
- err = "bad btree header";
- if (!b->data->keys.seq)
- goto err;
+ sectors = vstruct_sectors(b->data, c->block_bits);
if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
+ u64 *p = (u64 *) &b->data->ptr;
+
+ *p = swab64(*p);
bch_bpos_swab(&b->data->min_key);
bch_bpos_swab(&b->data->max_key);
}
+ err = "incorrect btree id";
+ if (BTREE_NODE_ID(b->data) != b->btree_id)
+ goto err;
+
+ err = "incorrect level";
+ if (BTREE_NODE_LEVEL(b->data) != b->level)
+ goto err;
+
err = "incorrect max key";
if (bkey_cmp(b->data->max_key, b->key.k.p))
goto err;
- err = "incorrect level";
- if (BSET_BTREE_LEVEL(i) != b->level)
+ err = "incorrect backpointer";
+ if (!extent_contains_ptr(bkey_i_to_s_c_extent(&b->key),
+ b->data->ptr))
goto err;
err = bch_bkey_format_validate(&b->data->format);
@@ -1056,23 +1100,27 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
break;
err = "unknown checksum type";
- if (BSET_CSUM_TYPE(i) >= BCH_CSUM_NR)
+ if (!bch_checksum_type_valid(c, BSET_CSUM_TYPE(i)))
goto err;
+ nonce = btree_nonce(b, i, b->written << 9);
+ csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+
err = "bad checksum";
- if (le64_to_cpu(bne->csum) !=
- btree_csum_set(b, bne))
+ if (memcmp(&csum, &bne->csum, sizeof(csum)))
goto err;
- sectors = __set_blocks(bne,
- le16_to_cpu(bne->keys.u64s),
- block_bytes(c)) << c->block_bits;
+ bset_encrypt(c, i, nonce);
+
+ sectors = vstruct_sectors(bne, c->block_bits);
}
err = validate_bset(c, b, ca, ptr, i, sectors, &whiteout_u64s);
if (err)
goto err;
+ b->written += sectors;
+
err = "insufficient memory";
ret = bch_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b);
if (ret < 0)
@@ -1083,11 +1131,11 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
__bch_btree_node_iter_push(iter, b,
i->start,
- bkey_idx(i, whiteout_u64s));
+ vstruct_idx(i, whiteout_u64s));
__bch_btree_node_iter_push(iter, b,
- bkey_idx(i, whiteout_u64s),
- bset_bkey_last(i));
+ vstruct_idx(i, whiteout_u64s),
+ vstruct_last(i));
}
err = "corrupted btree";
@@ -1290,6 +1338,7 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
struct bch_extent_ptr *ptr;
struct cache *ca;
struct sort_iter sort_iter;
+ struct nonce nonce;
unsigned bytes_to_write, sectors_to_write, order, bytes, u64s;
u64 seq = 0;
bool used_mempool;
@@ -1330,7 +1379,7 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
BUG_ON(b->written >= c->sb.btree_node_size);
BUG_ON(bset_written(b, btree_bset_last(b)));
- BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(&c->disk_sb));
+ BUG_ON(le64_to_cpu(b->data->magic) != bset_magic(c));
BUG_ON(memcmp(&b->data->format, &b->format, sizeof(b->format)));
if (lock_type_held == SIX_LOCK_intent) {
@@ -1396,7 +1445,7 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
b->whiteout_u64s = 0;
u64s = btree_node_is_extents(b)
- ? sort_extents(bset_bkey_last(i), &sort_iter, false)
+ ? sort_extents(vstruct_last(i), &sort_iter, false)
: sort_keys(i->start, &sort_iter, false);
le16_add_cpu(&i->u64s, u64s);
@@ -1413,14 +1462,30 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
BUG_ON(i->seq != b->data->keys.seq);
i->version = cpu_to_le16(BCACHE_BSET_VERSION);
- SET_BSET_CSUM_TYPE(i, c->opts.metadata_checksum);
+ SET_BSET_CSUM_TYPE(i, bch_meta_checksum_type(c));
+
+ nonce = btree_nonce(b, i, b->written << 9);
+
+ if (bn) {
+ bch_encrypt(c, BSET_CSUM_TYPE(i), nonce,
+ &bn->flags,
+ (void *) &b->data->keys -
+ (void *) &b->data->flags);
+ nonce = nonce_add(nonce,
+ round_up((void *) &b->data->keys -
+ (void *) &b->data->flags,
+ CHACHA20_BLOCK_SIZE));
+ bset_encrypt(c, i, nonce);
+
+ nonce = btree_nonce(b, i, b->written << 9);
+ bn->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bn);
+ } else {
+ bset_encrypt(c, i, nonce);
- if (bn)
- bn->csum = cpu_to_le64(btree_csum_set(b, bn));
- else
- bne->csum = cpu_to_le64(btree_csum_set(b, bne));
+ bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne);
+ }
- bytes_to_write = (void *) bset_bkey_last(i) - data;
+ bytes_to_write = vstruct_end(i) - data;
sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
memset(data + bytes_to_write, 0,
@@ -1548,7 +1613,7 @@ bool bch_btree_post_write_cleanup(struct cache_set *c, struct btree *b)
* If later we don't unconditionally sort down to a single bset, we have
* to ensure this is still true:
*/
- BUG_ON((void *) bset_bkey_last(btree_bset_last(b)) > write_block(b));
+ BUG_ON((void *) btree_bkey_last(b, bset_tree_last(b)) > write_block(b));
bne = want_new_bset(c, b);
if (bne)
diff --git a/libbcache/btree_types.h b/libbcache/btree_types.h
index 176d42a7..4cbec7fe 100644
--- a/libbcache/btree_types.h
+++ b/libbcache/btree_types.h
@@ -202,24 +202,12 @@ __btree_node_offset_to_key(const struct btree *b, u16 k)
return (void *) ((u64 *) b->data + k + 1);
}
-#define __bkey_idx(_set, _offset) \
- ((_set)->_data + (_offset))
-
-#define bkey_idx(_set, _offset) \
- ((typeof(&(_set)->start[0])) __bkey_idx((_set), (_offset)))
-
-#define __bset_bkey_last(_set) \
- __bkey_idx((_set), (_set)->u64s)
-
-#define bset_bkey_last(_set) \
- bkey_idx((_set), le16_to_cpu((_set)->u64s))
-
#define btree_bkey_first(_b, _t) (bset(_b, _t)->start)
#define btree_bkey_last(_b, _t) \
({ \
EBUG_ON(__btree_node_offset_to_key(_b, (_t)->end_offset) != \
- bset_bkey_last(bset(_b, _t))); \
+ vstruct_last(bset(_b, _t))); \
\
__btree_node_offset_to_key(_b, (_t)->end_offset); \
})
@@ -227,7 +215,7 @@ __btree_node_offset_to_key(const struct btree *b, u16 k)
static inline void set_btree_bset_end(struct btree *b, struct bset_tree *t)
{
t->end_offset =
- __btree_node_key_to_offset(b, bset_bkey_last(bset(b, t)));
+ __btree_node_key_to_offset(b, vstruct_last(bset(b, t)));
btree_bkey_last(b, t);
}
diff --git a/libbcache/btree_update.c b/libbcache/btree_update.c
index 95406a44..c3bb2092 100644
--- a/libbcache/btree_update.c
+++ b/libbcache/btree_update.c
@@ -12,7 +12,7 @@
#include "extents.h"
#include "journal.h"
#include "keylist.h"
-#include "super.h"
+#include "super-io.h"
#include <linux/random.h>
#include <linux/sort.h>
@@ -80,7 +80,7 @@ bool bch_btree_node_format_fits(struct cache_set *c, struct btree *b,
{
size_t u64s = btree_node_u64s_with_format(b, new_f);
- return __set_bytes(b->data, u64s) < btree_bytes(c);
+ return __vstruct_bytes(struct btree_node, u64s) < btree_bytes(c);
}
/* Btree node freeing/allocation: */
@@ -298,8 +298,11 @@ static struct btree *bch_btree_node_alloc(struct cache_set *c,
bch_bset_init_first(b, &b->data->keys);
memset(&b->nr, 0, sizeof(b->nr));
- b->data->magic = cpu_to_le64(bset_magic(&c->disk_sb));
- SET_BSET_BTREE_LEVEL(&b->data->keys, level);
+ b->data->magic = cpu_to_le64(bset_magic(c));
+ b->data->flags = 0;
+ SET_BTREE_NODE_ID(b->data, id);
+ SET_BTREE_NODE_LEVEL(b->data, level);
+ b->data->ptr = bkey_i_to_extent(&b->key)->v.start->ptr;
bch_btree_build_aux_trees(b);
@@ -1292,7 +1295,7 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
*/
k = set1->start;
while (1) {
- if (bkey_next(k) == bset_bkey_last(set1))
+ if (bkey_next(k) == vstruct_last(set1))
break;
if (k->_data - set1->_data >= (le16_to_cpu(set1->u64s) * 3) / 5)
break;
@@ -1313,7 +1316,7 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
n2->data->min_key =
btree_type_successor(n1->btree_id, n1->key.k.p);
- set2->u64s = cpu_to_le16((u64 *) bset_bkey_last(set1) - (u64 *) k);
+ set2->u64s = cpu_to_le16((u64 *) vstruct_end(set1) - (u64 *) k);
set1->u64s = cpu_to_le16(le16_to_cpu(set1->u64s) - le16_to_cpu(set2->u64s));
set_btree_bset_end(n1, n1->set);
@@ -1333,7 +1336,7 @@ static struct btree *__btree_split_node(struct btree_iter *iter, struct btree *n
BUG_ON(!set2->u64s);
memcpy_u64s(set2->start,
- bset_bkey_last(set1),
+ vstruct_end(set1),
le16_to_cpu(set2->u64s));
btree_node_reset_sib_u64s(n1);
@@ -1393,12 +1396,12 @@ static void btree_split_insert_keys(struct btree_iter *iter, struct btree *b,
*/
i = btree_bset_first(b);
p = i->start;
- while (p != bset_bkey_last(i))
+ while (p != vstruct_last(i))
if (bkey_deleted(p)) {
le16_add_cpu(&i->u64s, -p->u64s);
set_btree_bset_end(b, b->set);
memmove_u64s_down(p, bkey_next(p),
- (u64 *) bset_bkey_last(i) -
+ (u64 *) vstruct_last(i) -
(u64 *) p);
} else
p = bkey_next(p);
@@ -1428,9 +1431,7 @@ static void btree_split(struct btree *b, struct btree_iter *iter,
if (b->level)
btree_split_insert_keys(iter, n1, insert_keys, reserve);
- if (__set_blocks(n1->data,
- le16_to_cpu(n1->data->keys.u64s),
- block_bytes(c)) > BTREE_SPLIT_THRESHOLD(c)) {
+ if (vstruct_blocks(n1->data, c->block_bits) > BTREE_SPLIT_THRESHOLD(c)) {
trace_bcache_btree_node_split(c, b, b->nr.live_u64s);
n2 = __btree_split_node(iter, n1, reserve);
@@ -1939,7 +1940,7 @@ retry:
u64s = 0;
trans_for_each_entry(trans, i)
if (!i->done)
- u64s += jset_u64s(i->k->k.u64s);
+ u64s += jset_u64s(i->k->k.u64s + i->extra_res);
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
@@ -1966,7 +1967,7 @@ retry:
* written one
*/
if (!i->done) {
- u64s += i->k->k.u64s;
+ u64s += i->k->k.u64s + i->extra_res;
if (!bch_btree_node_insert_fits(c,
i->iter->nodes[0], u64s)) {
split = i->iter;
@@ -2217,7 +2218,7 @@ int bch_btree_update(struct cache_set *c, enum btree_id id,
int bch_btree_delete_range(struct cache_set *c, enum btree_id id,
struct bpos start,
struct bpos end,
- u64 version,
+ struct bversion version,
struct disk_reservation *disk_res,
struct extent_insert_hook *hook,
u64 *journal_seq)
diff --git a/libbcache/btree_update.h b/libbcache/btree_update.h
index 5fc1b1aa..8ff089da 100644
--- a/libbcache/btree_update.h
+++ b/libbcache/btree_update.h
@@ -5,6 +5,7 @@
#include "btree_iter.h"
#include "buckets.h"
#include "journal.h"
+#include "vstructs.h"
struct cache_set;
struct bkey_format_state;
@@ -200,7 +201,7 @@ static inline bool bset_unwritten(struct btree *b, struct bset *i)
static inline unsigned bset_end_sector(struct cache_set *c, struct btree *b,
struct bset *i)
{
- return round_up(bset_byte_offset(b, bset_bkey_last(i)),
+ return round_up(bset_byte_offset(b, vstruct_end(i)),
block_bytes(c)) >> 9;
}
@@ -208,7 +209,7 @@ static inline size_t bch_btree_keys_u64s_remaining(struct cache_set *c,
struct btree *b)
{
struct bset *i = btree_bset_last(b);
- unsigned used = bset_byte_offset(b, bset_bkey_last(i)) / sizeof(u64) +
+ unsigned used = bset_byte_offset(b, vstruct_end(i)) / sizeof(u64) +
b->whiteout_u64s +
b->uncompacted_whiteout_u64s;
unsigned total = c->sb.btree_node_size << 6;
@@ -235,7 +236,7 @@ static inline struct btree_node_entry *want_new_bset(struct cache_set *c,
{
struct bset *i = btree_bset_last(b);
unsigned offset = max_t(unsigned, b->written << 9,
- bset_byte_offset(b, bset_bkey_last(i)));
+ bset_byte_offset(b, vstruct_end(i)));
ssize_t n = (ssize_t) btree_bytes(c) - (ssize_t)
(offset + sizeof(struct btree_node_entry) +
b->whiteout_u64s * sizeof(u64) +
@@ -244,8 +245,8 @@ static inline struct btree_node_entry *want_new_bset(struct cache_set *c,
EBUG_ON(offset > btree_bytes(c));
if ((unlikely(bset_written(b, i)) && n > 0) ||
- (unlikely(__set_bytes(i, le16_to_cpu(i->u64s)) >
- btree_write_set_buffer(b)) && n > btree_write_set_buffer(b)))
+ (unlikely(vstruct_bytes(i) > btree_write_set_buffer(b)) &&
+ n > btree_write_set_buffer(b)))
return (void *) b->data + offset;
return NULL;
@@ -308,6 +309,7 @@ struct btree_insert {
struct btree_insert_entry {
struct btree_iter *iter;
struct bkey_i *k;
+ unsigned extra_res;
/*
* true if entire key was inserted - can only be false for
* extents
@@ -329,6 +331,14 @@ int __bch_btree_insert_at(struct btree_insert *);
.done = false, \
})
+#define BTREE_INSERT_ENTRY_EXTRA_RES(_iter, _k, _extra) \
+ ((struct btree_insert_entry) { \
+ .iter = (_iter), \
+ .k = (_k), \
+ .extra_res = (_extra), \
+ .done = false, \
+ })
+
/**
* bch_btree_insert_at - insert one or more keys at iterator positions
* @iter: btree iterator
@@ -391,7 +401,7 @@ static inline bool journal_res_insert_fits(struct btree_insert *trans,
return true;
for (i = insert; i < trans->entries + trans->nr; i++)
- u64s += jset_u64s(i->k->k.u64s);
+ u64s += jset_u64s(i->k->k.u64s + i->extra_res);
return u64s <= trans->journal_res.u64s;
}
@@ -404,7 +414,7 @@ int bch_btree_update(struct cache_set *, enum btree_id,
struct bkey_i *, u64 *);
int bch_btree_delete_range(struct cache_set *, enum btree_id,
- struct bpos, struct bpos, u64,
+ struct bpos, struct bpos, struct bversion,
struct disk_reservation *,
struct extent_insert_hook *, u64 *);
diff --git a/libbcache/buckets.c b/libbcache/buckets.c
index 3398b255..757bc035 100644
--- a/libbcache/buckets.c
+++ b/libbcache/buckets.c
@@ -534,12 +534,10 @@ static void bch_mark_extent(struct cache_set *c, struct bkey_s_c_extent e,
rcu_read_lock();
extent_for_each_online_device_crc(c, e, crc, ptr, ca) {
- bool dirty = bch_extent_ptr_is_dirty(c, e, ptr);
-
- trace_bcache_mark_bucket(ca, e.k, ptr, sectors, dirty);
+ trace_bcache_mark_bucket(ca, e.k, ptr, sectors, !ptr->cached);
bch_mark_pointer(c, e, ca, crc, ptr, sectors,
- dirty ? type : S_CACHED,
+ ptr->cached ? S_CACHED : type,
may_make_unavailable,
stats, gc_will_visit, journal_seq);
}
@@ -559,10 +557,13 @@ static void __bch_mark_key(struct cache_set *c, struct bkey_s_c k,
may_make_unavailable, stats,
gc_will_visit, journal_seq);
break;
- case BCH_RESERVATION:
- stats->persistent_reserved += sectors;
+ case BCH_RESERVATION: {
+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+
+ stats->persistent_reserved += r.v->nr_replicas * sectors;
break;
}
+ }
}
void __bch_gc_mark_key(struct cache_set *c, struct bkey_s_c k,
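
Worked example of the change above: a BCH_RESERVATION key covering 8 sectors with nr_replicas = 3 now adds 24 sectors to persistent_reserved, where the old code counted only the 8 key sectors regardless of the replication factor.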
diff --git a/libbcache/buckets.h b/libbcache/buckets.h
index 35100eba..8194dd9b 100644
--- a/libbcache/buckets.h
+++ b/libbcache/buckets.h
@@ -42,7 +42,7 @@ static inline u8 bucket_gc_gen(struct cache *ca, struct bucket *g)
static inline struct cache *PTR_CACHE(const struct cache_set *c,
const struct bch_extent_ptr *ptr)
{
- EBUG_ON(ptr->dev > rcu_dereference(c->members)->nr_in_set);
+ EBUG_ON(ptr->dev > rcu_dereference(c->members)->nr_devices);
return rcu_dereference(c->cache[ptr->dev]);
}
diff --git a/libbcache/chardev.c b/libbcache/chardev.c
index 0b020c84..b361b092 100644
--- a/libbcache/chardev.c
+++ b/libbcache/chardev.c
@@ -9,6 +9,7 @@
#include "bcache.h"
#include "super.h"
+#include "super-io.h"
#include <linux/module.h>
#include <linux/fs.h>
@@ -202,16 +203,16 @@ static long bch_ioctl_disk_fail(struct cache_set *c,
return ret;
}
-static struct cache_member *bch_uuid_lookup(struct cache_set *c, uuid_le uuid)
+static struct bch_member *bch_uuid_lookup(struct cache_set *c, uuid_le uuid)
{
- struct cache_member *mi = c->disk_mi;
+ struct bch_sb_field_members *mi = bch_sb_get_members(c->disk_sb);
unsigned i;
- lockdep_assert_held(&bch_register_lock);
+ lockdep_assert_held(&c->sb_lock);
- for (i = 0; i < c->disk_sb.nr_in_set; i++)
- if (!memcmp(&mi[i].uuid, &uuid, sizeof(uuid)))
- return &mi[i];
+ for (i = 0; i < c->disk_sb->nr_devices; i++)
+ if (!memcmp(&mi->members[i].uuid, &uuid, sizeof(uuid)))
+ return &mi->members[i];
return NULL;
}
@@ -220,20 +221,20 @@ static long bch_ioctl_disk_remove_by_uuid(struct cache_set *c,
struct bch_ioctl_disk_remove_by_uuid __user *user_arg)
{
struct bch_ioctl_disk_fail_by_uuid arg;
- struct cache_member *m;
+ struct bch_member *m;
int ret = -ENOENT;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
return -EFAULT;
- mutex_lock(&bch_register_lock);
+ mutex_lock(&c->sb_lock);
if ((m = bch_uuid_lookup(c, arg.dev))) {
/* XXX: */
- SET_CACHE_STATE(m, CACHE_FAILED);
- bcache_write_super(c);
+ SET_BCH_MEMBER_STATE(m, BCH_MEMBER_STATE_FAILED);
+ bch_write_super(c);
ret = 0;
}
- mutex_unlock(&bch_register_lock);
+ mutex_unlock(&c->sb_lock);
return ret;
}
@@ -242,19 +243,19 @@ static long bch_ioctl_disk_fail_by_uuid(struct cache_set *c,
struct bch_ioctl_disk_fail_by_uuid __user *user_arg)
{
struct bch_ioctl_disk_fail_by_uuid arg;
- struct cache_member *m;
+ struct bch_member *m;
int ret = -ENOENT;
if (copy_from_user(&arg, user_arg, sizeof(arg)))
return -EFAULT;
- mutex_lock(&bch_register_lock);
+ mutex_lock(&c->sb_lock);
if ((m = bch_uuid_lookup(c, arg.dev))) {
- SET_CACHE_STATE(m, CACHE_FAILED);
- bcache_write_super(c);
+ SET_BCH_MEMBER_STATE(m, BCH_MEMBER_STATE_FAILED);
+ bch_write_super(c);
ret = 0;
}
- mutex_unlock(&bch_register_lock);
+ mutex_unlock(&c->sb_lock);
return ret;
}
@@ -263,8 +264,8 @@ static long bch_ioctl_query_uuid(struct cache_set *c,
struct bch_ioctl_query_uuid __user *user_arg)
{
return copy_to_user(&user_arg->uuid,
- &c->disk_sb.user_uuid,
- sizeof(c->disk_sb.user_uuid));
+ &c->sb.user_uuid,
+ sizeof(c->sb.user_uuid));
}
long bch_cache_set_ioctl(struct cache_set *c, unsigned cmd, void __user *arg)
diff --git a/libbcache/checksum.c b/libbcache/checksum.c
index beae0b26..eb41f2ea 100644
--- a/libbcache/checksum.c
+++ b/libbcache/checksum.c
@@ -1,11 +1,19 @@
#include "bcache.h"
#include "checksum.h"
+#include "super.h"
+#include "super-io.h"
#include <linux/crc32c.h>
+#include <linux/crypto.h>
+#include <linux/key.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <crypto/algapi.h>
#include <crypto/chacha20.h>
#include <crypto/hash.h>
#include <crypto/poly1305.h>
+#include <keys/user-type.h>
/*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
@@ -129,7 +137,35 @@ u64 bch_crc64_update(u64 crc, const void *_data, size_t len)
return crc;
}
-u64 bch_checksum_update(unsigned type, u64 crc, const void *data, size_t len)
+static u64 bch_checksum_init(unsigned type)
+{
+ switch (type) {
+ case BCH_CSUM_NONE:
+ return 0;
+ case BCH_CSUM_CRC32C:
+ return U32_MAX;
+ case BCH_CSUM_CRC64:
+ return U64_MAX;
+ default:
+ BUG();
+ }
+}
+
+static u64 bch_checksum_final(unsigned type, u64 crc)
+{
+ switch (type) {
+ case BCH_CSUM_NONE:
+ return 0;
+ case BCH_CSUM_CRC32C:
+ return crc ^ U32_MAX;
+ case BCH_CSUM_CRC64:
+ return crc ^ U64_MAX;
+ default:
+ BUG();
+ }
+}
+
+static u64 bch_checksum_update(unsigned type, u64 crc, const void *data, size_t len)
{
switch (type) {
case BCH_CSUM_NONE:
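
Splitting the old one-shot helper into init/update/final makes the CRC state reusable across multiple buffers; the bio variants below feed each segment through a separate update call. The three-step usage, shown standalone (illustrative snippet, not part of the patch):

	u64 crc = bch_checksum_init(BCH_CSUM_CRC32C);	/* seed: U32_MAX */

	crc = bch_checksum_update(BCH_CSUM_CRC32C, crc, buf1, len1);
	crc = bch_checksum_update(BCH_CSUM_CRC32C, crc, buf2, len2);

	crc = bch_checksum_final(BCH_CSUM_CRC32C, crc);	/* final xor with U32_MAX */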
@@ -143,32 +179,416 @@ u64 bch_checksum_update(unsigned type, u64 crc, const void *data, size_t len)
}
}
-u64 bch_checksum(unsigned type, const void *data, size_t len)
+static inline void do_encrypt_sg(struct crypto_blkcipher *tfm,
+ struct nonce nonce,
+ struct scatterlist *sg, size_t len)
+{
+ struct blkcipher_desc desc = { .tfm = tfm, .info = nonce.d };
+ int ret;
+
+ ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
+ BUG_ON(ret);
+}
+
+static inline void do_encrypt(struct crypto_blkcipher *tfm,
+ struct nonce nonce,
+ void *buf, size_t len)
+{
+ struct scatterlist sg;
+
+ sg_init_one(&sg, buf, len);
+ do_encrypt_sg(tfm, nonce, &sg, len);
+}
+
+int bch_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
+ void *buf, size_t len)
+{
+ struct crypto_blkcipher *chacha20 =
+ crypto_alloc_blkcipher("chacha20", 0, CRYPTO_ALG_ASYNC);
+ int ret;
+
+ if (IS_ERR(chacha20))
+ return PTR_ERR(chacha20);
+
+ ret = crypto_blkcipher_setkey(chacha20, (void *) key, sizeof(*key));
+ if (ret)
+ goto err;
+
+ do_encrypt(chacha20, nonce, buf, len);
+err:
+ crypto_free_blkcipher(chacha20);
+ return ret;
+}
+
+static void gen_poly_key(struct cache_set *c, struct shash_desc *desc,
+ struct nonce nonce)
+{
+ u8 key[POLY1305_KEY_SIZE];
+
+ nonce.d[3] ^= BCH_NONCE_POLY;
+
+ memset(key, 0, sizeof(key));
+ do_encrypt(c->chacha20, nonce, key, sizeof(key));
+
+ desc->tfm = c->poly1305;
+ desc->flags = 0;
+ crypto_shash_init(desc);
+ crypto_shash_update(desc, key, sizeof(key));
+}
+
+struct bch_csum bch_checksum(struct cache_set *c, unsigned type,
+ struct nonce nonce, const void *data, size_t len)
{
- u64 crc = 0xffffffffffffffffULL;
+ switch (type) {
+ case BCH_CSUM_NONE:
+ case BCH_CSUM_CRC32C:
+ case BCH_CSUM_CRC64: {
+ u64 crc = bch_checksum_init(type);
+
+ crc = bch_checksum_update(type, crc, data, len);
+ crc = bch_checksum_final(type, crc);
+
+ return (struct bch_csum) { .lo = crc };
+ }
+
+ case BCH_CSUM_CHACHA20_POLY1305_80:
+ case BCH_CSUM_CHACHA20_POLY1305_128: {
+ SHASH_DESC_ON_STACK(desc, c->poly1305);
+ u8 digest[POLY1305_DIGEST_SIZE];
+ struct bch_csum ret = { 0 };
+
+ gen_poly_key(c, desc, nonce);
+
+ crypto_shash_update(desc, data, len);
+ crypto_shash_final(desc, digest);
+
+ memcpy(&ret, digest, bch_crc_bytes[type]);
+ return ret;
+ }
+ default:
+ BUG();
+ }
+}
- crc = bch_checksum_update(type, crc, data, len);
+void bch_encrypt(struct cache_set *c, unsigned type,
+ struct nonce nonce, void *data, size_t len)
+{
+ if (!bch_csum_type_is_encryption(type))
+ return;
- return crc ^ 0xffffffffffffffffULL;
+ do_encrypt(c->chacha20, nonce, data, len);
}
-u32 bch_checksum_bio(struct bio *bio, unsigned type)
+struct bch_csum bch_checksum_bio(struct cache_set *c, unsigned type,
+ struct nonce nonce, struct bio *bio)
{
struct bio_vec bv;
struct bvec_iter iter;
- u32 csum = U32_MAX;
- if (type == BCH_CSUM_NONE)
- return 0;
+ switch (type) {
+ case BCH_CSUM_NONE:
+ return (struct bch_csum) { 0 };
+ case BCH_CSUM_CRC32C:
+ case BCH_CSUM_CRC64: {
+ u64 crc = bch_checksum_init(type);
+
+ bio_for_each_segment(bv, bio, iter) {
+ void *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
+
+ crc = bch_checksum_update(type,
+ crc, p, bv.bv_len);
+ kunmap_atomic(p);
+ }
+
+ crc = bch_checksum_final(type, crc);
+ return (struct bch_csum) { .lo = crc };
+ }
+
+ case BCH_CSUM_CHACHA20_POLY1305_80:
+ case BCH_CSUM_CHACHA20_POLY1305_128: {
+ SHASH_DESC_ON_STACK(desc, c->poly1305);
+ u8 digest[POLY1305_DIGEST_SIZE];
+ struct bch_csum ret = { 0 };
+
+ gen_poly_key(c, desc, nonce);
+
+ bio_for_each_segment(bv, bio, iter) {
+ void *p = kmap_atomic(bv.bv_page) + bv.bv_offset;
+
+ crypto_shash_update(desc, p, bv.bv_len);
+ kunmap_atomic(p);
+ }
+
+ crypto_shash_final(desc, digest);
+
+ memcpy(&ret, digest, bch_crc_bytes[type]);
+ return ret;
+ }
+ default:
+ BUG();
+ }
+}
+
+void bch_encrypt_bio(struct cache_set *c, unsigned type,
+ struct nonce nonce, struct bio *bio)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ struct scatterlist sgl[16], *sg = sgl;
+ size_t bytes = 0;
+
+ if (!bch_csum_type_is_encryption(type))
+ return;
+
+ sg_init_table(sgl, ARRAY_SIZE(sgl));
bio_for_each_segment(bv, bio, iter) {
- void *p = kmap_atomic(bv.bv_page);
+ if (sg == sgl + ARRAY_SIZE(sgl)) {
+ sg_mark_end(sg - 1);
+ do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
+
+ le32_add_cpu(nonce.d, bytes / CHACHA20_BLOCK_SIZE);
+ bytes = 0;
+
+ sg_init_table(sgl, ARRAY_SIZE(sgl));
+ sg = sgl;
+ }
+
+ sg_set_page(sg++, bv.bv_page, bv.bv_len, bv.bv_offset);
+ bytes += bv.bv_len;
+
+ }
+
+ sg_mark_end(sg - 1);
+ do_encrypt_sg(c->chacha20, nonce, sgl, bytes);
+}
+
+#ifdef __KERNEL__
+int bch_request_key(struct bch_sb *sb, struct bch_key *key)
+{
+ char key_description[60];
+ struct key *keyring_key;
+ const struct user_key_payload *ukp;
+ int ret;
+
+ snprintf(key_description, sizeof(key_description),
+ "bcache:%pUb", &sb->user_uuid);
+
+ keyring_key = request_key(&key_type_logon, key_description, NULL);
+ if (IS_ERR(keyring_key))
+ return PTR_ERR(keyring_key);
+
+ down_read(&keyring_key->sem);
+ ukp = user_key_payload(keyring_key);
+ if (ukp->datalen == sizeof(*key)) {
+ memcpy(key, ukp->data, ukp->datalen);
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+ up_read(&keyring_key->sem);
+ key_put(keyring_key);
+
+ return ret;
+}
+#else
+#include <keyutils.h>
+#include <uuid/uuid.h>
+
+int bch_request_key(struct bch_sb *sb, struct bch_key *key)
+{
+ key_serial_t key_id;
+ char key_description[60];
+ char uuid[40];
+
+ uuid_unparse_lower(sb->user_uuid.b, uuid);
+ sprintf(key_description, "bcache:%s", uuid);
+
+ key_id = request_key("user", key_description, NULL,
+ KEY_SPEC_USER_KEYRING);
+ if (key_id < 0)
+ return -errno;
+
+ if (keyctl_read(key_id, (void *) key, sizeof(*key)) != sizeof(*key))
+ return -1;
+
+ return 0;
+}
+#endif
- csum = bch_checksum_update(type, csum,
- p + bv.bv_offset,
- bv.bv_len);
- kunmap_atomic(p);
+static int bch_decrypt_sb_key(struct cache_set *c,
+ struct bch_sb_field_crypt *crypt,
+ struct bch_key *key)
+{
+ struct bch_encrypted_key sb_key = crypt->key;
+ struct bch_key user_key;
+ int ret = 0;
+
+ /* is key encrypted? */
+ if (!bch_key_is_encrypted(&sb_key))
+ goto out;
+
+ ret = bch_request_key(c->disk_sb, &user_key);
+ if (ret) {
+ bch_err(c, "error requesting encryption key");
+ goto err;
}
- return csum ^= U32_MAX;
+ /* decrypt real key: */
+ ret = bch_chacha_encrypt_key(&user_key, bch_sb_key_nonce(c),
+ &sb_key, sizeof(sb_key));
+ if (ret)
+ goto err;
+
+ if (bch_key_is_encrypted(&sb_key)) {
+ bch_err(c, "incorrect encryption key");
+ ret = -EINVAL;
+ goto err;
+ }
+out:
+ *key = sb_key.key;
+err:
+ memzero_explicit(&sb_key, sizeof(sb_key));
+ memzero_explicit(&user_key, sizeof(user_key));
+ return ret;
+}
+
+static int bch_alloc_ciphers(struct cache_set *c)
+{
+ if (!c->chacha20)
+ c->chacha20 = crypto_alloc_blkcipher("chacha20", 0,
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(c->chacha20))
+ return PTR_ERR(c->chacha20);
+
+ if (!c->poly1305)
+ c->poly1305 = crypto_alloc_shash("poly1305", 0, 0);
+ if (IS_ERR(c->poly1305))
+ return PTR_ERR(c->poly1305);
+
+ return 0;
+}
+
+int bch_disable_encryption(struct cache_set *c)
+{
+ struct bch_sb_field_crypt *crypt;
+ struct bch_key key;
+ int ret = -EINVAL;
+
+ mutex_lock(&c->sb_lock);
+
+ crypt = bch_sb_get_crypt(c->disk_sb);
+ if (!crypt)
+ goto out;
+
+ /* is key encrypted? */
+ ret = 0;
+ if (bch_key_is_encrypted(&crypt->key))
+ goto out;
+
+ ret = bch_decrypt_sb_key(c, crypt, &key);
+ if (ret)
+ goto out;
+
+ crypt->key.magic = BCH_KEY_MAGIC;
+ crypt->key.key = key;
+
+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb, 0);
+ bch_write_super(c);
+out:
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
+}
+
+int bch_enable_encryption(struct cache_set *c, bool keyed)
+{
+ struct bch_encrypted_key key;
+ struct bch_key user_key;
+ struct bch_sb_field_crypt *crypt;
+ int ret = -EINVAL;
+
+ mutex_lock(&c->sb_lock);
+
+ /* Do we already have an encryption key? */
+ if (bch_sb_get_crypt(c->disk_sb))
+ goto err;
+
+ ret = bch_alloc_ciphers(c);
+ if (ret)
+ goto err;
+
+ key.magic = BCH_KEY_MAGIC;
+ get_random_bytes(&key.key, sizeof(key.key));
+
+ if (keyed) {
+ ret = bch_request_key(c->disk_sb, &user_key);
+ if (ret) {
+ bch_err(c, "error requesting encryption key");
+ goto err;
+ }
+
+ ret = bch_chacha_encrypt_key(&user_key, bch_sb_key_nonce(c),
+ &key, sizeof(key));
+ if (ret)
+ goto err;
+ }
+
+ ret = crypto_blkcipher_setkey(c->chacha20,
+ (void *) &key.key, sizeof(key.key));
+ if (ret)
+ goto err;
+
+ crypt = container_of_or_null(bch_fs_sb_field_resize(c, NULL,
+ sizeof(*crypt) / sizeof(u64)),
+ struct bch_sb_field_crypt, field);
+ if (!crypt) {
+ ret = -ENOMEM; /* XXX this technically could be -ENOSPC */
+ goto err;
+ }
+
+ crypt->field.type = BCH_SB_FIELD_crypt;
+ crypt->key = key;
+
+ /* write superblock */
+ SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb, 1);
+ bch_write_super(c);
+err:
+ mutex_unlock(&c->sb_lock);
+ memzero_explicit(&user_key, sizeof(user_key));
+ memzero_explicit(&key, sizeof(key));
+ return ret;
+}
+
+void bch_cache_set_encryption_free(struct cache_set *c)
+{
+ if (!IS_ERR_OR_NULL(c->poly1305))
+ crypto_free_shash(c->poly1305);
+ if (!IS_ERR_OR_NULL(c->chacha20))
+ crypto_free_blkcipher(c->chacha20);
+}
+
+int bch_cache_set_encryption_init(struct cache_set *c)
+{
+ struct bch_sb_field_crypt *crypt;
+ struct bch_key key;
+ int ret;
+
+ crypt = bch_sb_get_crypt(c->disk_sb);
+ if (!crypt)
+ return 0;
+
+ ret = bch_alloc_ciphers(c);
+ if (ret)
+ return ret;
+
+ ret = bch_decrypt_sb_key(c, crypt, &key);
+ if (ret)
+ goto err;
+
+ ret = crypto_blkcipher_setkey(c->chacha20,
+ (void *) &key.key, sizeof(key.key));
+err:
+ memzero_explicit(&key, sizeof(key));
+ return ret;
}
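
For the ChaCha20/Poly1305 checksum types, gen_poly_key() derives a fresh one-time Poly1305 key per object: it encrypts an all-zero block with ChaCha20 under the object's nonce with the BCH_NONCE_POLY bit set, so the MAC keystream never overlaps the keystream used to encrypt the data itself. The core of that construction, condensed from the code above (for orientation only):

	u8 key[POLY1305_KEY_SIZE];

	nonce.d[3] ^= BCH_NONCE_POLY;		/* separate nonce space for MAC keys */
	memset(key, 0, sizeof(key));
	do_encrypt(c->chacha20, nonce, key, sizeof(key));

	crypto_shash_init(desc);		/* desc->tfm == c->poly1305 */
	crypto_shash_update(desc, key, sizeof(key));
	/* callers then update over the data and finalize; the digest is
	 * truncated to 10 or 16 bytes per bch_crc_bytes[] for the _80 and
	 * _128 variants respectively */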
diff --git a/libbcache/checksum.h b/libbcache/checksum.h
index 196b7e8c..a9a17587 100644
--- a/libbcache/checksum.h
+++ b/libbcache/checksum.h
@@ -1,24 +1,133 @@
#ifndef _BCACHE_CHECKSUM_H
#define _BCACHE_CHECKSUM_H
-#include "btree_types.h"
+#include "bcache.h"
+#include "super-io.h"
+
+#include <crypto/chacha20.h>
u64 bch_crc64_update(u64, const void *, size_t);
-u64 bch_checksum_update(unsigned, u64, const void *, size_t);
-u64 bch_checksum(unsigned, const void *, size_t);
-u32 bch_checksum_bio(struct bio *, unsigned);
+#define BCH_NONCE_EXTENT cpu_to_le32(1 << 28)
+#define BCH_NONCE_BTREE cpu_to_le32(2 << 28)
+#define BCH_NONCE_JOURNAL cpu_to_le32(3 << 28)
+#define BCH_NONCE_PRIO cpu_to_le32(4 << 28)
+#define BCH_NONCE_POLY cpu_to_le32(1 << 31)
+
+struct bch_csum bch_checksum(struct cache_set *, unsigned, struct nonce,
+ const void *, size_t);
/*
- * This is used for various on disk data structures - cache_sb, prio_set, bset,
- * jset: The checksum is _always_ the first 8 bytes of these structs
+ * This is used for various on disk data structures - bch_sb, prio_set, bset,
+ * jset: The checksum is _always_ the first field of these structs
*/
-#define __csum_set(i, u64s, type) \
+#define csum_vstruct(_c, _type, _nonce, _i) \
({ \
- const void *start = ((const void *) (i)) + sizeof(u64); \
- const void *end = __bkey_idx(i, u64s); \
+ const void *start = ((const void *) (_i)) + sizeof((_i)->csum); \
+ const void *end = vstruct_end(_i); \
\
- bch_checksum(type, start, end - start); \
+ bch_checksum(_c, _type, _nonce, start, end - start); \
})
+int bch_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
+int bch_request_key(struct bch_sb *, struct bch_key *);
+
+void bch_encrypt(struct cache_set *, unsigned, struct nonce,
+ void *data, size_t);
+
+struct bch_csum bch_checksum_bio(struct cache_set *, unsigned,
+ struct nonce, struct bio *);
+void bch_encrypt_bio(struct cache_set *, unsigned,
+ struct nonce, struct bio *);
+
+int bch_disable_encryption(struct cache_set *);
+int bch_enable_encryption(struct cache_set *, bool);
+
+void bch_cache_set_encryption_free(struct cache_set *);
+int bch_cache_set_encryption_init(struct cache_set *);
+
+static inline unsigned bch_data_checksum_type(struct cache_set *c)
+{
+ if (c->sb.encryption_type)
+ return c->opts.wide_macs
+ ? BCH_CSUM_CHACHA20_POLY1305_128
+ : BCH_CSUM_CHACHA20_POLY1305_80;
+
+ return c->opts.data_checksum;
+}
+
+static inline unsigned bch_meta_checksum_type(struct cache_set *c)
+{
+ return c->sb.encryption_type
+ ? BCH_CSUM_CHACHA20_POLY1305_128
+ : c->opts.metadata_checksum;
+}
+
+static inline bool bch_checksum_type_valid(const struct cache_set *c,
+ unsigned type)
+{
+ if (type >= BCH_CSUM_NR)
+ return false;
+
+ if (bch_csum_type_is_encryption(type) && !c->chacha20)
+ return false;
+
+ return true;
+}
+
+static const unsigned bch_crc_bytes[] = {
+ [BCH_CSUM_NONE] = 0,
+ [BCH_CSUM_CRC32C] = 4,
+ [BCH_CSUM_CRC64] = 8,
+ [BCH_CSUM_CHACHA20_POLY1305_80] = 10,
+ [BCH_CSUM_CHACHA20_POLY1305_128] = 16,
+};
+
+static inline bool bch_crc_cmp(struct bch_csum l, struct bch_csum r)
+{
+ /*
+ * XXX: need some way of preventing the compiler from optimizing this
+ * into a form that isn't constant time..
+ */
+ return ((l.lo ^ r.lo) | (l.hi ^ r.hi)) != 0;
+}
+
+/* for skipping ahead and encrypting/decrypting at an offset: */
+static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
+{
+ EBUG_ON(offset & (CHACHA20_BLOCK_SIZE - 1));
+
+ le32_add_cpu(&nonce.d[0], offset / CHACHA20_BLOCK_SIZE);
+ return nonce;
+}
+
+static inline bool bch_key_is_encrypted(struct bch_encrypted_key *key)
+{
+ return le64_to_cpu(key->magic) != BCH_KEY_MAGIC;
+}
+
+static inline struct nonce __bch_sb_key_nonce(struct bch_sb *sb)
+{
+ __le64 magic = __bch_sb_magic(sb);
+
+ return (struct nonce) {{
+ [0] = 0,
+ [1] = 0,
+ [2] = ((__le32 *) &magic)[0],
+ [3] = ((__le32 *) &magic)[1],
+ }};
+}
+
+static inline struct nonce bch_sb_key_nonce(struct cache_set *c)
+{
+ __le64 magic = bch_sb_magic(c);
+
+ return (struct nonce) {{
+ [0] = 0,
+ [1] = 0,
+ [2] = ((__le32 *) &magic)[0],
+ [3] = ((__le32 *) &magic)[1],
+ }};
+}
+
#endif /* _BCACHE_CHECKSUM_H */
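
Two details of the nonce scheme worth calling out. The BCH_NONCE_* constants reserve distinct high bits of one nonce word, so extents, btree nodes, the journal, prios and the Poly1305 key derivation each get their own nonce space and can never collide. And nonce_add() advances the ChaCha20 block counter held in d[0], which lets a reader start decrypting partway into an encrypted extent. A worked example (illustrative):

	/* skip the first 16 KiB: 16384 / CHACHA20_BLOCK_SIZE (64) = 256 blocks */
	struct nonce pos = nonce_add(extent_nonce, 16384);
	/* pos.d[0] is now 256 higher (as a little-endian value) than
	 * extent_nonce.d[0]; the other three words are unchanged */

bch_encrypt_bio() in checksum.c makes the same adjustment each time it flushes a full scatterlist batch.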
diff --git a/libbcache/compress.c b/libbcache/compress.c
index f7bfd57f..e76850be 100644
--- a/libbcache/compress.c
+++ b/libbcache/compress.c
@@ -1,6 +1,8 @@
#include "bcache.h"
#include "compress.h"
+#include "extents.h"
#include "io.h"
+#include "super-io.h"
#include <linux/lz4.h>
#include <linux/zlib.h>
@@ -50,7 +52,7 @@ static void *__bio_map_or_bounce(struct cache_set *c,
unsigned prev_end = PAGE_SIZE;
void *data;
- BUG_ON(bvec_iter_sectors(start) > BCH_COMPRESSED_EXTENT_MAX);
+ BUG_ON(bvec_iter_sectors(start) > BCH_ENCODED_EXTENT_MAX);
*bounced = BOUNCED_MAPPED;
@@ -118,12 +120,12 @@ static void bio_unmap_or_unbounce(struct cache_set *c, void *data,
}
static int __bio_uncompress(struct cache_set *c, struct bio *src,
- void *dst_data, struct bch_extent_crc64 crc)
+ void *dst_data, struct bch_extent_crc128 crc)
{
void *src_data = NULL;
unsigned src_bounced;
size_t src_len = src->bi_iter.bi_size;
- size_t dst_len = crc.uncompressed_size << 9;
+ size_t dst_len = crc_uncompressed_size(NULL, &crc) << 9;
int ret;
src_data = bio_map_or_bounce(c, src, &src_bounced, READ);
@@ -179,10 +181,10 @@ err:
int bch_bio_uncompress_inplace(struct cache_set *c, struct bio *bio,
unsigned live_data_sectors,
- struct bch_extent_crc64 crc)
+ struct bch_extent_crc128 crc)
{
void *dst_data = NULL;
- size_t dst_len = crc.uncompressed_size << 9;
+ size_t dst_len = crc_uncompressed_size(NULL, &crc) << 9;
int ret = -ENOMEM;
BUG_ON(DIV_ROUND_UP(live_data_sectors, PAGE_SECTORS) > bio->bi_max_vecs);
@@ -231,11 +233,11 @@ use_mempool:
int bch_bio_uncompress(struct cache_set *c, struct bio *src,
struct bio *dst, struct bvec_iter dst_iter,
- struct bch_extent_crc64 crc)
+ struct bch_extent_crc128 crc)
{
void *dst_data = NULL;
unsigned dst_bounced;
- size_t dst_len = crc.uncompressed_size << 9;
+ size_t dst_len = crc_uncompressed_size(NULL, &crc) << 9;
int ret = -ENOMEM;
dst_data = dst_len == dst_iter.bi_size
@@ -273,28 +275,23 @@ static int __bio_compress(struct cache_set *c,
*src_len = src->bi_iter.bi_size;
workspace = mempool_alloc(&c->lz4_workspace_pool, GFP_NOIO);
-retry_compress:
- ret = lz4_compress(src_data, *src_len,
- dst_data, dst_len,
- workspace);
- /*
- * On error, the compressed data was bigger than dst_len, and
- * -ret is the amount of data we were able to compress - round
- * down to nearest block and try again:
- */
- if (ret && round_down(-ret, block_bytes(c)) > *dst_len) {
- BUG_ON(ret > 0);
- /* not supposed to happen */
- if (WARN_ON(-ret >= *src_len))
- goto err;
+ while (*src_len > block_bytes(c) &&
+ (ret = lz4_compress(src_data, *src_len,
+ dst_data, dst_len,
+ workspace))) {
+ /*
+ * On error, the compressed data was bigger than
+ * dst_len, and -ret is the amount of data we were able
+ * to compress - round down to nearest block and try
+ * again:
+ */
+ BUG_ON(ret > 0);
+ BUG_ON(-ret >= *src_len);
*src_len = round_down(-ret, block_bytes(c));
- if (!*src_len)
- goto err;
-
- goto retry_compress;
}
+
mempool_free(workspace, &c->lz4_workspace_pool);
if (ret)
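
Worked example of the restructured retry loop: with a 4096-byte block size, if lz4_compress() fails after fitting only 9000 input bytes into the output buffer, -ret is 9000, *src_len is rounded down to 8192 and the compression is retried on that shorter prefix; the loop stops retrying once *src_len is down to a single block and leaves any remaining error for the caller.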
@@ -354,6 +351,10 @@ zlib_err:
}
BUG_ON(!*dst_len);
+ BUG_ON(*dst_len > dst->bi_iter.bi_size);
+
+ BUG_ON(*src_len & (block_bytes(c) - 1));
+ BUG_ON(*src_len > src->bi_iter.bi_size);
/* Didn't get smaller: */
if (round_up(*dst_len, block_bytes(c)) >= *src_len) {
@@ -382,9 +383,9 @@ void bch_bio_compress(struct cache_set *c,
unsigned orig_dst = dst->bi_iter.bi_size;
unsigned orig_src = src->bi_iter.bi_size;
- /* Don't consume more than BCH_COMPRESSED_EXTENT_MAX from @src: */
+ /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
src->bi_iter.bi_size =
- min(src->bi_iter.bi_size, BCH_COMPRESSED_EXTENT_MAX << 9);
+ min(src->bi_iter.bi_size, BCH_ENCODED_EXTENT_MAX << 9);
/* Don't generate a bigger output than input: */
dst->bi_iter.bi_size =
@@ -405,6 +406,30 @@ out:
src->bi_iter.bi_size = orig_src;
}
+/* doesn't write superblock: */
+int bch_check_set_has_compressed_data(struct cache_set *c,
+ unsigned compression_type)
+{
+ switch (compression_type) {
+ case BCH_COMPRESSION_NONE:
+ return 0;
+ case BCH_COMPRESSION_LZ4:
+ if (bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4))
+ return 0;
+
+ bch_sb_set_feature(c->disk_sb, BCH_FEATURE_LZ4);
+ break;
+ case BCH_COMPRESSION_GZIP:
+ if (bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
+ return 0;
+
+ bch_sb_set_feature(c->disk_sb, BCH_FEATURE_GZIP);
+ break;
+ }
+
+ return bch_compress_init(c);
+}
+
void bch_compress_free(struct cache_set *c)
{
vfree(c->zlib_workspace);
@@ -420,39 +445,56 @@ void bch_compress_free(struct cache_set *c)
int bch_compress_init(struct cache_set *c)
{
+ unsigned order = get_order(BCH_ENCODED_EXTENT_MAX << 9);
int ret, cpu;
- c->bio_decompress_worker = alloc_percpu(*c->bio_decompress_worker);
- if (!c->bio_decompress_worker)
- return -ENOMEM;
+ if (!bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4) &&
+ !bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP))
+ return 0;
- for_each_possible_cpu(cpu) {
- struct bio_decompress_worker *d =
- per_cpu_ptr(c->bio_decompress_worker, cpu);
+ if (!c->bio_decompress_worker) {
+ c->bio_decompress_worker = alloc_percpu(*c->bio_decompress_worker);
+ if (!c->bio_decompress_worker)
+ return -ENOMEM;
- d->c = c;
- INIT_WORK(&d->work, bch_bio_decompress_work);
- init_llist_head(&d->bio_list);
+ for_each_possible_cpu(cpu) {
+ struct bio_decompress_worker *d =
+ per_cpu_ptr(c->bio_decompress_worker, cpu);
+
+ d->c = c;
+ INIT_WORK(&d->work, bch_bio_decompress_work);
+ init_llist_head(&d->bio_list);
+ }
}
- ret = mempool_init_page_pool(&c->compression_bounce[READ], 1,
- get_order(BCH_COMPRESSED_EXTENT_MAX << 9));
- if (ret)
- return ret;
+ if (!mempool_initialized(&c->compression_bounce[READ])) {
+ ret = mempool_init_page_pool(&c->compression_bounce[READ],
+ 1, order);
+ if (ret)
+ return ret;
+ }
- ret = mempool_init_page_pool(&c->compression_bounce[WRITE], 1,
- get_order(BCH_COMPRESSED_EXTENT_MAX << 9));
- if (ret)
- return ret;
+ if (!mempool_initialized(&c->compression_bounce[WRITE])) {
+ ret = mempool_init_page_pool(&c->compression_bounce[WRITE],
+ 1, order);
+ if (ret)
+ return ret;
+ }
- ret = mempool_init_kmalloc_pool(&c->lz4_workspace_pool, 1,
- LZ4_MEM_COMPRESS);
- if (ret)
- return ret;
+ if (!mempool_initialized(&c->lz4_workspace_pool) &&
+ bch_sb_test_feature(c->disk_sb, BCH_FEATURE_LZ4)) {
+ ret = mempool_init_kmalloc_pool(&c->lz4_workspace_pool,
+ 1, LZ4_MEM_COMPRESS);
+ if (ret)
+ return ret;
+ }
- c->zlib_workspace = vmalloc(COMPRESSION_WORKSPACE_SIZE);
- if (!c->zlib_workspace)
- return -ENOMEM;
+ if (!c->zlib_workspace &&
+ bch_sb_test_feature(c->disk_sb, BCH_FEATURE_GZIP)) {
+ c->zlib_workspace = vmalloc(COMPRESSION_WORKSPACE_SIZE);
+ if (!c->zlib_workspace)
+ return -ENOMEM;
+ }
return 0;
}
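
A sketch of the intended call flow when a compression type is enabled on an existing filesystem (illustrative; the locking shown is an assumption modelled on the other bch_write_super() call sites in this patch, and the helper itself deliberately does not write the superblock):

	mutex_lock(&c->sb_lock);
	ret = bch_check_set_has_compressed_data(c, BCH_COMPRESSION_LZ4);
	if (!ret)
		bch_write_super(c);	/* persist the newly set BCH_FEATURE_LZ4 bit */
	mutex_unlock(&c->sb_lock);

bch_compress_init() is now idempotent, so re-running it from this path only allocates the pools and workspaces the enabled features actually need.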
diff --git a/libbcache/compress.h b/libbcache/compress.h
index 02578ef7..485acd95 100644
--- a/libbcache/compress.h
+++ b/libbcache/compress.h
@@ -2,12 +2,13 @@
#define _BCACHE_COMPRESS_H
int bch_bio_uncompress_inplace(struct cache_set *, struct bio *,
- unsigned, struct bch_extent_crc64);
+ unsigned, struct bch_extent_crc128);
int bch_bio_uncompress(struct cache_set *, struct bio *, struct bio *,
- struct bvec_iter, struct bch_extent_crc64);
+ struct bvec_iter, struct bch_extent_crc128);
void bch_bio_compress(struct cache_set *, struct bio *, size_t *,
struct bio *, size_t *, unsigned *);
+int bch_check_set_has_compressed_data(struct cache_set *, unsigned);
void bch_compress_free(struct cache_set *);
int bch_compress_init(struct cache_set *);
diff --git a/libbcache/debug.c b/libbcache/debug.c
index 39f5550e..d25c32ae 100644
--- a/libbcache/debug.c
+++ b/libbcache/debug.c
@@ -96,7 +96,7 @@ void __bch_btree_verify(struct cache_set *c, struct btree *b)
if (inmemory->u64s != sorted->u64s ||
memcmp(inmemory->start,
sorted->start,
- (void *) bset_bkey_last(inmemory) - (void *) inmemory->start)) {
+ vstruct_end(inmemory) - (void *) inmemory->start)) {
unsigned offset = 0, sectors;
struct bset *i;
unsigned j;
@@ -112,18 +112,14 @@ void __bch_btree_verify(struct cache_set *c, struct btree *b)
while (offset < b->written) {
if (!offset ) {
i = &n_ondisk->keys;
- sectors = __set_blocks(n_ondisk,
- le16_to_cpu(n_ondisk->keys.u64s),
- block_bytes(c)) <<
+ sectors = vstruct_blocks(n_ondisk, c->block_bits) <<
c->block_bits;
} else {
struct btree_node_entry *bne =
(void *) n_ondisk + (offset << 9);
i = &bne->keys;
- sectors = __set_blocks(bne,
- le16_to_cpu(bne->keys.u64s),
- block_bytes(c)) <<
+ sectors = vstruct_blocks(bne, c->block_bits) <<
c->block_bits;
}
@@ -427,7 +423,7 @@ void bch_debug_init_cache_set(struct cache_set *c)
if (IS_ERR_OR_NULL(bch_debug))
return;
- snprintf(name, sizeof(name), "%pU", c->disk_sb.user_uuid.b);
+ snprintf(name, sizeof(name), "%pU", c->sb.user_uuid.b);
c->debug = debugfs_create_dir(name, bch_debug);
if (IS_ERR_OR_NULL(c->debug))
return;
diff --git a/libbcache/dirent.c b/libbcache/dirent.c
index d97c3b22..ebf0f101 100644
--- a/libbcache/dirent.c
+++ b/libbcache/dirent.c
@@ -23,34 +23,13 @@ unsigned bch_dirent_name_bytes(struct bkey_s_c_dirent d)
static u64 bch_dirent_hash(const struct bch_hash_info *info,
const struct qstr *name)
{
- switch (info->type) {
- case BCH_STR_HASH_SHA1: {
- SHASH_DESC_ON_STACK(desc, bch_sha1);
- u8 digest[SHA1_DIGEST_SIZE];
- u64 ret;
- desc->tfm = bch_sha1;
- desc->flags = 0;
- crypto_shash_init(desc);
-
- crypto_shash_update(desc, (void *) &info->seed, sizeof(info->seed));
-
- crypto_shash_update(desc, (void *) name->name, name->len);
- crypto_shash_final(desc, digest);
- memcpy(&ret, &digest, sizeof(ret));
- return max_t(u64, ret >> 1, 2);
- }
- default: {
- struct bch_str_hash_ctx ctx;
-
- bch_str_hash_init(&ctx, info->type);
- bch_str_hash_update(&ctx, info->type, &info->seed, sizeof(info->seed));
+ struct bch_str_hash_ctx ctx;
- bch_str_hash_update(&ctx, info->type, name->name, name->len);
+ bch_str_hash_init(&ctx, info);
+ bch_str_hash_update(&ctx, info, name->name, name->len);
- /* [0,2) reserved for dots */
- return max_t(u64, bch_str_hash_end(&ctx, info->type), 2);
- }
- }
+ /* [0,2) reserved for dots */
+ return max_t(u64, bch_str_hash_end(&ctx, info), 2);
}
static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
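
In other words, after the unified hash path a name whose hash comes out as 0 or 1 is bumped up to 2, keeping the two lowest hash values reserved for the dot entries, per the comment above.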
diff --git a/libbcache/extents.c b/libbcache/extents.c
index c026d591..4b8a2665 100644
--- a/libbcache/extents.c
+++ b/libbcache/extents.c
@@ -9,19 +9,19 @@
#include "bkey_methods.h"
#include "btree_gc.h"
#include "btree_update.h"
+#include "checksum.h"
#include "debug.h"
#include "dirent.h"
#include "error.h"
#include "extents.h"
#include "inode.h"
#include "journal.h"
-#include "super.h"
+#include "super-io.h"
#include "writeback.h"
#include "xattr.h"
#include <trace/events/bcache.h>
-static bool __bch_extent_normalize(struct cache_set *, struct bkey_s, bool);
static enum merge_result bch_extent_merge(struct cache_set *, struct btree *,
struct bkey_i *, struct bkey_i *);
@@ -120,21 +120,38 @@ bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
return NULL;
}
-unsigned bch_extent_nr_ptrs_from(struct bkey_s_c_extent e,
- const struct bch_extent_ptr *start)
+unsigned bch_extent_nr_ptrs(struct bkey_s_c_extent e)
{
const struct bch_extent_ptr *ptr;
unsigned nr_ptrs = 0;
- extent_for_each_ptr_from(e, ptr, start)
+ extent_for_each_ptr(e, ptr)
nr_ptrs++;
return nr_ptrs;
}
-unsigned bch_extent_nr_ptrs(struct bkey_s_c_extent e)
+unsigned bch_extent_nr_dirty_ptrs(struct bkey_s_c k)
{
- return bch_extent_nr_ptrs_from(e, &e.v->start->ptr);
+ struct bkey_s_c_extent e;
+ const struct bch_extent_ptr *ptr;
+ unsigned nr_ptrs = 0;
+
+ switch (k.k->type) {
+ case BCH_EXTENT:
+ case BCH_EXTENT_CACHED:
+ e = bkey_s_c_to_extent(k);
+
+ extent_for_each_ptr(e, ptr)
+ nr_ptrs += !ptr->cached;
+ break;
+
+ case BCH_RESERVATION:
+ nr_ptrs = bkey_s_c_to_reservation(k).v->nr_replicas;
+ break;
+ }
+
+ return nr_ptrs;
}
/* returns true if equal */
@@ -177,16 +194,19 @@ void bch_extent_crc_narrow_pointers(struct bkey_s_extent e, union bch_extent_crc
*
* and then verify that crc_dead1 + crc_live + crc_dead2 == orig_crc, and then
* use crc_live here (that we verified was correct earlier)
+ *
+ * note: doesn't work with encryption
*/
void bch_extent_narrow_crcs(struct bkey_s_extent e)
{
union bch_extent_crc *crc;
bool have_wide = false, have_narrow = false;
- u64 csum = 0;
+ struct bch_csum csum = { 0 };
unsigned csum_type = 0;
extent_for_each_crc(e, crc) {
- if (crc_compression_type(crc))
+ if (crc_compression_type(crc) ||
+ bch_csum_type_is_encryption(crc_csum_type(crc)))
continue;
if (crc_uncompressed_size(e.k, crc) != e.k->size) {
@@ -210,26 +230,38 @@ void bch_extent_narrow_crcs(struct bkey_s_extent e)
case BCH_EXTENT_CRC_NONE:
BUG();
case BCH_EXTENT_CRC32:
- if (bch_crc_size[csum_type] > sizeof(crc->crc32.csum))
+ if (bch_crc_bytes[csum_type] > 4)
continue;
bch_extent_crc_narrow_pointers(e, crc);
- crc->crc32.compressed_size = e.k->size;
- crc->crc32.uncompressed_size = e.k->size;
+ crc->crc32._compressed_size = e.k->size - 1;
+ crc->crc32._uncompressed_size = e.k->size - 1;
crc->crc32.offset = 0;
crc->crc32.csum_type = csum_type;
- crc->crc32.csum = csum;
+ crc->crc32.csum = csum.lo;
break;
case BCH_EXTENT_CRC64:
- if (bch_crc_size[csum_type] > sizeof(crc->crc64.csum))
+ if (bch_crc_bytes[csum_type] > 10)
continue;
bch_extent_crc_narrow_pointers(e, crc);
- crc->crc64.compressed_size = e.k->size;
- crc->crc64.uncompressed_size = e.k->size;
+ crc->crc64._compressed_size = e.k->size - 1;
+ crc->crc64._uncompressed_size = e.k->size - 1;
crc->crc64.offset = 0;
crc->crc64.csum_type = csum_type;
- crc->crc64.csum = csum;
+ crc->crc64.csum_lo = csum.lo;
+ crc->crc64.csum_hi = csum.hi;
+ break;
+ case BCH_EXTENT_CRC128:
+ if (bch_crc_bytes[csum_type] > 16)
+ continue;
+
+ bch_extent_crc_narrow_pointers(e, crc);
+ crc->crc128._compressed_size = e.k->size - 1;
+ crc->crc128._uncompressed_size = e.k->size - 1;
+ crc->crc128.offset = 0;
+ crc->crc128.csum_type = csum_type;
+ crc->crc128.csum = csum;
break;
}
}
@@ -300,13 +332,8 @@ static void bch_extent_drop_stale(struct cache_set *c, struct bkey_s_extent e)
struct bch_extent_ptr *ptr = &e.v->start->ptr;
bool dropped = false;
- /*
- * We don't want to change which pointers are considered cached/dirty,
- * so don't remove pointers that are considered dirty:
- */
rcu_read_lock();
- while ((ptr = extent_ptr_next(e, ptr)) &&
- !bch_extent_ptr_is_dirty(c, e.c, ptr))
+ while ((ptr = extent_ptr_next(e, ptr)))
if (should_drop_ptr(c, e.c, ptr)) {
__bch_extent_drop_ptr(e, ptr);
dropped = true;
@@ -321,16 +348,43 @@ static void bch_extent_drop_stale(struct cache_set *c, struct bkey_s_extent e)
static bool bch_ptr_normalize(struct cache_set *c, struct btree *bk,
struct bkey_s k)
{
- return __bch_extent_normalize(c, k, false);
+ return bch_extent_normalize(c, k);
}
static void bch_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
{
- u64 *d = (u64 *) bkeyp_val(f, k);
- unsigned i;
+ switch (k->type) {
+ case BCH_EXTENT:
+ case BCH_EXTENT_CACHED: {
+ union bch_extent_entry *entry;
+ u64 *d = (u64 *) bkeyp_val(f, k);
+ unsigned i;
- for (i = 0; i < bkeyp_val_u64s(f, k); i++)
- d[i] = swab64(d[i]);
+ for (i = 0; i < bkeyp_val_u64s(f, k); i++)
+ d[i] = swab64(d[i]);
+
+ for (entry = (union bch_extent_entry *) d;
+ entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
+ entry = extent_entry_next(entry)) {
+ switch (extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_crc32:
+ entry->crc32.csum = swab32(entry->crc32.csum);
+ break;
+ case BCH_EXTENT_ENTRY_crc64:
+ entry->crc64.csum_hi = swab16(entry->crc64.csum_hi);
+ entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
+ break;
+ case BCH_EXTENT_ENTRY_crc128:
+ entry->crc128.csum.hi = swab64(entry->crc128.csum.hi);
+ entry->crc128.csum.lo = swab64(entry->crc128.csum.lo);
+ break;
+ case BCH_EXTENT_ENTRY_ptr:
+ break;
+ }
+ }
+ break;
+ }
+ }
}
static const char *extent_ptr_invalid(struct bkey_s_c_extent e,
@@ -341,7 +395,7 @@ static const char *extent_ptr_invalid(struct bkey_s_c_extent e,
const struct bch_extent_ptr *ptr2;
const struct cache_member_cpu *m = mi->m + ptr->dev;
- if (ptr->dev > mi->nr_in_set || !m->valid)
+ if (ptr->dev > mi->nr_devices || !m->valid)
return "pointer to invalid device";
extent_for_each_ptr(e, ptr2)
@@ -380,7 +434,9 @@ static size_t extent_print_ptrs(struct cache_set *c, char *buf,
switch (__extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
+ case BCH_EXTENT_ENTRY_crc128:
crc = entry_to_crc(entry);
+
p("crc: c_size %u size %u offset %u csum %u compress %u",
crc_compressed_size(e.k, crc),
crc_uncompressed_size(e.k, crc),
@@ -388,7 +444,8 @@ static size_t extent_print_ptrs(struct cache_set *c, char *buf,
crc_compression_type(crc));
break;
case BCH_EXTENT_ENTRY_ptr:
- ptr = &entry->ptr;
+ ptr = entry_to_ptr(entry);
+
p("ptr: %u:%llu gen %u%s", ptr->dev,
(u64) ptr->offset, ptr->gen,
(ca = PTR_CACHE(c, ptr)) && ptr_stale(ca, ptr)
@@ -621,6 +678,10 @@ static bool __bch_cut_front(struct bpos where, struct bkey_s k)
if (prev_crc != crc)
crc->crc64.offset += e.k->size - len;
break;
+ case BCH_EXTENT_CRC128:
+ if (prev_crc != crc)
+ crc->crc128.offset += e.k->size - len;
+ break;
}
prev_crc = crc;
}
@@ -948,7 +1009,7 @@ static bool bch_extent_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
BUG_ON(!l.k->size || !r.k->size);
if (l.k->type != r.k->type ||
- l.k->version != r.k->version)
+ bversion_cmp(l.k->version, r.k->version))
return false;
switch (l.k->type) {
@@ -985,7 +1046,7 @@ static bool bch_extent_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
extent_for_each_ptr(le, lp) {
const union bch_extent_entry *entry =
- bkey_idx(re.v, (u64 *) lp - le.v->_data);
+ vstruct_idx(re.v, (u64 *) lp - le.v->_data);
if (!extent_entry_is_ptr(entry))
return false;
@@ -1142,7 +1203,7 @@ static void extent_insert_committed(struct extent_insert_state *s)
if (!(s->trans->flags & BTREE_INSERT_JOURNAL_REPLAY) &&
bkey_cmp(s->committed, insert->k.p) &&
- bkey_extent_is_compressed(c, bkey_i_to_s_c(insert))) {
+ bkey_extent_is_compressed(bkey_i_to_s_c(insert))) {
/* XXX: possibly need to increase our reservation? */
bch_cut_subtract_back(s, s->committed,
bkey_i_to_s(&split.k));
@@ -1178,12 +1239,19 @@ __extent_insert_advance_pos(struct extent_insert_state *s,
{
struct extent_insert_hook *hook = s->trans->hook;
enum extent_insert_hook_ret ret;
-
+#if 0
+ /*
+ * Currently disabled for encryption - broken with fcollapse. Will have
+ * to reenable when versions are exposed for send/receive - versions
+ * will have to be monotonic then:
+ */
if (k.k && k.k->size &&
- s->insert->k->k.version &&
- k.k->version > s->insert->k->k.version)
+ !bversion_zero(s->insert->k->k.version) &&
+ bversion_cmp(k.k->version, s->insert->k->k.version) > 0) {
ret = BTREE_HOOK_NO_INSERT;
- else if (hook)
+ } else
+#endif
+ if (hook)
ret = hook->fn(hook, s->committed, next_pos, k, s->insert->k);
else
ret = BTREE_HOOK_DO_INSERT;
@@ -1257,7 +1325,7 @@ extent_insert_check_split_compressed(struct extent_insert_state *s,
unsigned sectors;
if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
- (sectors = bkey_extent_is_compressed(c, k))) {
+ (sectors = bkey_extent_is_compressed(k))) {
int flags = BCH_DISK_RESERVATION_BTREE_LOCKS_HELD;
if (s->trans->flags & BTREE_INSERT_NOFAIL)
@@ -1680,6 +1748,7 @@ static const char *bch_extent_invalid(const struct cache_set *c,
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
const union bch_extent_crc *crc;
+ const struct bch_extent_ptr *ptr;
struct cache_member_rcu *mi = cache_member_info_get(c);
unsigned size_ondisk = e.k->size;
const char *reason;
@@ -1689,9 +1758,7 @@ static const char *bch_extent_invalid(const struct cache_set *c,
if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
goto invalid;
- switch (extent_entry_type(entry)) {
- case BCH_EXTENT_ENTRY_crc32:
- case BCH_EXTENT_ENTRY_crc64:
+ if (extent_entry_is_crc(entry)) {
crc = entry_to_crc(entry);
reason = "checksum offset + key size > uncompressed size";
@@ -1702,19 +1769,19 @@ static const char *bch_extent_invalid(const struct cache_set *c,
size_ondisk = crc_compressed_size(e.k, crc);
reason = "invalid checksum type";
- if (crc_csum_type(crc) >= BCH_CSUM_NR)
+ if (!bch_checksum_type_valid(c, crc_csum_type(crc)))
goto invalid;
reason = "invalid compression type";
if (crc_compression_type(crc) >= BCH_COMPRESSION_NR)
goto invalid;
- break;
- case BCH_EXTENT_ENTRY_ptr:
+ } else {
+ ptr = entry_to_ptr(entry);
+
reason = extent_ptr_invalid(e, mi,
&entry->ptr, size_ondisk);
if (reason)
goto invalid;
- break;
}
}
@@ -1725,8 +1792,17 @@ invalid:
return reason;
}
- case BCH_RESERVATION:
+ case BCH_RESERVATION: {
+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+
+ if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation))
+ return "incorrect value size";
+
+ if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX)
+ return "invalid nr_replicas";
+
return NULL;
+ }
default:
return "invalid value type";
@@ -1743,7 +1819,7 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
unsigned seq, stale;
char buf[160];
bool bad;
- unsigned ptrs_per_tier[CACHE_TIERS];
+ unsigned ptrs_per_tier[BCH_TIER_MAX];
unsigned tier, replicas = 0;
/*
@@ -1760,11 +1836,9 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
mi = cache_member_info_get(c);
extent_for_each_ptr(e, ptr) {
- bool dirty = bch_extent_ptr_is_dirty(c, e, ptr);
-
replicas++;
- if (ptr->dev >= mi->nr_in_set)
+ if (ptr->dev >= mi->nr_devices)
goto bad_device;
/*
@@ -1796,7 +1870,7 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
stale = ptr_stale(ca, ptr);
- cache_set_bug_on(stale && dirty, c,
+ cache_set_bug_on(stale && !ptr->cached, c,
"stale dirty pointer");
cache_set_bug_on(stale > 96, c,
@@ -1809,9 +1883,9 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
bad = (mark.is_metadata ||
(gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 &&
!mark.owned_by_allocator &&
- !(dirty
- ? mark.dirty_sectors
- : mark.cached_sectors)));
+ !(ptr->cached
+ ? mark.cached_sectors
+ : mark.dirty_sectors)));
} while (read_seqcount_retry(&c->gc_pos_lock, seq));
if (bad)
@@ -1869,6 +1943,7 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
bch_extent_debugcheck_extent(c, b, bkey_s_c_to_extent(k));
+ break;
case BCH_RESERVATION:
break;
default:
@@ -1896,69 +1971,77 @@ static void bch_extent_to_text(struct cache_set *c, char *buf,
static unsigned PTR_TIER(struct cache_member_rcu *mi,
const struct bch_extent_ptr *ptr)
{
- return ptr->dev < mi->nr_in_set
+ return ptr->dev < mi->nr_devices
? mi->m[ptr->dev].tier
: UINT_MAX;
}
-void bch_extent_entry_append(struct bkey_i_extent *e,
- union bch_extent_entry *entry)
-{
- BUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) >
- BKEY_EXTENT_VAL_U64s_MAX);
-
- memcpy_u64s(extent_entry_last(extent_i_to_s(e)),
- entry,
- extent_entry_u64s(entry));
- e->k.u64s += extent_entry_u64s(entry);
-}
-
-const unsigned bch_crc_size[] = {
- [BCH_CSUM_NONE] = 0,
- [BCH_CSUM_CRC32C] = 4,
- [BCH_CSUM_CRC64] = 8,
-};
-
static void bch_extent_crc_init(union bch_extent_crc *crc,
unsigned compressed_size,
unsigned uncompressed_size,
unsigned compression_type,
- u64 csum, unsigned csum_type)
+ unsigned nonce,
+ struct bch_csum csum, unsigned csum_type)
{
- if (bch_crc_size[csum_type] <= 4 &&
- uncompressed_size <= CRC32_EXTENT_SIZE_MAX) {
+ if (bch_crc_bytes[csum_type] <= 4 &&
+ uncompressed_size <= CRC32_SIZE_MAX &&
+ nonce <= CRC32_NONCE_MAX) {
crc->crc32 = (struct bch_extent_crc32) {
.type = 1 << BCH_EXTENT_ENTRY_crc32,
- .compressed_size = compressed_size,
- .uncompressed_size = uncompressed_size,
+ ._compressed_size = compressed_size - 1,
+ ._uncompressed_size = uncompressed_size - 1,
.offset = 0,
.compression_type = compression_type,
.csum_type = csum_type,
- .csum = csum,
+ .csum = *((__le32 *) &csum.lo),
};
- } else {
- BUG_ON(uncompressed_size > CRC64_EXTENT_SIZE_MAX);
+ return;
+ }
+ if (bch_crc_bytes[csum_type] <= 10 &&
+ uncompressed_size <= CRC64_SIZE_MAX &&
+ nonce <= CRC64_NONCE_MAX) {
crc->crc64 = (struct bch_extent_crc64) {
.type = 1 << BCH_EXTENT_ENTRY_crc64,
- .compressed_size = compressed_size,
- .uncompressed_size = uncompressed_size,
+ ._compressed_size = compressed_size - 1,
+ ._uncompressed_size = uncompressed_size - 1,
+ .offset = 0,
+ .nonce = nonce,
+ .compression_type = compression_type,
+ .csum_type = csum_type,
+ .csum_lo = csum.lo,
+ .csum_hi = *((__le16 *) &csum.hi),
+ };
+ return;
+ }
+
+ if (bch_crc_bytes[csum_type] <= 16 &&
+ uncompressed_size <= CRC128_SIZE_MAX &&
+ nonce <= CRC128_NONCE_MAX) {
+ crc->crc128 = (struct bch_extent_crc128) {
+ .type = 1 << BCH_EXTENT_ENTRY_crc128,
+ ._compressed_size = compressed_size - 1,
+ ._uncompressed_size = uncompressed_size - 1,
.offset = 0,
+ .nonce = nonce,
.compression_type = compression_type,
.csum_type = csum_type,
.csum = csum,
};
+ return;
}
+
+ BUG();
}
void bch_extent_crc_append(struct bkey_i_extent *e,
unsigned compressed_size,
unsigned uncompressed_size,
unsigned compression_type,
- u64 csum, unsigned csum_type)
+ unsigned nonce,
+ struct bch_csum csum, unsigned csum_type)
{
union bch_extent_crc *crc;
- union bch_extent_crc new;
BUG_ON(compressed_size > uncompressed_size);
BUG_ON(uncompressed_size != e->k.size);
@@ -1971,123 +2054,26 @@ void bch_extent_crc_append(struct bkey_i_extent *e,
extent_for_each_crc(extent_i_to_s(e), crc)
;
- switch (extent_crc_type(crc)) {
- case BCH_EXTENT_CRC_NONE:
- if (!csum_type && !compression_type)
- return;
- break;
- case BCH_EXTENT_CRC32:
- case BCH_EXTENT_CRC64:
- if (crc_compressed_size(&e->k, crc) == compressed_size &&
- crc_uncompressed_size(&e->k, crc) == uncompressed_size &&
- crc_offset(crc) == 0 &&
- crc_compression_type(crc) == compression_type &&
- crc_csum_type(crc) == csum_type &&
- crc_csum(crc) == csum)
- return;
- break;
- }
+ if (!crc && !csum_type && !compression_type)
+ return;
+
+ if (crc &&
+ crc_compressed_size(&e->k, crc) == compressed_size &&
+ crc_uncompressed_size(&e->k, crc) == uncompressed_size &&
+ crc_offset(crc) == 0 &&
+ crc_nonce(crc) == nonce &&
+ crc_csum_type(crc) == csum_type &&
+ crc_compression_type(crc) == compression_type &&
+ crc_csum(crc).lo == csum.lo &&
+ crc_csum(crc).hi == csum.hi)
+ return;
- bch_extent_crc_init(&new,
+ bch_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)),
compressed_size,
uncompressed_size,
compression_type,
- csum, csum_type);
- bch_extent_entry_append(e, to_entry(&new));
-}
-
-static void __extent_sort_ptrs(struct cache_member_rcu *mi,
- struct bkey_s_extent src)
-{
- struct bch_extent_ptr *src_ptr, *dst_ptr;
- union bch_extent_crc *src_crc, *dst_crc;
- union bch_extent_crc _src;
- BKEY_PADDED(k) tmp;
- struct bkey_s_extent dst;
- size_t u64s, crc_u64s;
- u64 *p;
-
- /*
- * Insertion sort:
- *
- * Note: this sort needs to be stable, because pointer order determines
- * pointer dirtyness.
- */
-
- tmp.k.k = *src.k;
- dst = bkey_i_to_s_extent(&tmp.k);
- set_bkey_val_u64s(dst.k, 0);
-
- extent_for_each_ptr_crc(src, src_ptr, src_crc) {
- extent_for_each_ptr_crc(dst, dst_ptr, dst_crc)
- if (PTR_TIER(mi, src_ptr) < PTR_TIER(mi, dst_ptr))
- goto found;
-
- dst_ptr = &extent_entry_last(dst)->ptr;
- dst_crc = NULL;
-found:
- /* found insert position: */
-
- /*
- * we're making sure everything has a crc at this point, if
- * dst_ptr points to a pointer it better have a crc:
- */
- BUG_ON(dst_ptr != &extent_entry_last(dst)->ptr && !dst_crc);
- BUG_ON(dst_crc &&
- (extent_entry_next(to_entry(dst_crc)) !=
- to_entry(dst_ptr)));
-
- if (!src_crc) {
- bch_extent_crc_init(&_src, src.k->size,
- src.k->size, 0, 0, 0);
- src_crc = &_src;
- }
-
- p = dst_ptr != &extent_entry_last(dst)->ptr
- ? (void *) dst_crc
- : (void *) dst_ptr;
-
- crc_u64s = extent_entry_u64s(to_entry(src_crc));
- u64s = crc_u64s + sizeof(*dst_ptr) / sizeof(u64);
-
- memmove_u64s_up(p + u64s, p,
- (u64 *) extent_entry_last(dst) - (u64 *) p);
- set_bkey_val_u64s(dst.k, bkey_val_u64s(dst.k) + u64s);
-
- memcpy_u64s(p, src_crc, crc_u64s);
- memcpy_u64s(p + crc_u64s, src_ptr,
- sizeof(*src_ptr) / sizeof(u64));
- }
-
- /* Sort done - now drop redundant crc entries: */
- bch_extent_drop_redundant_crcs(dst);
-
- memcpy_u64s(src.v, dst.v, bkey_val_u64s(dst.k));
- set_bkey_val_u64s(src.k, bkey_val_u64s(dst.k));
-}
-
-static void extent_sort_ptrs(struct cache_set *c, struct bkey_s_extent e)
-{
- struct cache_member_rcu *mi;
- struct bch_extent_ptr *ptr, *prev = NULL;
- union bch_extent_crc *crc;
-
- /*
- * First check if any pointers are out of order before doing the actual
- * sort:
- */
- mi = cache_member_info_get(c);
-
- extent_for_each_ptr_crc(e, ptr, crc) {
- if (prev &&
- PTR_TIER(mi, ptr) < PTR_TIER(mi, prev)) {
- __extent_sort_ptrs(mi, e);
- break;
- }
- prev = ptr;
- }
-
- cache_member_info_put();
+ nonce, csum, csum_type);
+ __extent_entry_push(e);
}
/*
@@ -2098,8 +2084,7 @@ static void extent_sort_ptrs(struct cache_set *c, struct bkey_s_extent e)
* For existing keys, only called when btree nodes are being rewritten, not when
* they're merely being compacted/resorted in memory.
*/
-static bool __bch_extent_normalize(struct cache_set *c, struct bkey_s k,
- bool sort)
+bool bch_extent_normalize(struct cache_set *c, struct bkey_s k)
{
struct bkey_s_extent e;
@@ -2112,7 +2097,7 @@ static bool __bch_extent_normalize(struct cache_set *c, struct bkey_s k,
return true;
case KEY_TYPE_DISCARD:
- return !k.k->version;
+ return bversion_zero(k.k->version);
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
@@ -2120,13 +2105,10 @@ static bool __bch_extent_normalize(struct cache_set *c, struct bkey_s k,
bch_extent_drop_stale(c, e);
- if (sort)
- extent_sort_ptrs(c, e);
-
if (!bkey_val_u64s(e.k)) {
if (bkey_extent_is_cached(e.k)) {
k.k->type = KEY_TYPE_DISCARD;
- if (!k.k->version)
+ if (bversion_zero(k.k->version))
return true;
} else {
k.k->type = KEY_TYPE_ERROR;
@@ -2141,9 +2123,40 @@ static bool __bch_extent_normalize(struct cache_set *c, struct bkey_s k,
}
}
-bool bch_extent_normalize(struct cache_set *c, struct bkey_s k)
+void bch_extent_mark_replicas_cached(struct cache_set *c,
+ struct bkey_s_extent e,
+ unsigned nr_cached)
{
- return __bch_extent_normalize(c, k, true);
+ struct bch_extent_ptr *ptr;
+ struct cache_member_rcu *mi;
+ bool have_higher_tier;
+ unsigned tier = 0;
+
+ if (!nr_cached)
+ return;
+
+ mi = cache_member_info_get(c);
+
+ do {
+ have_higher_tier = false;
+
+ extent_for_each_ptr(e, ptr) {
+ if (!ptr->cached &&
+ PTR_TIER(mi, ptr) == tier) {
+ ptr->cached = true;
+ nr_cached--;
+ if (!nr_cached)
+ goto out;
+ }
+
+ if (PTR_TIER(mi, ptr) > tier)
+ have_higher_tier = true;
+ }
+
+ tier++;
+ } while (have_higher_tier);
+out:
+ cache_member_info_put();
}
/*
@@ -2183,7 +2196,7 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k,
extent_for_each_online_device_crc(c, e, crc, ptr, ca)
if (!ptr_stale(ca, ptr)) {
*ret = (struct extent_pick_ptr) {
- .crc = crc_to_64(e.k, crc),
+ .crc = crc_to_128(e.k, crc),
.ptr = *ptr,
.ca = ca,
};
@@ -2227,7 +2240,7 @@ static enum merge_result bch_extent_merge(struct cache_set *c,
if (l->k.u64s != r->k.u64s ||
l->k.type != r->k.type ||
- l->k.version != r->k.version ||
+ bversion_cmp(l->k.version, r->k.version) ||
bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
return BCH_MERGE_NOMERGE;
@@ -2235,7 +2248,6 @@ static enum merge_result bch_extent_merge(struct cache_set *c,
case KEY_TYPE_DELETED:
case KEY_TYPE_DISCARD:
case KEY_TYPE_ERROR:
- case BCH_RESERVATION:
/* These types are mergeable, and no val to check */
break;
@@ -2248,7 +2260,7 @@ static enum merge_result bch_extent_merge(struct cache_set *c,
struct bch_extent_ptr *lp, *rp;
struct cache_member_cpu *m;
- en_r = bkey_idx(er.v, (u64 *) en_l - el.v->_data);
+ en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
if ((extent_entry_type(en_l) !=
extent_entry_type(en_r)) ||
@@ -2276,6 +2288,15 @@ static enum merge_result bch_extent_merge(struct cache_set *c,
}
break;
+ case BCH_RESERVATION: {
+ struct bkey_i_reservation *li = bkey_i_to_reservation(l);
+ struct bkey_i_reservation *ri = bkey_i_to_reservation(r);
+
+ if (li->v.generation != ri->v.generation ||
+ li->v.nr_replicas != ri->v.nr_replicas)
+ return BCH_MERGE_NOMERGE;
+ break;
+ }
default:
return BCH_MERGE_NOMERGE;
}
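
A recurring detail in the new crc entries above: compressed and uncompressed sizes are stored biased by one in the _compressed_size/_uncompressed_size fields, so an N-bit field covers sizes 1 through 2^N (a zero-size extent never needs to be represented). Encode and decode look like this (cf. bch_extent_crc_init() above and the crc_*_size() accessors in extents.h):

	crc->crc32._uncompressed_size = e.k->size - 1;	/* store */
	size = crc->crc32._uncompressed_size + 1;	/* load */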
diff --git a/libbcache/extents.h b/libbcache/extents.h
index e1cb47ab..b0a05422 100644
--- a/libbcache/extents.h
+++ b/libbcache/extents.h
@@ -26,7 +26,7 @@ struct cache_set;
struct journal_res;
struct extent_pick_ptr {
- struct bch_extent_crc64 crc;
+ struct bch_extent_crc128 crc;
struct bch_extent_ptr ptr;
struct cache *ca;
};
@@ -53,10 +53,11 @@ bch_insert_fixup_extent(struct btree_insert *,
struct btree_insert_entry *);
bool bch_extent_normalize(struct cache_set *, struct bkey_s);
+void bch_extent_mark_replicas_cached(struct cache_set *,
+ struct bkey_s_extent, unsigned);
-unsigned bch_extent_nr_ptrs_from(struct bkey_s_c_extent,
- const struct bch_extent_ptr *);
unsigned bch_extent_nr_ptrs(struct bkey_s_c_extent);
+unsigned bch_extent_nr_dirty_ptrs(struct bkey_s_c);
static inline bool bkey_extent_is_data(const struct bkey *k)
{
@@ -117,6 +118,8 @@ static inline size_t extent_entry_bytes(const union bch_extent_entry *entry)
return sizeof(struct bch_extent_crc32);
case BCH_EXTENT_ENTRY_crc64:
return sizeof(struct bch_extent_crc64);
+ case BCH_EXTENT_ENTRY_crc128:
+ return sizeof(struct bch_extent_crc128);
case BCH_EXTENT_ENTRY_ptr:
return sizeof(struct bch_extent_ptr);
default:
@@ -143,6 +146,7 @@ union bch_extent_crc {
u8 type;
struct bch_extent_crc32 crc32;
struct bch_extent_crc64 crc64;
+ struct bch_extent_crc128 crc128;
};
/* downcast, preserves const */
@@ -185,10 +189,11 @@ enum bch_extent_crc_type {
BCH_EXTENT_CRC_NONE,
BCH_EXTENT_CRC32,
BCH_EXTENT_CRC64,
+ BCH_EXTENT_CRC128,
};
static inline enum bch_extent_crc_type
-extent_crc_type(const union bch_extent_crc *crc)
+__extent_crc_type(const union bch_extent_crc *crc)
{
if (!crc)
return BCH_EXTENT_CRC_NONE;
@@ -198,16 +203,31 @@ extent_crc_type(const union bch_extent_crc *crc)
return BCH_EXTENT_CRC32;
case BCH_EXTENT_ENTRY_crc64:
return BCH_EXTENT_CRC64;
+ case BCH_EXTENT_ENTRY_crc128:
+ return BCH_EXTENT_CRC128;
default:
BUG();
}
}
+#define extent_crc_type(_crc) \
+({ \
+ BUILD_BUG_ON(!type_is(_crc, struct bch_extent_crc32 *) && \
+ !type_is(_crc, struct bch_extent_crc64 *) && \
+ !type_is(_crc, struct bch_extent_crc128 *) && \
+ !type_is(_crc, union bch_extent_crc *)); \
+ \
+ type_is(_crc, struct bch_extent_crc32 *) ? BCH_EXTENT_CRC32 \
+ : type_is(_crc, struct bch_extent_crc64 *) ? BCH_EXTENT_CRC64 \
+ : type_is(_crc, struct bch_extent_crc128 *) ? BCH_EXTENT_CRC128 \
+ : __extent_crc_type((union bch_extent_crc *) _crc); \
+})
+
#define extent_entry_next(_entry) \
((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry)))
#define extent_entry_last(_e) \
- bkey_idx((_e).v, bkey_val_u64s((_e).k))
+ vstruct_idx((_e).v, bkey_val_u64s((_e).k))
/* Iterate over all entries: */
@@ -283,20 +303,16 @@ out: \
#define extent_ptr_next(_e, _ptr) \
extent_ptr_next_filter(_e, _ptr, true)
-#define extent_for_each_ptr_from_filter(_e, _ptr, _start, _filter) \
- for ((_ptr) = (_start); \
+#define extent_for_each_ptr_filter(_e, _ptr, _filter) \
+ for ((_ptr) = &(_e).v->start->ptr; \
((_ptr) = extent_ptr_next_filter(_e, _ptr, _filter)); \
(_ptr)++)
-#define extent_for_each_ptr_from(_e, _ptr, _start) \
- extent_for_each_ptr_from_filter(_e, _ptr, _start, true)
-
#define extent_for_each_ptr(_e, _ptr) \
- extent_for_each_ptr_from_filter(_e, _ptr, &(_e).v->start->ptr, true)
+ extent_for_each_ptr_filter(_e, _ptr, true)
#define extent_for_each_online_device(_c, _e, _ptr, _ca) \
- extent_for_each_ptr_from_filter(_e, _ptr, &(_e).v->start->ptr, \
- ((_ca) = PTR_CACHE(_c, _ptr)))
+ extent_for_each_ptr_filter(_e, _ptr, ((_ca) = PTR_CACHE(_c, _ptr)))
#define extent_ptr_prev(_e, _ptr) \
({ \
@@ -321,67 +337,114 @@ out: \
(_ptr); \
(_ptr) = extent_ptr_prev(_e, _ptr))
-void bch_extent_entry_append(struct bkey_i_extent *, union bch_extent_entry *);
void bch_extent_crc_append(struct bkey_i_extent *, unsigned, unsigned,
- unsigned, u64, unsigned);
+ unsigned, unsigned, struct bch_csum, unsigned);
+
+static inline void __extent_entry_push(struct bkey_i_extent *e)
+{
+ union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e));
+
+ EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) >
+ BKEY_EXTENT_VAL_U64s_MAX);
+
+ e->k.u64s += extent_entry_u64s(entry);
+}
static inline void extent_ptr_append(struct bkey_i_extent *e,
struct bch_extent_ptr ptr)
{
ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
- bch_extent_entry_append(e, to_entry(&ptr));
+ extent_entry_last(extent_i_to_s(e))->ptr = ptr;
+ __extent_entry_push(e);
}
-/* XXX: inefficient */
-static inline bool bch_extent_ptr_is_dirty(const struct cache_set *c,
- struct bkey_s_c_extent e,
- const struct bch_extent_ptr *ptr)
+static inline struct bch_extent_crc128 crc_to_128(const struct bkey *k,
+ const union bch_extent_crc *crc)
{
- if (bkey_extent_is_cached(e.k))
- return false;
-
- /* Dirty pointers come last */
- return bch_extent_nr_ptrs_from(e, ptr) <= c->opts.data_replicas;
-}
-
-extern const unsigned bch_crc_size[];
+ EBUG_ON(!k->size);
-static inline struct bch_extent_crc64 crc_to_64(const struct bkey *k,
- const union bch_extent_crc *crc)
-{
switch (extent_crc_type(crc)) {
case BCH_EXTENT_CRC_NONE:
- return (struct bch_extent_crc64) {
- .compressed_size = k->size,
- .uncompressed_size = k->size,
+ return (struct bch_extent_crc128) {
+ ._compressed_size = k->size - 1,
+ ._uncompressed_size = k->size - 1,
};
case BCH_EXTENT_CRC32:
- return (struct bch_extent_crc64) {
- .compressed_size = crc->crc32.compressed_size,
- .uncompressed_size = crc->crc32.uncompressed_size,
+ return (struct bch_extent_crc128) {
+ .type = 1 << BCH_EXTENT_ENTRY_crc128,
+ ._compressed_size = crc->crc32._compressed_size,
+ ._uncompressed_size = crc->crc32._uncompressed_size,
.offset = crc->crc32.offset,
.csum_type = crc->crc32.csum_type,
.compression_type = crc->crc32.compression_type,
- .csum = crc->crc32.csum,
+ .csum.lo = crc->crc32.csum,
};
case BCH_EXTENT_CRC64:
- return crc->crc64;
+ return (struct bch_extent_crc128) {
+ .type = 1 << BCH_EXTENT_ENTRY_crc128,
+ ._compressed_size = crc->crc64._compressed_size,
+ ._uncompressed_size = crc->crc64._uncompressed_size,
+ .offset = crc->crc64.offset,
+ .nonce = crc->crc64.nonce,
+ .csum_type = crc->crc64.csum_type,
+ .compression_type = crc->crc64.compression_type,
+ .csum.lo = crc->crc64.csum_lo,
+ .csum.hi = crc->crc64.csum_hi,
+ };
+ case BCH_EXTENT_CRC128:
+ return crc->crc128;
default:
BUG();
}
}
-static inline unsigned crc_compressed_size(const struct bkey *k,
- const union bch_extent_crc *crc)
-{
- return crc_to_64(k, crc).compressed_size;
-}
+#define crc_compressed_size(_k, _crc) \
+({ \
+ unsigned _size = 0; \
+ \
+ switch (extent_crc_type(_crc)) { \
+ case BCH_EXTENT_CRC_NONE: \
+ _size = ((const struct bkey *) (_k))->size; \
+ break; \
+ case BCH_EXTENT_CRC32: \
+ _size = ((struct bch_extent_crc32 *) _crc) \
+ ->_compressed_size + 1; \
+ break; \
+ case BCH_EXTENT_CRC64: \
+ _size = ((struct bch_extent_crc64 *) _crc) \
+ ->_compressed_size + 1; \
+ break; \
+ case BCH_EXTENT_CRC128: \
+ _size = ((struct bch_extent_crc128 *) _crc) \
+ ->_compressed_size + 1; \
+ break; \
+ } \
+ _size; \
+})
-static inline unsigned crc_uncompressed_size(const struct bkey *k,
- const union bch_extent_crc *crc)
-{
- return crc_to_64(k, crc).uncompressed_size;
-}
+#define crc_uncompressed_size(_k, _crc) \
+({ \
+ unsigned _size = 0; \
+ \
+ switch (extent_crc_type(_crc)) { \
+ case BCH_EXTENT_CRC_NONE: \
+ _size = ((const struct bkey *) (_k))->size; \
+ break; \
+ case BCH_EXTENT_CRC32: \
+ _size = ((struct bch_extent_crc32 *) _crc) \
+ ->_uncompressed_size + 1; \
+ break; \
+ case BCH_EXTENT_CRC64: \
+ _size = ((struct bch_extent_crc64 *) _crc) \
+ ->_uncompressed_size + 1; \
+ break; \
+ case BCH_EXTENT_CRC128: \
+ _size = ((struct bch_extent_crc128 *) _crc) \
+ ->_uncompressed_size + 1; \
+ break; \
+ } \
+ _size; \
+})
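Editor's note: the _compressed_size/_uncompressed_size fields are stored biased by -1 (note the "k->size - 1" in crc_to_128() above), so an N-bit field covers sizes 1..2^N rather than 0..2^N-1; the accessors here add the 1 back. A standalone round-trip illustration with hypothetical helper names, not part of the patch:

static inline unsigned crc_size_to_field(unsigned sectors) { return sectors - 1; }
static inline unsigned crc_field_to_size(unsigned field)   { return field + 1; }
/* e.g. a 7-bit field can represent 1..128 sectors: crc_size_to_field(128) == 127 */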
static inline unsigned crc_offset(const union bch_extent_crc *crc)
{
@@ -392,6 +455,23 @@ static inline unsigned crc_offset(const union bch_extent_crc *crc)
return crc->crc32.offset;
case BCH_EXTENT_CRC64:
return crc->crc64.offset;
+ case BCH_EXTENT_CRC128:
+ return crc->crc128.offset;
+ default:
+ BUG();
+ }
+}
+
+static inline unsigned crc_nonce(const union bch_extent_crc *crc)
+{
+ switch (extent_crc_type(crc)) {
+ case BCH_EXTENT_CRC_NONE:
+ case BCH_EXTENT_CRC32:
+ return 0;
+ case BCH_EXTENT_CRC64:
+ return crc->crc64.nonce;
+ case BCH_EXTENT_CRC128:
+ return crc->crc128.nonce;
default:
BUG();
}
@@ -406,6 +486,8 @@ static inline unsigned crc_csum_type(const union bch_extent_crc *crc)
return crc->crc32.csum_type;
case BCH_EXTENT_CRC64:
return crc->crc64.csum_type;
+ case BCH_EXTENT_CRC128:
+ return crc->crc128.csum_type;
default:
BUG();
}
@@ -420,27 +502,33 @@ static inline unsigned crc_compression_type(const union bch_extent_crc *crc)
return crc->crc32.compression_type;
case BCH_EXTENT_CRC64:
return crc->crc64.compression_type;
+ case BCH_EXTENT_CRC128:
+ return crc->crc128.compression_type;
default:
BUG();
}
}
-static inline u64 crc_csum(const union bch_extent_crc *crc)
+static inline struct bch_csum crc_csum(const union bch_extent_crc *crc)
{
switch (extent_crc_type(crc)) {
case BCH_EXTENT_CRC_NONE:
- return 0;
+ return (struct bch_csum) { 0 };
case BCH_EXTENT_CRC32:
- return crc->crc32.csum;
+ return (struct bch_csum) { .lo = crc->crc32.csum };
case BCH_EXTENT_CRC64:
- return crc->crc64.csum;
+ return (struct bch_csum) {
+ .lo = crc->crc64.csum_lo,
+ .hi = crc->crc64.csum_hi,
+ };
+ case BCH_EXTENT_CRC128:
+ return crc->crc128.csum;
default:
BUG();
}
}
-static inline unsigned bkey_extent_is_compressed(struct cache_set *c,
- struct bkey_s_c k)
+static inline unsigned bkey_extent_is_compressed(struct bkey_s_c k)
{
struct bkey_s_c_extent e;
const struct bch_extent_ptr *ptr;
@@ -453,7 +541,7 @@ static inline unsigned bkey_extent_is_compressed(struct cache_set *c,
e = bkey_s_c_to_extent(k);
extent_for_each_ptr_crc(e, ptr, crc)
- if (bch_extent_ptr_is_dirty(c, e, ptr) &&
+ if (!ptr->cached &&
crc_compression_type(crc) != BCH_COMPRESSION_NONE &&
crc_compressed_size(e.k, crc) < k.k->size)
ret = max_t(unsigned, ret,
@@ -463,6 +551,17 @@ static inline unsigned bkey_extent_is_compressed(struct cache_set *c,
return ret;
}
+static inline unsigned extent_current_nonce(struct bkey_s_c_extent e)
+{
+ const union bch_extent_crc *crc;
+
+ extent_for_each_crc(e, crc)
+ if (bch_csum_type_is_encryption(crc_csum_type(crc)))
+ return crc_offset(crc) + crc_nonce(crc);
+
+ return 0;
+}
+
void bch_extent_narrow_crcs(struct bkey_s_extent);
void bch_extent_drop_redundant_crcs(struct bkey_s_extent);
diff --git a/libbcache/fs-gc.c b/libbcache/fs-gc.c
index 1dec230f..a758e895 100644
--- a/libbcache/fs-gc.c
+++ b/libbcache/fs-gc.c
@@ -17,7 +17,7 @@ static int remove_dirent(struct cache_set *c, struct btree_iter *iter,
struct bkey_s_c_dirent dirent)
{
struct qstr name;
- struct bkey_i_inode dir_inode;
+ struct bch_inode_unpacked dir_inode;
struct bch_hash_info dir_hash_info;
u64 dir_inum = dirent.k->p.inode;
int ret;
@@ -39,7 +39,7 @@ static int remove_dirent(struct cache_set *c, struct btree_iter *iter,
if (ret)
goto err;
- dir_hash_info = bch_hash_info_init(&dir_inode.v);
+ dir_hash_info = bch_hash_info_init(&dir_inode);
ret = bch_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
err:
@@ -48,11 +48,12 @@ err:
}
static int reattach_inode(struct cache_set *c,
- struct bkey_i_inode *lostfound_inode,
+ struct bch_inode_unpacked *lostfound_inode,
u64 inum)
{
struct bch_hash_info lostfound_hash_info =
- bch_hash_info_init(&lostfound_inode->v);
+ bch_hash_info_init(lostfound_inode);
+ struct bkey_inode_buf packed;
char name_buf[20];
struct qstr name;
int ret;
@@ -60,14 +61,16 @@ static int reattach_inode(struct cache_set *c,
snprintf(name_buf, sizeof(name_buf), "%llu", inum);
name = (struct qstr) QSTR(name_buf);
- le32_add_cpu(&lostfound_inode->v.i_nlink, 1);
+ lostfound_inode->i_nlink++;
- ret = bch_btree_insert(c, BTREE_ID_INODES, &lostfound_inode->k_i,
+ bch_inode_pack(&packed, lostfound_inode);
+
+ ret = bch_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
NULL, NULL, NULL, 0);
if (ret)
return ret;
- return bch_dirent_create(c, lostfound_inode->k.p.inode,
+ return bch_dirent_create(c, lostfound_inode->inum,
&lostfound_hash_info,
DT_DIR, &name, inum, NULL, 0);
}
@@ -75,10 +78,8 @@ static int reattach_inode(struct cache_set *c,
struct inode_walker {
bool first_this_inode;
bool have_inode;
- u16 i_mode;
- u64 i_size;
u64 cur_inum;
- struct bkey_i_inode inode;
+ struct bch_inode_unpacked inode;
};
static struct inode_walker inode_walker_init(void)
@@ -101,11 +102,6 @@ static int walk_inode(struct cache_set *c, struct inode_walker *w, u64 inum)
return ret;
w->have_inode = !ret;
-
- if (w->have_inode) {
- w->i_mode = le16_to_cpu(w->inode.v.i_mode);
- w->i_size = le64_to_cpu(w->inode.v.i_size);
- }
}
return 0;
@@ -138,20 +134,20 @@ static int check_extents(struct cache_set *c)
k.k->type, k.k->p.inode);
unfixable_fsck_err_on(w.first_this_inode && w.have_inode &&
- le64_to_cpu(w.inode.v.i_sectors) !=
+ w.inode.i_sectors !=
(i_sectors = bch_count_inode_sectors(c, w.cur_inum)),
c, "i_sectors wrong: got %llu, should be %llu",
- le64_to_cpu(w.inode.v.i_sectors), i_sectors);
+ w.inode.i_sectors, i_sectors);
unfixable_fsck_err_on(w.have_inode &&
- !S_ISREG(w.i_mode) && !S_ISLNK(w.i_mode), c,
+ !S_ISREG(w.inode.i_mode) && !S_ISLNK(w.inode.i_mode), c,
"extent type %u for non regular file, inode %llu mode %o",
- k.k->type, k.k->p.inode, w.i_mode);
+ k.k->type, k.k->p.inode, w.inode.i_mode);
unfixable_fsck_err_on(k.k->type != BCH_RESERVATION &&
- k.k->p.offset > round_up(w.i_size, PAGE_SIZE) >> 9, c,
+ k.k->p.offset > round_up(w.inode.i_size, PAGE_SIZE) >> 9, c,
"extent type %u offset %llu past end of inode %llu, i_size %llu",
- k.k->type, k.k->p.offset, k.k->p.inode, w.i_size);
+ k.k->type, k.k->p.offset, k.k->p.inode, w.inode.i_size);
}
fsck_err:
return bch_btree_iter_unlock(&iter) ?: ret;
@@ -172,7 +168,7 @@ static int check_dirents(struct cache_set *c)
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
POS(BCACHE_ROOT_INO, 0), k) {
struct bkey_s_c_dirent d;
- struct bkey_i_inode target;
+ struct bch_inode_unpacked target;
bool have_target;
u64 d_inum;
@@ -184,9 +180,9 @@ static int check_dirents(struct cache_set *c)
"dirent in nonexisting directory %llu",
k.k->p.inode);
- unfixable_fsck_err_on(!S_ISDIR(w.i_mode), c,
+ unfixable_fsck_err_on(!S_ISDIR(w.inode.i_mode), c,
"dirent in non directory inode %llu, type %u",
- k.k->p.inode, mode_to_type(w.i_mode));
+ k.k->p.inode, mode_to_type(w.inode.i_mode));
if (k.k->type != BCH_DIRENT)
continue;
@@ -220,10 +216,10 @@ static int check_dirents(struct cache_set *c)
if (fsck_err_on(have_target &&
d.v->d_type !=
- mode_to_type(le16_to_cpu(target.v.i_mode)), c,
+ mode_to_type(le16_to_cpu(target.i_mode)), c,
"incorrect d_type: got %u should be %u, filename %s",
d.v->d_type,
- mode_to_type(le16_to_cpu(target.v.i_mode)),
+ mode_to_type(le16_to_cpu(target.i_mode)),
d.v->d_name)) {
struct bkey_i_dirent *n;
@@ -234,7 +230,7 @@ static int check_dirents(struct cache_set *c)
}
bkey_reassemble(&n->k_i, d.s_c);
- n->v.d_type = mode_to_type(le16_to_cpu(target.v.i_mode));
+ n->v.d_type = mode_to_type(le16_to_cpu(target.i_mode));
ret = bch_btree_insert_at(c, NULL, NULL, NULL,
BTREE_INSERT_NOFAIL,
@@ -276,8 +272,9 @@ fsck_err:
}
/* Get root directory, create if it doesn't exist: */
-static int check_root(struct cache_set *c, struct bkey_i_inode *root_inode)
+static int check_root(struct cache_set *c, struct bch_inode_unpacked *root_inode)
{
+ struct bkey_inode_buf packed;
int ret;
ret = bch_inode_find_by_inum(c, BCACHE_ROOT_INO, root_inode);
@@ -287,7 +284,7 @@ static int check_root(struct cache_set *c, struct bkey_i_inode *root_inode)
if (fsck_err_on(ret, c, "root directory missing"))
goto create_root;
- if (fsck_err_on(!S_ISDIR(le16_to_cpu(root_inode->v.i_mode)), c,
+ if (fsck_err_on(!S_ISDIR(root_inode->i_mode), c,
"root inode not a directory"))
goto create_root;
@@ -296,19 +293,23 @@ fsck_err:
return ret;
create_root:
bch_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
- root_inode->k.p.inode = BCACHE_ROOT_INO;
+ root_inode->inum = BCACHE_ROOT_INO;
+
+ bch_inode_pack(&packed, root_inode);
- return bch_btree_insert(c, BTREE_ID_INODES, &root_inode->k_i,
+ return bch_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
NULL, NULL, NULL, 0);
}
/* Get lost+found, create if it doesn't exist: */
static int check_lostfound(struct cache_set *c,
- struct bkey_i_inode *root_inode,
- struct bkey_i_inode *lostfound_inode)
+ struct bch_inode_unpacked *root_inode,
+ struct bch_inode_unpacked *lostfound_inode)
{
struct qstr lostfound = QSTR("lost+found");
- struct bch_hash_info root_hash_info = bch_hash_info_init(&root_inode->v);
+ struct bch_hash_info root_hash_info =
+ bch_hash_info_init(root_inode);
+ struct bkey_inode_buf packed;
u64 inum;
int ret;
@@ -326,7 +327,7 @@ static int check_lostfound(struct cache_set *c,
if (fsck_err_on(ret, c, "lost+found missing"))
goto create_lostfound;
- if (fsck_err_on(!S_ISDIR(le16_to_cpu(lostfound_inode->v.i_mode)), c,
+ if (fsck_err_on(!S_ISDIR(lostfound_inode->i_mode), c,
"lost+found inode not a directory"))
goto create_lostfound;
@@ -334,22 +335,27 @@ static int check_lostfound(struct cache_set *c,
fsck_err:
return ret;
create_lostfound:
- le32_add_cpu(&root_inode->v.i_nlink, 1);
+ root_inode->i_nlink++;
- ret = bch_btree_insert(c, BTREE_ID_INODES, &root_inode->k_i,
+ bch_inode_pack(&packed, root_inode);
+
+ ret = bch_btree_insert(c, BTREE_ID_INODES, &packed.inode.k_i,
NULL, NULL, NULL, 0);
if (ret)
return ret;
bch_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
+ bch_inode_pack(&packed, lostfound_inode);
- ret = bch_inode_create(c, &lostfound_inode->k_i, BLOCKDEV_INODE_MAX, 0,
+ ret = bch_inode_create(c, &packed.inode.k_i, BLOCKDEV_INODE_MAX, 0,
&c->unused_inode_hint);
if (ret)
return ret;
+ lostfound_inode->inum = packed.inode.k.p.inode;
+
ret = bch_dirent_create(c, BCACHE_ROOT_INO, &root_hash_info, DT_DIR,
- &lostfound, lostfound_inode->k.p.inode, NULL, 0);
+ &lostfound, lostfound_inode->inum, NULL, 0);
if (ret)
return ret;
@@ -420,7 +426,7 @@ static int path_down(struct pathbuf *p, u64 inum)
noinline_for_stack
static int check_directory_structure(struct cache_set *c,
- struct bkey_i_inode *lostfound_inode)
+ struct bch_inode_unpacked *lostfound_inode)
{
struct inode_bitmap dirs_done = { NULL, 0 };
struct pathbuf path = { 0, 0, NULL };
@@ -618,25 +624,30 @@ s64 bch_count_inode_sectors(struct cache_set *c, u64 inum)
}
static int bch_gc_do_inode(struct cache_set *c,
- struct bkey_i_inode *lostfound_inode,
+ struct bch_inode_unpacked *lostfound_inode,
struct btree_iter *iter,
struct bkey_s_c_inode inode, struct nlink link)
{
- u16 i_mode = le16_to_cpu(inode.v->i_mode);
- u32 i_flags = le32_to_cpu(inode.v->i_flags);
- u32 i_nlink = le32_to_cpu(inode.v->i_nlink);
- u64 i_size = le64_to_cpu(inode.v->i_size);
- s64 i_sectors = 0;
+ struct bch_inode_unpacked u;
int ret = 0;
- u32 real_i_nlink;
+ u32 i_nlink, real_i_nlink;
+ bool do_update = false;
+
+ ret = bch_inode_unpack(inode, &u);
+ if (cache_set_inconsistent_on(ret, c,
+ "error unpacking inode %llu in fs-gc",
+ inode.k->p.inode))
+ return ret;
+
+ i_nlink = u.i_nlink + nlink_bias(u.i_mode);
fsck_err_on(i_nlink < link.count, c,
"inode %llu i_link too small (%u < %u, type %i)",
inode.k->p.inode, i_nlink,
- link.count, mode_to_type(i_mode));
+ link.count, mode_to_type(u.i_mode));
/* These should have been caught/fixed by earlier passes: */
- if (S_ISDIR(i_mode)) {
+ if (S_ISDIR(u.i_mode)) {
need_fsck_err_on(link.count > 1, c,
"directory %llu with multiple hardlinks: %u",
inode.k->p.inode, link.count);
@@ -656,7 +667,7 @@ static int bch_gc_do_inode(struct cache_set *c,
"but found orphaned inode %llu",
inode.k->p.inode);
- if (fsck_err_on(S_ISDIR(i_mode) &&
+ if (fsck_err_on(S_ISDIR(u.i_mode) &&
bch_empty_dir(c, inode.k->p.inode), c,
"non empty directory with link count 0, "
"inode nlink %u, dir links found %u",
@@ -676,7 +687,7 @@ static int bch_gc_do_inode(struct cache_set *c,
return ret;
}
- if (i_flags & BCH_INODE_I_SIZE_DIRTY) {
+ if (u.i_flags & BCH_INODE_I_SIZE_DIRTY) {
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but inode %llu has i_size dirty",
@@ -690,7 +701,7 @@ static int bch_gc_do_inode(struct cache_set *c,
*/
ret = bch_inode_truncate(c, inode.k->p.inode,
- round_up(i_size, PAGE_SIZE) >> 9,
+ round_up(u.i_size, PAGE_SIZE) >> 9,
NULL, NULL);
if (ret) {
bch_err(c, "error in fs gc: error %i "
@@ -702,10 +713,15 @@ static int bch_gc_do_inode(struct cache_set *c,
* We truncated without our normal sector accounting hook, just
* make sure we recalculate it:
*/
- i_flags |= BCH_INODE_I_SECTORS_DIRTY;
+ u.i_flags |= BCH_INODE_I_SECTORS_DIRTY;
+
+ u.i_flags &= ~BCH_INODE_I_SIZE_DIRTY;
+ do_update = true;
}
- if (i_flags & BCH_INODE_I_SECTORS_DIRTY) {
+ if (u.i_flags & BCH_INODE_I_SECTORS_DIRTY) {
+ s64 sectors;
+
fsck_err_on(c->sb.clean, c,
"filesystem marked clean, "
"but inode %llu has i_sectors dirty",
@@ -714,13 +730,17 @@ static int bch_gc_do_inode(struct cache_set *c,
bch_verbose(c, "recounting sectors for inode %llu",
inode.k->p.inode);
- i_sectors = bch_count_inode_sectors(c, inode.k->p.inode);
- if (i_sectors < 0) {
+ sectors = bch_count_inode_sectors(c, inode.k->p.inode);
+ if (sectors < 0) {
bch_err(c, "error in fs gc: error %i "
"recounting inode sectors",
- (int) i_sectors);
- return i_sectors;
+ (int) sectors);
+ return sectors;
}
+
+ u.i_sectors = sectors;
+ u.i_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
+ do_update = true;
}
if (i_nlink != real_i_nlink) {
@@ -728,30 +748,23 @@ static int bch_gc_do_inode(struct cache_set *c,
"filesystem marked clean, "
"but inode %llu has wrong i_nlink "
"(type %u i_nlink %u, should be %u)",
- inode.k->p.inode, mode_to_type(i_mode),
+ inode.k->p.inode, mode_to_type(u.i_mode),
i_nlink, real_i_nlink);
bch_verbose(c, "setting inode %llu nlinks from %u to %u",
inode.k->p.inode, i_nlink, real_i_nlink);
+ u.i_nlink = real_i_nlink - nlink_bias(u.i_mode);
+ do_update = true;
}
- if (i_nlink != real_i_nlink||
- i_flags & BCH_INODE_I_SECTORS_DIRTY ||
- i_flags & BCH_INODE_I_SIZE_DIRTY) {
- struct bkey_i_inode update;
-
- bkey_reassemble(&update.k_i, inode.s_c);
- update.v.i_nlink = cpu_to_le32(real_i_nlink);
- update.v.i_flags = cpu_to_le32(i_flags &
- ~(BCH_INODE_I_SIZE_DIRTY|
- BCH_INODE_I_SECTORS_DIRTY));
+ if (do_update) {
+ struct bkey_inode_buf p;
- if (i_flags & BCH_INODE_I_SECTORS_DIRTY)
- update.v.i_sectors = cpu_to_le64(i_sectors);
+ bch_inode_pack(&p, &u);
ret = bch_btree_insert_at(c, NULL, NULL, NULL,
BTREE_INSERT_NOFAIL,
- BTREE_INSERT_ENTRY(iter, &update.k_i));
+ BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
if (ret && ret != -EINTR)
bch_err(c, "error in fs gc: error %i "
"updating inode", ret);
@@ -762,7 +775,7 @@ fsck_err:
noinline_for_stack
static int bch_gc_walk_inodes(struct cache_set *c,
- struct bkey_i_inode *lostfound_inode,
+ struct bch_inode_unpacked *lostfound_inode,
struct nlinks *links,
u64 range_start, u64 range_end)
{
@@ -835,7 +848,7 @@ fsck_err:
noinline_for_stack
static int check_inode_nlinks(struct cache_set *c,
- struct bkey_i_inode *lostfound_inode)
+ struct bch_inode_unpacked *lostfound_inode)
{
struct nlinks links;
u64 this_iter_range_start, next_iter_range_start = 0;
@@ -873,7 +886,7 @@ static int check_inode_nlinks(struct cache_set *c,
*/
int bch_fsck(struct cache_set *c, bool full_fsck)
{
- struct bkey_i_inode root_inode, lostfound_inode;
+ struct bch_inode_unpacked root_inode, lostfound_inode;
int ret;
ret = check_root(c, &root_inode);
diff --git a/libbcache/fs-io.c b/libbcache/fs-io.c
index 942baeb1..ecf249c3 100644
--- a/libbcache/fs-io.c
+++ b/libbcache/fs-io.c
@@ -59,22 +59,20 @@ static int write_invalidate_inode_pages_range(struct address_space *mapping,
/* i_size updates: */
-static int inode_set_size(struct bch_inode_info *ei, struct bch_inode *bi,
+static int inode_set_size(struct bch_inode_info *ei,
+ struct bch_inode_unpacked *bi,
void *p)
{
loff_t *new_i_size = p;
- unsigned i_flags = le32_to_cpu(bi->i_flags);
lockdep_assert_held(&ei->update_lock);
- bi->i_size = cpu_to_le64(*new_i_size);
+ bi->i_size = *new_i_size;
if (atomic_long_read(&ei->i_size_dirty_count))
- i_flags |= BCH_INODE_I_SIZE_DIRTY;
+ bi->i_flags |= BCH_INODE_I_SIZE_DIRTY;
else
- i_flags &= ~BCH_INODE_I_SIZE_DIRTY;
-
- bi->i_flags = cpu_to_le32(i_flags);
+ bi->i_flags &= ~BCH_INODE_I_SIZE_DIRTY;
return 0;
}
@@ -122,23 +120,22 @@ i_sectors_hook_fn(struct extent_insert_hook *hook,
}
static int inode_set_i_sectors_dirty(struct bch_inode_info *ei,
- struct bch_inode *bi, void *p)
+ struct bch_inode_unpacked *bi, void *p)
{
- BUG_ON(le32_to_cpu(bi->i_flags) & BCH_INODE_I_SECTORS_DIRTY);
+ BUG_ON(bi->i_flags & BCH_INODE_I_SECTORS_DIRTY);
- bi->i_flags = cpu_to_le32(le32_to_cpu(bi->i_flags)|
- BCH_INODE_I_SECTORS_DIRTY);
+ bi->i_flags |= BCH_INODE_I_SECTORS_DIRTY;
return 0;
}
static int inode_clear_i_sectors_dirty(struct bch_inode_info *ei,
- struct bch_inode *bi, void *p)
+ struct bch_inode_unpacked *bi,
+ void *p)
{
- BUG_ON(!(le32_to_cpu(bi->i_flags) & BCH_INODE_I_SECTORS_DIRTY));
+ BUG_ON(!(bi->i_flags & BCH_INODE_I_SECTORS_DIRTY));
- bi->i_sectors = cpu_to_le64(atomic64_read(&ei->i_sectors));
- bi->i_flags = cpu_to_le32(le32_to_cpu(bi->i_flags) &
- ~BCH_INODE_I_SECTORS_DIRTY);
+ bi->i_sectors = atomic64_read(&ei->i_sectors);
+ bi->i_flags &= ~BCH_INODE_I_SECTORS_DIRTY;
return 0;
}
@@ -203,7 +200,10 @@ static int __must_check i_sectors_dirty_get(struct bch_inode_info *ei,
struct bchfs_extent_trans_hook {
struct bchfs_write_op *op;
struct extent_insert_hook hook;
- struct bkey_i_inode new_inode;
+
+ struct bch_inode_unpacked inode_u;
+ struct bkey_inode_buf inode_p;
+
bool need_inode_update;
};
@@ -222,6 +222,7 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook,
(k.k && bkey_extent_is_allocation(k.k));
s64 sectors = (s64) (next_pos.offset - committed_pos.offset) * sign;
u64 offset = min(next_pos.offset << 9, h->op->new_i_size);
+ bool do_pack = false;
BUG_ON((next_pos.offset << 9) > round_up(offset, PAGE_SIZE));
@@ -234,7 +235,9 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook,
return BTREE_HOOK_RESTART_TRANS;
}
- h->new_inode.v.i_size = cpu_to_le64(offset);
+ h->inode_u.i_size = offset;
+ do_pack = true;
+
ei->i_size = offset;
if (h->op->is_dio)
@@ -247,7 +250,9 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook,
return BTREE_HOOK_RESTART_TRANS;
}
- le64_add_cpu(&h->new_inode.v.i_sectors, sectors);
+ h->inode_u.i_sectors += sectors;
+ do_pack = true;
+
atomic64_add(sectors, &ei->i_sectors);
h->op->sectors_added += sectors;
@@ -259,6 +264,9 @@ bchfs_extent_update_hook(struct extent_insert_hook *hook,
}
}
+ if (do_pack)
+ bch_inode_pack(&h->inode_p, &h->inode_u);
+
return BTREE_HOOK_DO_INSERT;
}
@@ -310,13 +318,32 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
break;
}
- bkey_reassemble(&hook.new_inode.k_i, inode);
+ if (WARN_ONCE(bkey_bytes(inode.k) >
+ sizeof(hook.inode_p),
+ "inode %llu too big (%zu bytes, buf %zu)",
+ extent_iter.pos.inode,
+ bkey_bytes(inode.k),
+ sizeof(hook.inode_p))) {
+ ret = -ENOENT;
+ break;
+ }
+
+ bkey_reassemble(&hook.inode_p.inode.k_i, inode);
+ ret = bch_inode_unpack(bkey_s_c_to_inode(inode),
+ &hook.inode_u);
+ if (WARN_ONCE(ret,
+ "error %i unpacking inode %llu",
+ ret, extent_iter.pos.inode)) {
+ ret = -ENOENT;
+ break;
+ }
ret = bch_btree_insert_at(wop->c, &wop->res,
&hook.hook, op_journal_seq(wop),
BTREE_INSERT_NOFAIL|BTREE_INSERT_ATOMIC,
BTREE_INSERT_ENTRY(&extent_iter, k),
- BTREE_INSERT_ENTRY(&inode_iter, &hook.new_inode.k_i));
+ BTREE_INSERT_ENTRY_EXTRA_RES(&inode_iter,
+ &hook.inode_p.inode.k_i, 2));
} else {
ret = bch_btree_insert_at(wop->c, &wop->res,
&hook.hook, op_journal_seq(wop),
@@ -350,25 +377,15 @@ err:
struct bch_page_state {
union { struct {
/*
- * BCH_PAGE_ALLOCATED: page is _fully_ written on disk, and not
- * compressed - which means to write this page we don't have to reserve
- * space (the new write will never take up more space on disk than what
- * it's overwriting)
- *
- * BCH_PAGE_UNALLOCATED: page is not fully written on disk, or is
- * compressed - before writing we have to reserve space with
- * bch_reserve_sectors()
- *
- * BCH_PAGE_RESERVED: page has space reserved on disk (reservation will
- * be consumed when the page is written).
+ * page is _fully_ written on disk, and not compressed - which means to
+ * write this page we don't have to reserve space (the new write will
+ * never take up more space on disk than what it's overwriting)
*/
- enum {
- BCH_PAGE_UNALLOCATED = 0,
- BCH_PAGE_ALLOCATED,
- } alloc_state:2;
+ unsigned allocated:1;
/* Owns PAGE_SECTORS sized reservation: */
unsigned reserved:1;
+ unsigned nr_replicas:4;
/*
* Number of sectors on disk - for i_blocks
@@ -431,11 +448,9 @@ static int bch_get_page_reservation(struct cache_set *c, struct page *page,
struct disk_reservation res;
int ret = 0;
- BUG_ON(s->alloc_state == BCH_PAGE_ALLOCATED &&
- s->sectors != PAGE_SECTORS);
+ BUG_ON(s->allocated && s->sectors != PAGE_SECTORS);
- if (s->reserved ||
- s->alloc_state == BCH_PAGE_ALLOCATED)
+ if (s->allocated || s->reserved)
return 0;
ret = bch_disk_reservation_get(c, &res, PAGE_SECTORS, !check_enospc
@@ -448,7 +463,8 @@ static int bch_get_page_reservation(struct cache_set *c, struct page *page,
bch_disk_reservation_put(c, &res);
return 0;
}
- new.reserved = 1;
+ new.reserved = 1;
+ new.nr_replicas = res.nr_replicas;
});
return 0;
@@ -585,10 +601,10 @@ static void bch_mark_pages_unalloc(struct bio *bio)
struct bio_vec bv;
bio_for_each_segment(bv, bio, iter)
- page_state(bv.bv_page)->alloc_state = BCH_PAGE_UNALLOCATED;
+ page_state(bv.bv_page)->allocated = 0;
}
-static void bch_add_page_sectors(struct bio *bio, const struct bkey *k)
+static void bch_add_page_sectors(struct bio *bio, struct bkey_s_c k)
{
struct bvec_iter iter;
struct bio_vec bv;
@@ -597,12 +613,17 @@ static void bch_add_page_sectors(struct bio *bio, const struct bkey *k)
struct bch_page_state *s = page_state(bv.bv_page);
/* sectors in @k from the start of this page: */
- unsigned k_sectors = k->size - (iter.bi_sector - k->p.offset);
+ unsigned k_sectors = k.k->size - (iter.bi_sector - k.k->p.offset);
unsigned page_sectors = min(bv.bv_len >> 9, k_sectors);
- BUG_ON(s->sectors + page_sectors > PAGE_SECTORS);
+ if (!s->sectors)
+ s->nr_replicas = bch_extent_nr_dirty_ptrs(k);
+ else
+ s->nr_replicas = min_t(unsigned, s->nr_replicas,
+ bch_extent_nr_dirty_ptrs(k));
+ BUG_ON(s->sectors + page_sectors > PAGE_SECTORS);
s->sectors += page_sectors;
}
}
@@ -634,7 +655,7 @@ static void bchfs_read(struct cache_set *c, struct bch_read_bio *rbio, u64 inode
EBUG_ON(s->reserved);
- s->alloc_state = BCH_PAGE_ALLOCATED;
+ s->allocated = 1;
s->sectors = 0;
}
@@ -650,7 +671,7 @@ static void bchfs_read(struct cache_set *c, struct bch_read_bio *rbio, u64 inode
k = bkey_i_to_s_c(&tmp.k);
if (!bkey_extent_is_allocation(k.k) ||
- bkey_extent_is_compressed(c, k))
+ bkey_extent_is_compressed(k))
bch_mark_pages_unalloc(bio);
bch_extent_pick_ptr(c, k, &pick);
@@ -667,7 +688,7 @@ static void bchfs_read(struct cache_set *c, struct bch_read_bio *rbio, u64 inode
swap(bio->bi_iter.bi_size, bytes);
if (bkey_extent_is_allocation(k.k))
- bch_add_page_sectors(bio, k.k);
+ bch_add_page_sectors(bio, k);
if (pick.ca) {
PTR_BUCKET(pick.ca, &pick.ptr)->read_prio =
@@ -859,6 +880,10 @@ static void bch_writepage_io_alloc(struct cache_set *c,
struct page *page)
{
u64 inum = ei->vfs_inode.i_ino;
+ unsigned nr_replicas = page_state(page)->nr_replicas;
+
+ EBUG_ON(!nr_replicas);
+ /* XXX: disk_reservation->gen isn't plumbed through */
if (!w->io) {
alloc_io:
@@ -881,7 +906,8 @@ alloc_io:
w->io->op.op.index_update_fn = bchfs_write_index_update;
}
- if (bio_add_page_contig(&w->io->bio.bio, page)) {
+ if (w->io->op.op.res.nr_replicas != nr_replicas ||
+ bio_add_page_contig(&w->io->bio.bio, page)) {
bch_writepage_do_io(w);
goto alloc_io;
}
@@ -936,13 +962,13 @@ do_io:
/* Before unlocking the page, transfer reservation to w->io: */
old = page_state_cmpxchg(page_state(page), new, {
- BUG_ON(!new.reserved &&
- (new.sectors != PAGE_SECTORS ||
- new.alloc_state != BCH_PAGE_ALLOCATED));
+ EBUG_ON(!new.reserved &&
+ (new.sectors != PAGE_SECTORS ||
+ !new.allocated));
- if (new.alloc_state == BCH_PAGE_ALLOCATED &&
+ if (new.allocated &&
w->io->op.op.compression_type != BCH_COMPRESSION_NONE)
- new.alloc_state = BCH_PAGE_UNALLOCATED;
+ new.allocated = 0;
else if (!new.reserved)
goto out;
new.reserved = 0;
@@ -1919,7 +1945,7 @@ int bch_truncate(struct inode *inode, struct iattr *iattr)
mutex_lock(&ei->update_lock);
setattr_copy(inode, iattr);
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
/* clear I_SIZE_DIRTY: */
i_size_dirty_put(ei);
@@ -1981,7 +2007,7 @@ static long bch_fpunch(struct inode *inode, loff_t offset, loff_t len)
ret = bch_discard(c,
POS(ino, discard_start),
POS(ino, discard_end),
- 0,
+ ZERO_VERSION,
&disk_res,
&i_sectors_hook.hook,
&ei->journal_seq);
@@ -2132,12 +2158,11 @@ static long bch_fallocate(struct inode *inode, int mode,
struct cache_set *c = inode->i_sb->s_fs_info;
struct i_sectors_hook i_sectors_hook;
struct btree_iter iter;
- struct bkey_i reservation;
- struct bkey_s_c k;
struct bpos end;
loff_t block_start, block_end;
loff_t new_size = offset + len;
unsigned sectors;
+ unsigned replicas = READ_ONCE(c->opts.data_replicas);
int ret;
bch_btree_iter_init_intent(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
@@ -2186,13 +2211,16 @@ static long bch_fallocate(struct inode *inode, int mode,
while (bkey_cmp(iter.pos, end) < 0) {
struct disk_reservation disk_res = { 0 };
+ struct bkey_i_reservation reservation;
+ struct bkey_s_c k;
k = bch_btree_iter_peek_with_holes(&iter);
if ((ret = btree_iter_err(k)))
goto btree_iter_err;
/* already reserved */
- if (k.k->type == BCH_RESERVATION) {
+ if (k.k->type == BCH_RESERVATION &&
+ bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) {
bch_btree_iter_advance_pos(&iter);
continue;
}
@@ -2204,29 +2232,32 @@ static long bch_fallocate(struct inode *inode, int mode,
}
}
- bkey_init(&reservation.k);
+ bkey_reservation_init(&reservation.k_i);
reservation.k.type = BCH_RESERVATION;
reservation.k.p = k.k->p;
reservation.k.size = k.k->size;
- bch_cut_front(iter.pos, &reservation);
+ bch_cut_front(iter.pos, &reservation.k_i);
bch_cut_back(end, &reservation.k);
sectors = reservation.k.size;
+ reservation.v.nr_replicas = bch_extent_nr_dirty_ptrs(k);
- if (!bkey_extent_is_allocation(k.k) ||
- bkey_extent_is_compressed(c, k)) {
+ if (reservation.v.nr_replicas < replicas ||
+ bkey_extent_is_compressed(k)) {
ret = bch_disk_reservation_get(c, &disk_res,
sectors, 0);
if (ret)
goto err_put_sectors_dirty;
+
+ reservation.v.nr_replicas = disk_res.nr_replicas;
}
ret = bch_btree_insert_at(c, &disk_res, &i_sectors_hook.hook,
&ei->journal_seq,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL,
- BTREE_INSERT_ENTRY(&iter, &reservation));
+ BTREE_INSERT_ENTRY(&iter, &reservation.k_i));
bch_disk_reservation_put(c, &disk_res);
btree_iter_err:
if (ret < 0 && ret != -EINTR)
diff --git a/libbcache/fs.c b/libbcache/fs.c
index 884a950f..76948e79 100644
--- a/libbcache/fs.c
+++ b/libbcache/fs.c
@@ -26,7 +26,9 @@
static struct kmem_cache *bch_inode_cache;
-static void bch_vfs_inode_init(struct bch_inode_info *, struct bkey_s_c_inode);
+static void bch_vfs_inode_init(struct cache_set *,
+ struct bch_inode_info *,
+ struct bch_inode_unpacked *);
/*
* I_SIZE_DIRTY requires special handling:
@@ -63,11 +65,20 @@ int __must_check __bch_write_inode(struct cache_set *c,
{
struct btree_iter iter;
struct inode *inode = &ei->vfs_inode;
- struct bkey_i_inode new_inode;
- struct bch_inode *bi;
+ struct bch_inode_unpacked inode_u;
+ struct bkey_inode_buf inode_p;
u64 inum = inode->i_ino;
+ unsigned i_nlink = READ_ONCE(inode->i_nlink);
int ret;
+ /*
+ * We can't write an inode with i_nlink == 0 because it's stored biased;
+ * however, we don't need to because if i_nlink is 0 the inode is
+ * getting deleted when it's evicted.
+ */
+ if (!i_nlink)
+ return 0;
+
lockdep_assert_held(&ei->update_lock);
bch_btree_iter_init_intent(&iter, c, BTREE_ID_INODES, POS(inum, 0));
@@ -84,33 +95,41 @@ int __must_check __bch_write_inode(struct cache_set *c,
return -ENOENT;
}
- bkey_reassemble(&new_inode.k_i, k);
- bi = &new_inode.v;
+ ret = bch_inode_unpack(bkey_s_c_to_inode(k), &inode_u);
+ if (WARN_ONCE(ret,
+ "error %i unpacking inode %llu", ret, inum)) {
+ ret = -ENOENT;
+ break;
+ }
if (set) {
- ret = set(ei, bi, p);
+ ret = set(ei, &inode_u, p);
if (ret)
goto out;
}
- bi->i_mode = cpu_to_le16(inode->i_mode);
- bi->i_uid = cpu_to_le32(i_uid_read(inode));
- bi->i_gid = cpu_to_le32(i_gid_read(inode));
- bi->i_nlink = cpu_to_le32(inode->i_nlink);
- bi->i_dev = cpu_to_le32(inode->i_rdev);
- bi->i_atime = cpu_to_le64(timespec_to_ns(&inode->i_atime));
- bi->i_mtime = cpu_to_le64(timespec_to_ns(&inode->i_mtime));
- bi->i_ctime = cpu_to_le64(timespec_to_ns(&inode->i_ctime));
+ BUG_ON(i_nlink < nlink_bias(inode->i_mode));
+
+ inode_u.i_mode = inode->i_mode;
+ inode_u.i_uid = i_uid_read(inode);
+ inode_u.i_gid = i_gid_read(inode);
+ inode_u.i_nlink = i_nlink - nlink_bias(inode->i_mode);
+ inode_u.i_dev = inode->i_rdev;
+ inode_u.i_atime = timespec_to_bch_time(c, inode->i_atime);
+ inode_u.i_mtime = timespec_to_bch_time(c, inode->i_mtime);
+ inode_u.i_ctime = timespec_to_bch_time(c, inode->i_ctime);
+
+ bch_inode_pack(&inode_p, &inode_u);
ret = bch_btree_insert_at(c, NULL, NULL, &ei->journal_seq,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL,
- BTREE_INSERT_ENTRY(&iter, &new_inode.k_i));
+ BTREE_INSERT_ENTRY(&iter, &inode_p.inode.k_i));
} while (ret == -EINTR);
if (!ret) {
- ei->i_size = le64_to_cpu(bi->i_size);
- ei->i_flags = le32_to_cpu(bi->i_flags);
+ ei->i_size = inode_u.i_size;
+ ei->i_flags = inode_u.i_flags;
}
out:
bch_btree_iter_unlock(&iter);
@@ -138,7 +157,7 @@ int bch_inc_nlink(struct cache_set *c, struct bch_inode_info *ei)
int bch_dec_nlink(struct cache_set *c, struct bch_inode_info *ei)
{
- int ret;
+ int ret = 0;
mutex_lock(&ei->update_lock);
drop_nlink(&ei->vfs_inode);
@@ -152,9 +171,8 @@ static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum)
{
struct cache_set *c = sb->s_fs_info;
struct inode *inode;
+ struct bch_inode_unpacked inode_u;
struct bch_inode_info *ei;
- struct btree_iter iter;
- struct bkey_s_c k;
int ret;
pr_debug("inum %llu", inum);
@@ -165,24 +183,19 @@ static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum)
if (!(inode->i_state & I_NEW))
return inode;
- bch_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inum, 0));
- k = bch_btree_iter_peek_with_holes(&iter);
-
- if ((ret = btree_iter_err(k)) || k.k->type != BCH_INODE_FS) {
- ret = bch_btree_iter_unlock(&iter);
+ ret = bch_inode_find_by_inum(c, inum, &inode_u);
+ if (ret) {
iget_failed(inode);
- return ERR_PTR(ret ?: -ENOENT);
+ return ERR_PTR(ret);
}
ei = to_bch_ei(inode);
- bch_vfs_inode_init(ei, bkey_s_c_to_inode(k));
+ bch_vfs_inode_init(c, ei, &inode_u);
ei->journal_seq = bch_inode_journal_seq(&c->journal, inum);
unlock_new_inode(inode);
- bch_btree_iter_unlock(&iter);
-
return inode;
}
@@ -193,7 +206,8 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
struct inode *inode;
struct posix_acl *default_acl = NULL, *acl = NULL;
struct bch_inode_info *ei;
- struct bkey_i_inode bkey_inode;
+ struct bch_inode_unpacked inode_u;
+ struct bkey_inode_buf inode_p;
int ret;
inode = new_inode(parent->i_sb);
@@ -210,10 +224,11 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
ei = to_bch_ei(inode);
- bch_inode_init(c, &bkey_inode, i_uid_read(inode),
+ bch_inode_init(c, &inode_u, i_uid_read(inode),
i_gid_read(inode), inode->i_mode, rdev);
+ bch_inode_pack(&inode_p, &inode_u);
- ret = bch_inode_create(c, &bkey_inode.k_i,
+ ret = bch_inode_create(c, &inode_p.inode.k_i,
BLOCKDEV_INODE_MAX, 0,
&c->unused_inode_hint);
if (unlikely(ret)) {
@@ -225,7 +240,8 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
goto err;
}
- bch_vfs_inode_init(ei, inode_i_to_s_c(&bkey_inode));
+ inode_u.inum = inode_p.inode.k.p.inode;
+ bch_vfs_inode_init(c, ei, &inode_u);
if (default_acl) {
ret = bch_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
@@ -266,7 +282,7 @@ static int bch_vfs_dirent_create(struct cache_set *c, struct inode *dir,
if (unlikely(ret))
return ret;
- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
mark_inode_dirty_sync(dir);
return 0;
}
@@ -337,7 +353,7 @@ static int bch_link(struct dentry *old_dentry, struct inode *dir,
lockdep_assert_held(&inode->i_rwsem);
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(dir->i_sb);
ret = bch_inc_nlink(c, ei);
if (ret)
@@ -382,12 +398,7 @@ static int bch_unlink(struct inode *dir, struct dentry *dentry)
drop_nlink(inode);
}
- drop_nlink(inode);
- if (inode->i_nlink) {
- mutex_lock(&ei->update_lock);
- ret = bch_write_inode(c, ei);
- mutex_unlock(&ei->update_lock);
- }
+ bch_dec_nlink(c, ei);
return 0;
}
@@ -473,7 +484,7 @@ static int bch_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *old_inode = old_dentry->d_inode;
struct bch_inode_info *ei = to_bch_ei(old_inode);
struct inode *new_inode = new_dentry->d_inode;
- struct timespec now = CURRENT_TIME;
+ struct timespec now = current_fs_time(old_dir->i_sb);
int ret;
lockdep_assert_held(&old_dir->i_rwsem);
@@ -550,7 +561,7 @@ static int bch_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
struct inode *old_inode = old_dentry->d_inode;
struct inode *new_inode = new_dentry->d_inode;
struct bch_inode_info *ei = to_bch_ei(old_inode);
- struct timespec now = CURRENT_TIME;
+ struct timespec now = current_fs_time(old_dir->i_sb);
int ret;
ret = bch_dirent_rename(c,
@@ -783,14 +794,14 @@ static unsigned bch_inode_flags_to_user_flags(unsigned flags)
}
static int bch_inode_user_flags_set(struct bch_inode_info *ei,
- struct bch_inode *bi,
+ struct bch_inode_unpacked *bi,
void *p)
{
/*
* We're relying on btree locking here for exclusion with other ioctl
* calls - use the flags in the btree (@bi), not ei->i_flags:
*/
- unsigned bch_flags = le32_to_cpu(bi->i_flags);
+ unsigned bch_flags = bi->i_flags;
unsigned oldflags = bch_inode_flags_to_user_flags(bch_flags);
unsigned newflags = *((unsigned *) p);
unsigned i;
@@ -812,8 +823,8 @@ static int bch_inode_user_flags_set(struct bch_inode_info *ei,
if (oldflags != newflags)
return -EOPNOTSUPP;
- bi->i_flags = cpu_to_le32(bch_flags);
- ei->vfs_inode.i_ctime = CURRENT_TIME;
+ bi->i_flags = bch_flags;
+ ei->vfs_inode.i_ctime = current_fs_time(ei->vfs_inode.i_sb);
return 0;
}
@@ -1010,32 +1021,33 @@ static const struct address_space_operations bch_address_space_operations = {
.error_remove_page = generic_error_remove_page,
};
-static void bch_vfs_inode_init(struct bch_inode_info *ei,
- struct bkey_s_c_inode bkey_inode)
+static void bch_vfs_inode_init(struct cache_set *c,
+ struct bch_inode_info *ei,
+ struct bch_inode_unpacked *bi)
{
struct inode *inode = &ei->vfs_inode;
- const struct bch_inode *bi = bkey_inode.v;
pr_debug("init inode %llu with mode %o",
- bkey_inode.k->p.inode, bi->i_mode);
-
- ei->i_flags = le32_to_cpu(bi->i_flags);
- ei->i_size = le64_to_cpu(bi->i_size);
-
- inode->i_mode = le16_to_cpu(bi->i_mode);
- i_uid_write(inode, le32_to_cpu(bi->i_uid));
- i_gid_write(inode, le32_to_cpu(bi->i_gid));
-
- atomic64_set(&ei->i_sectors, le64_to_cpu(bi->i_sectors));
- inode->i_blocks = atomic64_read(&ei->i_sectors);
-
- inode->i_ino = bkey_inode.k->p.inode;
- set_nlink(inode, le32_to_cpu(bi->i_nlink));
- inode->i_rdev = le32_to_cpu(bi->i_dev);
- inode->i_size = le64_to_cpu(bi->i_size);
- inode->i_atime = ns_to_timespec(le64_to_cpu(bi->i_atime));
- inode->i_mtime = ns_to_timespec(le64_to_cpu(bi->i_mtime));
- inode->i_ctime = ns_to_timespec(le64_to_cpu(bi->i_ctime));
+ bi->inum, bi->i_mode);
+
+ ei->i_flags = bi->i_flags;
+ ei->i_size = bi->i_size;
+
+ inode->i_mode = bi->i_mode;
+ i_uid_write(inode, bi->i_uid);
+ i_gid_write(inode, bi->i_gid);
+
+ atomic64_set(&ei->i_sectors, bi->i_sectors);
+ inode->i_blocks = bi->i_sectors;
+
+ inode->i_ino = bi->inum;
+ set_nlink(inode, bi->i_nlink + nlink_bias(inode->i_mode));
+ inode->i_rdev = bi->i_dev;
+ inode->i_generation = bi->i_generation;
+ inode->i_size = bi->i_size;
+ inode->i_atime = bch_time_to_timespec(c, bi->i_atime);
+ inode->i_mtime = bch_time_to_timespec(c, bi->i_mtime);
+ inode->i_ctime = bch_time_to_timespec(c, bi->i_ctime);
bch_inode_flags_to_vfs(inode);
ei->str_hash = bch_hash_info_init(bi);
@@ -1149,8 +1161,8 @@ static int bch_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = atomic_long_read(&c->nr_inodes);
buf->f_ffree = U64_MAX;
- fsid = le64_to_cpup((void *) c->disk_sb.user_uuid.b) ^
- le64_to_cpup((void *) c->disk_sb.user_uuid.b + sizeof(u64));
+ fsid = le64_to_cpup((void *) c->sb.user_uuid.b) ^
+ le64_to_cpup((void *) c->sb.user_uuid.b + sizeof(u64));
buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
buf->f_namelen = NAME_MAX;
@@ -1380,7 +1392,7 @@ static struct dentry *bch_mount(struct file_system_type *fs_type,
sb->s_op = &bch_super_operations;
sb->s_xattr = bch_xattr_handlers;
sb->s_magic = BCACHE_STATFS_MAGIC;
- sb->s_time_gran = 1;
+ sb->s_time_gran = c->sb.time_precision;
c->vfs_sb = sb;
sb->s_bdi = &c->bdi;
diff --git a/libbcache/fs.h b/libbcache/fs.h
index c9820241..aec6159b 100644
--- a/libbcache/fs.h
+++ b/libbcache/fs.h
@@ -34,9 +34,16 @@ static inline u8 mode_to_type(umode_t mode)
return (mode >> 12) & 15;
}
+static inline unsigned nlink_bias(umode_t mode)
+{
+ return S_ISDIR(mode) ? 2 : 1;
+}
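Editor's note: with the switch to bch_inode_unpacked, i_nlink is stored biased by nlink_bias() -- a fresh regular file (VFS nlink 1) and an empty directory (VFS nlink 2) both encode as 0 on disk, which is what makes the i_nlink == 0 skip in __bch_write_inode() safe. A standalone sketch of the round trip; helper names are hypothetical, not part of the patch:

static inline u32 nlink_to_disk(unsigned vfs_nlink, umode_t mode)
{
	return vfs_nlink - nlink_bias(mode);	/* as in __bch_write_inode() */
}

static inline unsigned nlink_from_disk(u32 disk_nlink, umode_t mode)
{
	return disk_nlink + nlink_bias(mode);	/* as in bch_vfs_inode_init() */
}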
+
+struct bch_inode_unpacked;
+
/* returns 0 if we want to do the update, or error is passed up */
typedef int (*inode_set_fn)(struct bch_inode_info *,
- struct bch_inode *, void *);
+ struct bch_inode_unpacked *, void *);
int __must_check __bch_write_inode(struct cache_set *, struct bch_inode_info *,
inode_set_fn, void *);
diff --git a/libbcache/inode.c b/libbcache/inode.c
index 200deb0e..b72a1c51 100644
--- a/libbcache/inode.c
+++ b/libbcache/inode.c
@@ -9,51 +9,195 @@
#include <linux/random.h>
-ssize_t bch_inode_status(char *buf, size_t len, const struct bkey *k)
+#include <asm/unaligned.h>
+
+#define FIELD_BYTES() \
+
+static const u8 byte_table[8] = { 1, 2, 3, 4, 6, 8, 10, 13 };
+static const u8 bits_table[8] = {
+ 1 * 8 - 1,
+ 2 * 8 - 2,
+ 3 * 8 - 3,
+ 4 * 8 - 4,
+ 6 * 8 - 5,
+ 8 * 8 - 6,
+ 10 * 8 - 7,
+ 13 * 8 - 8,
+};
+
+static int inode_encode_field(u8 *out, u8 *end, const u64 in[2])
{
- if (k->p.offset)
- return scnprintf(buf, len, "offset nonzero: %llu", k->p.offset);
-
- if (k->size)
- return scnprintf(buf, len, "size nonzero: %u", k->size);
-
- switch (k->type) {
- case KEY_TYPE_DELETED:
- return scnprintf(buf, len, "deleted");
- case KEY_TYPE_DISCARD:
- return scnprintf(buf, len, "discarded");
- case KEY_TYPE_ERROR:
- return scnprintf(buf, len, "error");
- case KEY_TYPE_COOKIE:
- return scnprintf(buf, len, "cookie");
+ unsigned bytes, bits, shift;
- case BCH_INODE_FS:
- if (bkey_val_bytes(k) != sizeof(struct bch_inode))
- return scnprintf(buf, len, "bad size: %zu",
- bkey_val_bytes(k));
+ if (likely(!in[1]))
+ bits = fls64(in[0]);
+ else
+ bits = fls64(in[1]) + 64;
- if (k->p.inode < BLOCKDEV_INODE_MAX)
- return scnprintf(buf, len,
- "fs inode in blockdev range: %llu",
- k->p.inode);
- return 0;
+ for (shift = 1; shift <= 8; shift++)
+ if (bits < bits_table[shift - 1])
+ goto got_shift;
- case BCH_INODE_BLOCKDEV:
- if (bkey_val_bytes(k) != sizeof(struct bch_inode_blockdev))
- return scnprintf(buf, len, "bad size: %zu",
- bkey_val_bytes(k));
+ BUG();
+got_shift:
+ bytes = byte_table[shift - 1];
- if (k->p.inode >= BLOCKDEV_INODE_MAX)
- return scnprintf(buf, len,
- "blockdev inode in fs range: %llu",
- k->p.inode);
- return 0;
+ BUG_ON(out + bytes > end);
- default:
- return scnprintf(buf, len, "unknown inode type: %u", k->type);
+ if (likely(bytes <= 8)) {
+ u64 b = cpu_to_be64(in[0]);
+
+ memcpy(out, (void *) &b + 8 - bytes, bytes);
+ } else {
+ u64 b = cpu_to_be64(in[1]);
+
+ memcpy(out, (void *) &b + 16 - bytes, bytes);
+ put_unaligned_be64(in[0], out + bytes - 8);
+ }
+
+ *out |= (1 << 8) >> shift;
+
+ return bytes;
+}
+
+static int inode_decode_field(const u8 *in, const u8 *end,
+ u64 out[2], unsigned *out_bits)
+{
+ unsigned bytes, bits, shift;
+
+ if (in >= end)
+ return -1;
+
+ if (!*in)
+ return -1;
+
+ /*
+ * the position of the highest set bit in the first byte indicates the
+ * number of bytes; shift is the number of bits to strip from the high
+ * byte (the length marker and the zeros above it):
+ */
+ shift = 8 - __fls(*in); /* 1 <= shift <= 8 */
+ bytes = byte_table[shift - 1];
+ bits = bytes * 8 - shift;
+
+ if (in + bytes > end)
+ return -1;
+
+ /*
+ * we're assuming it's safe to dereference up to 7 bytes before @in; this
+ * works because keys always start well more than 7 bytes after the start
+ * of the btree node header:
+ */
+ if (likely(bytes <= 8)) {
+ out[0] = get_unaligned_be64(in + bytes - 8);
+ out[0] <<= 64 - bits;
+ out[0] >>= 64 - bits;
+ out[1] = 0;
+ } else {
+ out[0] = get_unaligned_be64(in + bytes - 8);
+ out[1] = get_unaligned_be64(in + bytes - 16);
+ out[1] <<= 128 - bits;
+ out[1] >>= 128 - bits;
+ }
+
+ *out_bits = out[1] ? 64 + fls64(out[1]) : fls64(out[0]);
+ return bytes;
+}
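Editor's note: a worked example of the variable-length field encoding above, checked against the two routines (illustrative only, not part of the patch). The encoder picks the smallest byte_table[] width whose bits_table[] budget holds the value and ORs the width marker (1 << 8) >> shift into the first byte; the decoder recovers the width from the position of that byte's highest set bit:

/*
 *   5:   fls64(5) == 3, 3 < bits_table[0] (7)  -> shift 1, 1 byte:  0x85
 *   100: fls64(100) == 7, not < 7              -> shift 2, 2 bytes: 0x40 0x64
 *
 * decoding 0x40 0x64: __fls(0x40) == 6, shift = 8 - 6 = 2, bytes = 2,
 * value = low 14 bits of the big-endian load == 0x64 == 100
 */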
+
+void bch_inode_pack(struct bkey_inode_buf *packed,
+ const struct bch_inode_unpacked *inode)
+{
+ u8 *out = packed->inode.v.fields;
+ u8 *end = (void *) &packed[1];
+ u8 *last_nonzero_field = out;
+ u64 field[2];
+ unsigned nr_fields = 0, last_nonzero_fieldnr = 0;
+
+ bkey_inode_init(&packed->inode.k_i);
+ packed->inode.k.p.inode = inode->inum;
+ packed->inode.v.i_hash_seed = inode->i_hash_seed;
+ packed->inode.v.i_flags = cpu_to_le32(inode->i_flags);
+ packed->inode.v.i_mode = cpu_to_le16(inode->i_mode);
+
+#define BCH_INODE_FIELD(_name, _bits) \
+ field[0] = inode->_name; \
+ field[1] = 0; \
+ out += inode_encode_field(out, end, field); \
+ nr_fields++; \
+ \
+ if (field[0] | field[1]) { \
+ last_nonzero_field = out; \
+ last_nonzero_fieldnr = nr_fields; \
+ }
+
+ BCH_INODE_FIELDS()
+#undef BCH_INODE_FIELD
+
+ out = last_nonzero_field;
+ nr_fields = last_nonzero_fieldnr;
+
+ set_bkey_val_bytes(&packed->inode.k, out - (u8 *) &packed->inode.v);
+ memset(out, 0,
+ (u8 *) &packed->inode.v +
+ bkey_val_bytes(&packed->inode.k) - out);
+
+ SET_INODE_NR_FIELDS(&packed->inode.v, nr_fields);
+
+ if (IS_ENABLED(CONFIG_BCACHE_DEBUG)) {
+ struct bch_inode_unpacked unpacked;
+
+ int ret = bch_inode_unpack(inode_i_to_s_c(&packed->inode),
+ &unpacked);
+ BUG_ON(ret);
+ BUG_ON(unpacked.inum != inode->inum);
+ BUG_ON(unpacked.i_hash_seed != inode->i_hash_seed);
+ BUG_ON(unpacked.i_mode != inode->i_mode);
+
+#define BCH_INODE_FIELD(_name, _bits) BUG_ON(unpacked._name != inode->_name);
+ BCH_INODE_FIELDS()
+#undef BCH_INODE_FIELD
}
}
+int bch_inode_unpack(struct bkey_s_c_inode inode,
+ struct bch_inode_unpacked *unpacked)
+{
+ const u8 *in = inode.v->fields;
+ const u8 *end = (void *) inode.v + bkey_val_bytes(inode.k);
+ u64 field[2];
+ unsigned fieldnr = 0, field_bits;
+ int ret;
+
+ unpacked->inum = inode.k->p.inode;
+ unpacked->i_hash_seed = inode.v->i_hash_seed;
+ unpacked->i_flags = le32_to_cpu(inode.v->i_flags);
+ unpacked->i_mode = le16_to_cpu(inode.v->i_mode);
+
+#define BCH_INODE_FIELD(_name, _bits) \
+ if (fieldnr++ == INODE_NR_FIELDS(inode.v)) { \
+ memset(&unpacked->_name, 0, \
+ sizeof(*unpacked) - \
+ offsetof(struct bch_inode_unpacked, _name)); \
+ return 0; \
+ } \
+ \
+ ret = inode_decode_field(in, end, field, &field_bits); \
+ if (ret < 0) \
+ return ret; \
+ \
+ if (field_bits > sizeof(unpacked->_name) * 8) \
+ return -1; \
+ \
+ unpacked->_name = field[0]; \
+ in += ret;
+
+ BCH_INODE_FIELDS()
+#undef BCH_INODE_FIELD
+
+ /* XXX: signal if there were more fields than expected? */
+
+ return 0;
+}
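Editor's note: bch_inode_pack() drops trailing all-zero fields (only nr_fields are written) and bch_inode_unpack() zero-fills whatever was omitted, so the two are exact inverses -- the CONFIG_BCACHE_DEBUG block in bch_inode_pack() already asserts this. A minimal round-trip sketch, assuming the declarations added to inode.h in this patch (illustrative only):

struct bch_inode_unpacked u = { .inum = 42, .i_mode = S_IFREG|0644 };
struct bkey_inode_buf packed;
struct bch_inode_unpacked again;

bch_inode_pack(&packed, &u);
BUG_ON(bch_inode_unpack(inode_i_to_s_c(&packed.inode), &again));
BUG_ON(again.inum != u.inum || again.i_mode != u.i_mode);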
+
static const char *bch_inode_invalid(const struct cache_set *c,
struct bkey_s_c k)
{
@@ -63,16 +207,20 @@ static const char *bch_inode_invalid(const struct cache_set *c,
switch (k.k->type) {
case BCH_INODE_FS: {
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
+ struct bch_inode_unpacked unpacked;
- if (bkey_val_bytes(k.k) != sizeof(struct bch_inode))
+ if (bkey_val_bytes(k.k) < sizeof(struct bch_inode))
return "incorrect value size";
if (k.k->p.inode < BLOCKDEV_INODE_MAX)
return "fs inode in blockdev range";
- if (INODE_STR_HASH_TYPE(inode.v) >= BCH_STR_HASH_NR)
+ if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR)
return "invalid str hash type";
+ if (bch_inode_unpack(inode, &unpacked))
+ return "invalid variable length fields";
+
return NULL;
}
case BCH_INODE_BLOCKDEV:
@@ -92,12 +240,17 @@ static void bch_inode_to_text(struct cache_set *c, char *buf,
size_t size, struct bkey_s_c k)
{
struct bkey_s_c_inode inode;
+ struct bch_inode_unpacked unpacked;
switch (k.k->type) {
case BCH_INODE_FS:
inode = bkey_s_c_to_inode(k);
+ if (bch_inode_unpack(inode, &unpacked)) {
+ scnprintf(buf, size, "(unpack error)");
+ break;
+ }
- scnprintf(buf, size, "i_size %llu", inode.v->i_size);
+ scnprintf(buf, size, "i_size %llu", unpacked.i_size);
break;
}
}
@@ -107,26 +260,25 @@ const struct bkey_ops bch_bkey_inode_ops = {
.val_to_text = bch_inode_to_text,
};
-void bch_inode_init(struct cache_set *c, struct bkey_i_inode *inode,
+void bch_inode_init(struct cache_set *c, struct bch_inode_unpacked *inode_u,
uid_t uid, gid_t gid, umode_t mode, dev_t rdev)
{
- struct timespec ts = CURRENT_TIME;
- s64 now = timespec_to_ns(&ts);
- struct bch_inode *bi;
-
- bi = &bkey_inode_init(&inode->k_i)->v;
- bi->i_uid = cpu_to_le32(uid);
- bi->i_gid = cpu_to_le32(gid);
-
- bi->i_mode = cpu_to_le16(mode);
- bi->i_dev = cpu_to_le32(rdev);
- bi->i_atime = cpu_to_le64(now);
- bi->i_mtime = cpu_to_le64(now);
- bi->i_ctime = cpu_to_le64(now);
- bi->i_nlink = cpu_to_le32(S_ISDIR(mode) ? 2 : 1);
-
- get_random_bytes(&bi->i_hash_seed, sizeof(bi->i_hash_seed));
- SET_INODE_STR_HASH_TYPE(bi, c->sb.str_hash_type);
+ s64 now = timespec_to_bch_time(c, CURRENT_TIME);
+
+ memset(inode_u, 0, sizeof(*inode_u));
+
+ /* ick */
+ inode_u->i_flags |= c->sb.str_hash_type << INODE_STR_HASH_OFFSET;
+ get_random_bytes(&inode_u->i_hash_seed, sizeof(inode_u->i_hash_seed));
+
+ inode_u->i_mode = mode;
+ inode_u->i_uid = uid;
+ inode_u->i_gid = gid;
+ inode_u->i_dev = rdev;
+ inode_u->i_atime = now;
+ inode_u->i_mtime = now;
+ inode_u->i_ctime = now;
+ inode_u->i_otime = now;
}
int bch_inode_create(struct cache_set *c, struct bkey_i *inode,
@@ -200,7 +352,7 @@ int bch_inode_truncate(struct cache_set *c, u64 inode_nr, u64 new_size,
struct extent_insert_hook *hook, u64 *journal_seq)
{
return bch_discard(c, POS(inode_nr, new_size), POS(inode_nr + 1, 0),
- 0, NULL, hook, journal_seq);
+ ZERO_VERSION, NULL, hook, journal_seq);
}
int bch_inode_rm(struct cache_set *c, u64 inode_nr)
@@ -215,7 +367,7 @@ int bch_inode_rm(struct cache_set *c, u64 inode_nr)
ret = bch_btree_delete_range(c, BTREE_ID_XATTRS,
POS(inode_nr, 0),
POS(inode_nr + 1, 0),
- 0, NULL, NULL, NULL);
+ ZERO_VERSION, NULL, NULL, NULL);
if (ret < 0)
return ret;
@@ -230,7 +382,7 @@ int bch_inode_rm(struct cache_set *c, u64 inode_nr)
ret = bch_btree_delete_range(c, BTREE_ID_DIRENTS,
POS(inode_nr, 0),
POS(inode_nr + 1, 0),
- 0, NULL, NULL, NULL);
+ ZERO_VERSION, NULL, NULL, NULL);
if (ret < 0)
return ret;
@@ -241,25 +393,19 @@ int bch_inode_rm(struct cache_set *c, u64 inode_nr)
NULL, NULL, BTREE_INSERT_NOFAIL);
}
-int bch_inode_update(struct cache_set *c, struct bkey_i *inode,
- u64 *journal_seq)
-{
- return bch_btree_update(c, BTREE_ID_INODES, inode, journal_seq);
-}
-
int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr,
- struct bkey_i_inode *inode)
+ struct bch_inode_unpacked *inode)
{
struct btree_iter iter;
struct bkey_s_c k;
+ int ret = -ENOENT;
for_each_btree_key_with_holes(&iter, c, BTREE_ID_INODES,
POS(inode_nr, 0), k) {
switch (k.k->type) {
case BCH_INODE_FS:
- bkey_reassemble(&inode->k_i, k);
- bch_btree_iter_unlock(&iter);
- return 0;
+ ret = bch_inode_unpack(bkey_s_c_to_inode(k), inode);
+ break;
default:
/* hole, not found */
break;
@@ -269,7 +415,7 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr,
}
- return bch_btree_iter_unlock(&iter) ?: -ENOENT;
+ return bch_btree_iter_unlock(&iter) ?: ret;
}
int bch_cached_dev_inode_find_by_uuid(struct cache_set *c, uuid_le *uuid,
diff --git a/libbcache/inode.h b/libbcache/inode.h
index fa1a4cf9..81dccf68 100644
--- a/libbcache/inode.h
+++ b/libbcache/inode.h
@@ -3,18 +3,53 @@
extern const struct bkey_ops bch_bkey_inode_ops;
-ssize_t bch_inode_status(char *, size_t, const struct bkey *);
+struct bch_inode_unpacked {
+ u64 inum;
+ __le64 i_hash_seed;
+ u32 i_flags;
+ u16 i_mode;
-void bch_inode_init(struct cache_set *, struct bkey_i_inode *,
+#define BCH_INODE_FIELD(_name, _bits) u##_bits _name;
+ BCH_INODE_FIELDS()
+#undef BCH_INODE_FIELD
+};
+
+struct bkey_inode_buf {
+ struct bkey_i_inode inode;
+
+#define BCH_INODE_FIELD(_name, _bits) + 8 + _bits / 8
+ u8 _pad[0 + BCH_INODE_FIELDS()];
+#undef BCH_INODE_FIELD
+} __packed;
+
+void bch_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
+int bch_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
+
+void bch_inode_init(struct cache_set *, struct bch_inode_unpacked *,
uid_t, gid_t, umode_t, dev_t);
int bch_inode_create(struct cache_set *, struct bkey_i *, u64, u64, u64 *);
int bch_inode_truncate(struct cache_set *, u64, u64,
struct extent_insert_hook *, u64 *);
int bch_inode_rm(struct cache_set *, u64);
-int bch_inode_update(struct cache_set *, struct bkey_i *, u64 *);
-int bch_inode_find_by_inum(struct cache_set *, u64, struct bkey_i_inode *);
+int bch_inode_find_by_inum(struct cache_set *, u64,
+ struct bch_inode_unpacked *);
int bch_cached_dev_inode_find_by_uuid(struct cache_set *, uuid_le *,
struct bkey_i_inode_blockdev *);
+static inline struct timespec bch_time_to_timespec(struct cache_set *c, u64 time)
+{
+ return ns_to_timespec(time * c->sb.time_precision + c->sb.time_base_lo);
+}
+
+static inline u64 timespec_to_bch_time(struct cache_set *c, struct timespec ts)
+{
+ s64 ns = timespec_to_ns(&ts) - c->sb.time_base_lo;
+
+ if (c->sb.time_precision == 1)
+ return ns;
+
+ return div_s64(ns, c->sb.time_precision);
+}
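Editor's note: on-disk timestamps are now stored in units of sb.time_precision nanoseconds relative to sb.time_base_lo, so the two helpers above are inverses up to that precision. A standalone illustration of the arithmetic with hypothetical plain-integer helpers (assumes the kernel's div_s64(); not part of the patch):

static inline u64 ns_to_bch_time_raw(s64 ns, s64 base, u32 precision)
{
	return div_s64(ns - base, precision);
}

static inline s64 bch_time_to_ns_raw(u64 t, s64 base, u32 precision)
{
	return (s64) t * precision + base;
}
/* e.g. precision 1000 (microsecond granularity): 1234567 ns -> 1234 -> 1234000 ns */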
+
#endif
diff --git a/libbcache/io.c b/libbcache/io.c
index 4112ea50..2f0e48a0 100644
--- a/libbcache/io.c
+++ b/libbcache/io.c
@@ -22,7 +22,7 @@
#include "move.h"
#include "notify.h"
#include "stats.h"
-#include "super.h"
+#include "super-io.h"
#include <linux/blkdev.h>
#include <linux/random.h>
@@ -382,11 +382,27 @@ static void bch_write_endio(struct bio *bio)
closure_put(cl);
}
+static struct nonce extent_nonce(struct bversion version,
+ unsigned nonce,
+ unsigned uncompressed_size,
+ unsigned compression_type)
+{
+ return (struct nonce) {{
+ [0] = cpu_to_le32((nonce << 12) |
+ (uncompressed_size << 22)),
+ [1] = cpu_to_le32(version.lo),
+ [2] = cpu_to_le32(version.lo >> 32),
+ [3] = cpu_to_le32(version.hi|
+ (compression_type << 24))^BCH_NONCE_EXTENT,
+ }};
+}
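Editor's note: a sketch of how the 128-bit extent nonce above is laid out (an illustrative reading of the code, not part of the patch); because bch_write() below assigns a fresh key version whenever an encrypting checksum type is used and none is set, the same nonce is never reused with different plaintext:

/*
 *   word 0: crc nonce (bits 12..21) | uncompressed size in sectors (bits 22..31)
 *   word 1: version.lo, low 32 bits
 *   word 2: version.lo, high 32 bits
 *   word 3: (version.hi | compression_type << 24) ^ BCH_NONCE_EXTENT
 */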
+
static void init_append_extent(struct bch_write_op *op,
unsigned compressed_size,
unsigned uncompressed_size,
unsigned compression_type,
- u64 csum, unsigned csum_type,
+ unsigned nonce,
+ struct bch_csum csum, unsigned csum_type,
struct open_bucket *ob)
{
struct bkey_i_extent *e = bkey_extent_init(op->insert_keys.top);
@@ -394,11 +410,13 @@ static void init_append_extent(struct bch_write_op *op,
op->pos.offset += uncompressed_size;
e->k.p = op->pos;
e->k.size = uncompressed_size;
+ e->k.version = op->version;
+ bkey_extent_set_cached(&e->k, op->flags & BCH_WRITE_CACHED);
bch_extent_crc_append(e, compressed_size,
uncompressed_size,
compression_type,
- csum, csum_type);
+ nonce, csum, csum_type);
bch_alloc_sectors_append_ptrs(op->c, e, op->nr_replicas,
ob, compressed_size);
@@ -417,7 +435,7 @@ static int bch_write_extent(struct bch_write_op *op,
unsigned key_to_write_offset = op->insert_keys.top_p -
op->insert_keys.keys_p;
struct bkey_i *key_to_write;
- unsigned csum_type = c->opts.data_checksum;
+ unsigned csum_type = op->csum_type;
unsigned compression_type = op->compression_type;
int ret;
@@ -426,8 +444,8 @@ static int bch_write_extent(struct bch_write_op *op,
/* Need to decompress data? */
if ((op->flags & BCH_WRITE_DATA_COMPRESSED) &&
- (op->crc.uncompressed_size != op->size ||
- op->crc.compressed_size > ob->sectors_free)) {
+ (crc_uncompressed_size(NULL, &op->crc) != op->size ||
+ crc_compressed_size(NULL, &op->crc) > ob->sectors_free)) {
int ret;
ret = bch_bio_uncompress_inplace(c, orig, op->size, op->crc);
@@ -439,9 +457,10 @@ static int bch_write_extent(struct bch_write_op *op,
if (op->flags & BCH_WRITE_DATA_COMPRESSED) {
init_append_extent(op,
- op->crc.compressed_size,
- op->crc.uncompressed_size,
+ crc_compressed_size(NULL, &op->crc),
+ crc_uncompressed_size(NULL, &op->crc),
op->crc.compression_type,
+ op->crc.nonce,
op->crc.csum,
op->crc.csum_type,
ob);
@@ -457,7 +476,10 @@ static int bch_write_extent(struct bch_write_op *op,
/* all units here in bytes */
unsigned total_output = 0, output_available =
min(ob->sectors_free << 9, orig->bi_iter.bi_size);
- u64 csum;
+ unsigned crc_nonce = bch_csum_type_is_encryption(csum_type)
+ ? op->nonce : 0;
+ struct bch_csum csum;
+ struct nonce nonce;
bio = bio_alloc_bioset(GFP_NOIO,
DIV_ROUND_UP(output_available, PAGE_SIZE),
@@ -489,13 +511,20 @@ static int bch_write_extent(struct bch_write_op *op,
BUG_ON(src_len & (block_bytes(c) - 1));
swap(bio->bi_iter.bi_size, dst_len);
- csum = bch_checksum_bio(bio, csum_type);
+ nonce = extent_nonce(op->version,
+ crc_nonce,
+ src_len >> 9,
+ compression_type);
+
+ bch_encrypt_bio(c, csum_type, nonce, bio);
+
+ csum = bch_checksum_bio(c, csum_type, nonce, bio);
swap(bio->bi_iter.bi_size, dst_len);
init_append_extent(op,
dst_len >> 9, src_len >> 9,
fragment_compression_type,
- csum, csum_type, ob);
+ crc_nonce, csum, csum_type, ob);
total_output += dst_len;
bio_advance(bio, dst_len);
@@ -531,7 +560,8 @@ static int bch_write_extent(struct bch_write_op *op,
wbio->put_bio = bio != orig;
init_append_extent(op, bio_sectors(bio), bio_sectors(bio),
- compression_type, 0, csum_type, ob);
+ compression_type, 0,
+ (struct bch_csum) { 0 }, csum_type, ob);
ret = bio != orig;
}
@@ -546,8 +576,7 @@ static int bch_write_extent(struct bch_write_op *op,
key_to_write = (void *) (op->insert_keys.keys_p + key_to_write_offset);
- if (!(op->flags & BCH_WRITE_CACHED))
- bch_check_mark_super(c, key_to_write, false);
+ bch_check_mark_super(c, key_to_write, false);
#ifndef CONFIG_BCACHE_NO_IO
bch_submit_wbio_replicas(to_wbio(bio), c, key_to_write, false);
@@ -748,6 +777,11 @@ void bch_write(struct closure *cl)
closure_return(cl);
}
+ if (bversion_zero(op->version) &&
+ bch_csum_type_is_encryption(op->csum_type))
+ op->version.lo =
+ atomic64_inc_return(&c->key_version) + 1;
+
if (!(op->flags & BCH_WRITE_DISCARD))
bch_increment_clock(c, bio_sectors(bio), WRITE);
@@ -804,17 +838,21 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c,
struct write_point *wp, struct bpos pos,
u64 *journal_seq, unsigned flags)
{
+ EBUG_ON(res.sectors && !res.nr_replicas);
+
op->c = c;
op->io_wq = index_update_wq(op);
op->bio = bio;
op->written = 0;
op->error = 0;
op->flags = flags;
+ op->csum_type = bch_data_checksum_type(c);
op->compression_type = c->opts.compression;
op->nr_replicas = res.nr_replicas;
op->alloc_reserve = RESERVE_NONE;
+ op->nonce = 0;
op->pos = pos;
- op->version = 0;
+ op->version = ZERO_VERSION;
op->res = res;
op->wp = wp;
@@ -853,7 +891,7 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c,
* appropriately inode_truncate should call this
*/
int bch_discard(struct cache_set *c, struct bpos start,
- struct bpos end, u64 version,
+ struct bpos end, struct bversion version,
struct disk_reservation *disk_res,
struct extent_insert_hook *hook,
u64 *journal_seq)
@@ -878,7 +916,11 @@ static int bio_checksum_uncompress(struct cache_set *c,
struct bio *src = &rbio->bio;
struct bio *dst = &bch_rbio_parent(rbio)->bio;
struct bvec_iter dst_iter = rbio->parent_iter;
- u64 csum;
+ struct nonce nonce = extent_nonce(rbio->version,
+ rbio->crc.nonce,
+ crc_uncompressed_size(NULL, &rbio->crc),
+ rbio->crc.compression_type);
+ struct bch_csum csum;
int ret = 0;
/*
@@ -888,18 +930,19 @@ static int bio_checksum_uncompress(struct cache_set *c,
* in order to promote
*/
if (rbio->bounce) {
- src->bi_iter.bi_size = rbio->crc.compressed_size << 9;
- src->bi_iter.bi_idx = 0;
- src->bi_iter.bi_bvec_done = 0;
+ src->bi_iter.bi_size = crc_compressed_size(NULL, &rbio->crc) << 9;
+ src->bi_iter.bi_idx = 0;
+ src->bi_iter.bi_bvec_done = 0;
} else {
src->bi_iter = rbio->parent_iter;
}
- csum = bch_checksum_bio(src, rbio->crc.csum_type);
- if (cache_nonfatal_io_err_on(rbio->crc.csum != csum, rbio->ca,
- "data checksum error, inode %llu offset %llu: expected %0llx got %0llx (type %u)",
+ csum = bch_checksum_bio(c, rbio->crc.csum_type, nonce, src);
+ if (cache_nonfatal_io_err_on(bch_crc_cmp(rbio->crc.csum, csum), rbio->ca,
+ "data checksum error, inode %llu offset %llu: expected %0llx%0llx got %0llx%0llx (type %u)",
rbio->inode, (u64) rbio->parent_iter.bi_sector << 9,
- rbio->crc.csum, csum, rbio->crc.csum_type))
+ rbio->crc.csum.hi, rbio->crc.csum.lo, csum.hi, csum.lo,
+ rbio->crc.csum_type))
ret = -EIO;
/*
@@ -908,6 +951,7 @@ static int bio_checksum_uncompress(struct cache_set *c,
*/
if (rbio->crc.compression_type != BCH_COMPRESSION_NONE) {
if (!ret) {
+ bch_encrypt_bio(c, rbio->crc.csum_type, nonce, src);
ret = bch_bio_uncompress(c, src, dst,
dst_iter, rbio->crc);
if (ret)
@@ -915,8 +959,20 @@ static int bio_checksum_uncompress(struct cache_set *c,
}
} else if (rbio->bounce) {
bio_advance(src, rbio->crc.offset << 9);
+
+ /* don't need to decrypt the entire bio: */
+ BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
+ src->bi_iter.bi_size = dst_iter.bi_size;
+
+ nonce = nonce_add(nonce, rbio->crc.offset << 9);
+
+ bch_encrypt_bio(c, rbio->crc.csum_type,
+ nonce, src);
+
bio_copy_data_iter(dst, dst_iter,
src, src->bi_iter);
+ } else {
+ bch_encrypt_bio(c, rbio->crc.csum_type, nonce, src);
}
return ret;
@@ -1108,7 +1164,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
*/
unsigned sectors =
max_t(unsigned, k.k->size,
- pick->crc.uncompressed_size);
+ crc_uncompressed_size(NULL, &pick->crc));
unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
promote_op = kmalloc(sizeof(*promote_op) +
@@ -1130,7 +1186,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
*/
if (pick->crc.compression_type != BCH_COMPRESSION_NONE ||
(pick->crc.csum_type != BCH_CSUM_NONE &&
- (bvec_iter_sectors(iter) != pick->crc.uncompressed_size ||
+ (bvec_iter_sectors(iter) != crc_uncompressed_size(NULL, &pick->crc) ||
(flags & BCH_READ_FORCE_BOUNCE)))) {
read_full = true;
bounce = true;
@@ -1138,7 +1194,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
if (bounce) {
unsigned sectors = read_full
- ? (pick->crc.compressed_size ?: k.k->size)
+ ? (crc_compressed_size(NULL, &pick->crc) ?: k.k->size)
: bvec_iter_sectors(iter);
rbio = container_of(bio_alloc_bioset(GFP_NOIO,
@@ -1183,6 +1239,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
rbio->flags = flags;
rbio->bounce = bounce;
rbio->split = split;
+ rbio->version = k.k->version;
rbio->crc = pick->crc;
/*
* crc.compressed_size will be 0 if there wasn't any checksum
@@ -1190,7 +1247,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
* bounced (which isn't necessarily the original key size, if we bounced
* only for promoting)
*/
- rbio->crc.compressed_size = bio_sectors(&rbio->bio);
+ rbio->crc._compressed_size = bio_sectors(&rbio->bio) - 1;
rbio->ptr = pick->ptr;
rbio->ca = pick->ca;
rbio->promote = promote_op;
@@ -1210,7 +1267,8 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
bch_migrate_write_init(c, &promote_op->write,
&c->promote_write_point,
k, NULL,
- BCH_WRITE_ALLOC_NOWAIT);
+ BCH_WRITE_ALLOC_NOWAIT|
+ BCH_WRITE_CACHED);
promote_op->write.promote = true;
if (rbio->crc.compression_type) {
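
The write path above encrypts each fragment in place (bch_encrypt_bio()) before computing bch_checksum_bio() over it, so the stored checksum covers the on-disk ciphertext; bio_checksum_uncompress() mirrors this on reads by verifying the checksum first and only then calling bch_encrypt_bio() again to decrypt (the same routine serves both directions, presumably because the stream cipher is its own inverse). Below is a minimal, self-contained sketch of that encrypt-then-checksum / verify-then-decrypt ordering; the XOR "keystream", the additive checksum, and the function names are toy stand-ins for illustration only, not bcache code.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* toy stand-ins for the real ChaCha20 encryption and keyed checksum */
    static void toy_crypt(uint8_t *buf, size_t len, uint64_t nonce)
    {
            for (size_t i = 0; i < len; i++)
                    buf[i] ^= (uint8_t) (nonce + i);        /* XOR "keystream" */
    }

    static uint64_t toy_csum(const uint8_t *buf, size_t len)
    {
            uint64_t sum = 0;
            for (size_t i = 0; i < len; i++)
                    sum = sum * 31 + buf[i];
            return sum;
    }

    /* write side: encrypt in place, then checksum the ciphertext */
    static uint64_t write_fragment(uint8_t *buf, size_t len, uint64_t nonce)
    {
            toy_crypt(buf, len, nonce);
            return toy_csum(buf, len);
    }

    /* read side: verify the checksum over the ciphertext, then decrypt */
    static int read_fragment(uint8_t *buf, size_t len, uint64_t nonce, uint64_t expect)
    {
            if (toy_csum(buf, len) != expect)
                    return -1;              /* data checksum error */
            toy_crypt(buf, len, nonce);     /* XOR cipher: same op decrypts */
            return 0;
    }

    int main(void)
    {
            uint8_t data[16] = "hello, extents!";
            uint64_t csum = write_fragment(data, sizeof(data), 42);

            if (!read_fragment(data, sizeof(data), 42, csum))
                    printf("ok: %s\n", (char *) data);
            return 0;
    }
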
diff --git a/libbcache/io.h b/libbcache/io.h
index b7668b4e..99e51089 100644
--- a/libbcache/io.h
+++ b/libbcache/io.h
@@ -79,7 +79,7 @@ void bch_submit_wbio_replicas(struct bch_write_bio *, struct cache_set *,
const struct bkey_i *, bool);
int bch_discard(struct cache_set *, struct bpos, struct bpos,
- u64, struct disk_reservation *,
+ struct bversion, struct disk_reservation *,
struct extent_insert_hook *, u64 *);
void bch_read_retry_work(struct work_struct *);
diff --git a/libbcache/io_types.h b/libbcache/io_types.h
index f7d99cdb..64269d94 100644
--- a/libbcache/io_types.h
+++ b/libbcache/io_types.h
@@ -43,7 +43,8 @@ struct bch_read_bio {
u8 bounce:1,
split:1;
- struct bch_extent_crc64 crc;
+ struct bversion version;
+ struct bch_extent_crc128 crc;
struct bch_extent_ptr ptr;
struct cache *ca;
@@ -101,15 +102,17 @@ struct bch_write_op {
short error;
u16 flags;
+ unsigned csum_type:4;
unsigned compression_type:4;
unsigned nr_replicas:4;
unsigned alloc_reserve:4;
+ unsigned nonce:14;
struct bpos pos;
- unsigned version;
+ struct bversion version;
/* For BCH_WRITE_DATA_COMPRESSED: */
- struct bch_extent_crc64 crc;
+ struct bch_extent_crc128 crc;
unsigned size;
struct disk_reservation res;
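
io_types.h now carries 128-bit checksum state: bch_read_bio and bch_write_op switch from bch_extent_crc64 to bch_extent_crc128, and the error path in io.c prints the two 64-bit halves (csum.hi, csum.lo) and detects mismatches with bch_crc_cmp(). A sketch of what such a container and comparison could look like follows; the field order and the exact definition of bch_crc_cmp() are assumptions here (hence the _example names), not taken from the patch.

    #include <stdbool.h>
    #include <stdint.h>

    /* 128-bit checksum value, wide enough for keyed MACs as well as CRCs */
    struct bch_csum_example {
            uint64_t        lo;
            uint64_t        hi;
    };

    /* nonzero iff the two checksums differ, mirroring how bch_crc_cmp() is used */
    static inline bool csum_cmp_example(struct bch_csum_example l,
                                        struct bch_csum_example r)
    {
            return (l.lo ^ r.lo) | (l.hi ^ r.hi);
    }
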
diff --git a/libbcache/journal.c b/libbcache/journal.c
index 9e09b86d..3bb9e3c3 100644
--- a/libbcache/journal.c
+++ b/libbcache/journal.c
@@ -18,7 +18,8 @@
#include "io.h"
#include "keylist.h"
#include "journal.h"
-#include "super.h"
+#include "super-io.h"
+#include "vstructs.h"
#include <trace/events/bcache.h>
@@ -52,19 +53,14 @@ static inline u64 journal_pin_seq(struct journal *j,
return last_seq(j) + fifo_entry_idx(&j->pin, pin_list);
}
-#define for_each_jset_entry(entry, jset) \
- for (entry = (jset)->start; \
- entry < bkey_idx(jset, le32_to_cpu((jset)->u64s)); \
- entry = jset_keys_next(entry))
-
static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
struct jset_entry *entry, unsigned type)
{
- while (entry < bkey_idx(jset, le32_to_cpu(jset->u64s))) {
+ while (entry < vstruct_last(jset)) {
if (JOURNAL_ENTRY_TYPE(entry) == type)
return entry;
- entry = jset_keys_next(entry);
+ entry = vstruct_next(entry);
}
return NULL;
@@ -73,14 +69,11 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
#define for_each_jset_entry_type(entry, jset, type) \
for (entry = (jset)->start; \
(entry = __jset_entry_type_next(jset, entry, type)); \
- entry = jset_keys_next(entry))
+ entry = vstruct_next(entry))
#define for_each_jset_key(k, _n, entry, jset) \
for_each_jset_entry_type(entry, jset, JOURNAL_ENTRY_BTREE_KEYS) \
- for (k = (entry)->start; \
- (k < bkey_idx(entry, le16_to_cpu((entry)->u64s)) &&\
- (_n = bkey_next(k), 1)); \
- k = _n)
+ vstruct_for_each_safe(entry, k, _n)
static inline void bch_journal_add_entry(struct journal_buf *buf,
const void *data, size_t u64s,
@@ -199,8 +192,6 @@ redo_peek:
closure_sync(&cl);
- mutex_lock(&c->btree_interior_update_lock);
-
for (i = 0;; i++) {
struct btree_interior_update *as;
struct pending_btree_node_free *d;
@@ -212,6 +203,8 @@ redo_peek:
}
n = bl->entries[i];
mutex_unlock(&j->blacklist_lock);
+redo_wait:
+ mutex_lock(&c->btree_interior_update_lock);
/*
* Is the node on the list of pending interior node updates -
@@ -225,11 +218,11 @@ redo_peek:
closure_wait(&as->wait, &cl);
mutex_unlock(&c->btree_interior_update_lock);
closure_sync(&cl);
- break;
+ goto redo_wait;
}
- }
- mutex_unlock(&c->btree_interior_update_lock);
+ mutex_unlock(&c->btree_interior_update_lock);
+ }
mutex_lock(&j->blacklist_lock);
@@ -377,7 +370,6 @@ out:
struct journal_list {
struct closure cl;
struct mutex lock;
- struct mutex cache_set_buffer_lock;
struct list_head *head;
int ret;
};
@@ -394,7 +386,7 @@ static int journal_entry_add(struct cache_set *c, struct journal_list *jlist,
{
struct journal_replay *i, *pos;
struct list_head *where;
- size_t bytes = __set_bytes(j, le32_to_cpu(j->u64s));
+ size_t bytes = vstruct_bytes(j);
__le64 last_seq;
int ret;
@@ -422,8 +414,7 @@ static int journal_entry_add(struct cache_set *c, struct journal_list *jlist,
list_for_each_entry_reverse(i, jlist->head, list) {
/* Duplicate? */
if (le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) {
- fsck_err_on(bytes != __set_bytes(&i->j,
- le32_to_cpu(i->j.u64s)) ||
+ fsck_err_on(bytes != vstruct_bytes(&i->j) ||
memcmp(j, &i->j, bytes), c,
"found duplicate but non identical journal entries (seq %llu)",
le64_to_cpu(j->seq));
@@ -455,11 +446,21 @@ fsck_err:
return ret;
}
+static struct nonce journal_nonce(const struct jset *jset)
+{
+ return (struct nonce) {{
+ [0] = 0,
+ [1] = ((__le32 *) &jset->seq)[0],
+ [2] = ((__le32 *) &jset->seq)[1],
+ [3] = BCH_NONCE_JOURNAL,
+ }};
+}
+
static void journal_entry_null_range(void *start, void *end)
{
struct jset_entry *entry;
- for (entry = start; entry != end; entry = jset_keys_next(entry)) {
+ for (entry = start; entry != end; entry = vstruct_next(entry)) {
entry->u64s = 0;
entry->btree_id = 0;
entry->level = 0;
@@ -473,7 +474,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j,
struct bkey_i *k, enum bkey_type key_type,
const char *type)
{
- void *next = jset_keys_next(entry);
+ void *next = vstruct_next(entry);
const char *invalid;
char buf[160];
int ret = 0;
@@ -481,16 +482,16 @@ static int journal_validate_key(struct cache_set *c, struct jset *j,
if (mustfix_fsck_err_on(!k->k.u64s, c,
"invalid %s in journal: k->u64s 0", type)) {
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
- journal_entry_null_range(jset_keys_next(entry), next);
+ journal_entry_null_range(vstruct_next(entry), next);
return 0;
}
if (mustfix_fsck_err_on((void *) bkey_next(k) >
- (void *) jset_keys_next(entry), c,
+ (void *) vstruct_next(entry), c,
"invalid %s in journal: extends past end of journal entry",
type)) {
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
- journal_entry_null_range(jset_keys_next(entry), next);
+ journal_entry_null_range(vstruct_next(entry), next);
return 0;
}
@@ -499,7 +500,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j,
type, k->k.format)) {
le16_add_cpu(&entry->u64s, -k->k.u64s);
memmove(k, bkey_next(k), next - (void *) bkey_next(k));
- journal_entry_null_range(jset_keys_next(entry), next);
+ journal_entry_null_range(vstruct_next(entry), next);
return 0;
}
@@ -514,7 +515,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j,
le16_add_cpu(&entry->u64s, -k->k.u64s);
memmove(k, bkey_next(k), next - (void *) bkey_next(k));
- journal_entry_null_range(jset_keys_next(entry), next);
+ journal_entry_null_range(vstruct_next(entry), next);
return 0;
}
fsck_err:
@@ -525,16 +526,17 @@ fsck_err:
#define JOURNAL_ENTRY_NONE 6
#define JOURNAL_ENTRY_BAD 7
-static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 sector,
+static int journal_entry_validate(struct cache_set *c,
+ struct jset *j, u64 sector,
unsigned bucket_sectors_left,
unsigned sectors_read)
{
struct jset_entry *entry;
- size_t bytes = __set_bytes(j, le32_to_cpu(j->u64s));
- u64 got, expect;
+ size_t bytes = vstruct_bytes(j);
+ struct bch_csum csum;
int ret = 0;
- if (le64_to_cpu(j->magic) != jset_magic(&c->disk_sb))
+ if (le64_to_cpu(j->magic) != jset_magic(c))
return JOURNAL_ENTRY_NONE;
if (le32_to_cpu(j->version) != BCACHE_JSET_VERSION) {
@@ -554,25 +556,32 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
if (bytes > sectors_read << 9)
return JOURNAL_ENTRY_REREAD;
- got = le64_to_cpu(j->csum);
- expect = __csum_set(j, le32_to_cpu(j->u64s), JSET_CSUM_TYPE(j));
- if (mustfix_fsck_err_on(got != expect, c,
- "journal checksum bad (got %llu expect %llu), sector %lluu",
- got, expect, sector)) {
+ if (fsck_err_on(!bch_checksum_type_valid(c, JSET_CSUM_TYPE(j)), c,
+ "journal entry with unknown csum type %llu sector %lluu",
+ JSET_CSUM_TYPE(j), sector))
+ return JOURNAL_ENTRY_BAD;
+
+ csum = csum_vstruct(c, JSET_CSUM_TYPE(j), journal_nonce(j), j);
+ if (mustfix_fsck_err_on(bch_crc_cmp(csum, j->csum), c,
+ "journal checksum bad, sector %llu", sector)) {
/* XXX: retry IO, when we start retrying checksum errors */
/* XXX: note we might have missing journal entries */
return JOURNAL_ENTRY_BAD;
}
- if (mustfix_fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq),
- c, "invalid journal entry: last_seq > seq"))
+ bch_encrypt(c, JSET_CSUM_TYPE(j), journal_nonce(j),
+ j->encrypted_start,
+ vstruct_end(j) - (void *) j->encrypted_start);
+
+ if (mustfix_fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c,
+ "invalid journal entry: last_seq > seq"))
j->last_seq = j->seq;
- for_each_jset_entry(entry, j) {
+ vstruct_for_each(j, entry) {
struct bkey_i *k;
- if (mustfix_fsck_err_on(jset_keys_next(entry) >
- bkey_idx(j, le32_to_cpu(j->u64s)), c,
+ if (mustfix_fsck_err_on(vstruct_next(entry) >
+ vstruct_last(j), c,
"journal entry extents past end of jset")) {
j->u64s = cpu_to_le64((u64 *) entry - j->_data);
break;
@@ -580,9 +589,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
switch (JOURNAL_ENTRY_TYPE(entry)) {
case JOURNAL_ENTRY_BTREE_KEYS:
- for (k = entry->start;
- k < bkey_idx(entry, le16_to_cpu(entry->u64s));
- k = bkey_next(k)) {
+ vstruct_for_each(entry, k) {
ret = journal_validate_key(c, j, entry, k,
bkey_type(entry->level,
entry->btree_id),
@@ -599,7 +606,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
le16_to_cpu(entry->u64s) != k->k.u64s, c,
"invalid btree root journal entry: wrong number of keys")) {
journal_entry_null_range(entry,
- jset_keys_next(entry));
+ vstruct_next(entry));
continue;
}
@@ -616,14 +623,14 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
if (mustfix_fsck_err_on(le16_to_cpu(entry->u64s) != 1, c,
"invalid journal seq blacklist entry: bad size")) {
journal_entry_null_range(entry,
- jset_keys_next(entry));
+ vstruct_next(entry));
}
break;
default:
mustfix_fsck_err(c, "invalid journal entry type %llu",
JOURNAL_ENTRY_TYPE(entry));
- journal_entry_null_range(entry, jset_keys_next(entry));
+ journal_entry_null_range(entry, vstruct_next(entry));
break;
}
}
@@ -632,126 +639,127 @@ fsck_err:
return ret;
}
-static int journal_read_bucket(struct cache *ca, struct journal_list *jlist,
+struct journal_read_buf {
+ void *data;
+ size_t size;
+};
+
+static int journal_read_buf_realloc(struct journal_read_buf *b,
+ size_t new_size)
+{
+ void *n;
+
+ new_size = roundup_pow_of_two(new_size);
+ n = (void *) __get_free_pages(GFP_KERNEL, get_order(new_size));
+ if (!n)
+ return -ENOMEM;
+
+ free_pages((unsigned long) b->data, get_order(b->size));
+ b->data = n;
+ b->size = new_size;
+ return 0;
+}
+
+static int journal_read_bucket(struct cache *ca,
+ struct journal_read_buf *buf,
+ struct journal_list *jlist,
unsigned bucket, u64 *seq, bool *entries_found)
{
struct cache_set *c = ca->set;
struct journal_device *ja = &ca->journal;
struct bio *bio = ja->bio;
- struct jset *j, *data;
- unsigned blocks, sectors_read, bucket_offset = 0;
- unsigned max_entry_sectors = c->journal.entry_size_max >> 9;
- u64 sector = bucket_to_sector(ca,
- journal_bucket(ca->disk_sb.sb, bucket));
+ struct jset *j = NULL;
+ unsigned sectors, sectors_read = 0;
+ u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
+ end = offset + ca->mi.bucket_size;
bool saw_bad = false;
int ret = 0;
- data = (void *) __get_free_pages(GFP_KERNEL,
- get_order(c->journal.entry_size_max));
- if (!data) {
- mutex_lock(&jlist->cache_set_buffer_lock);
- data = c->journal.buf[0].data;
- }
-
pr_debug("reading %u", bucket);
- while (bucket_offset < ca->mi.bucket_size) {
-reread:
- sectors_read = min_t(unsigned,
- ca->mi.bucket_size - bucket_offset,
- max_entry_sectors);
+ while (offset < end) {
+ if (!sectors_read) {
+reread: sectors_read = min_t(unsigned,
+ end - offset, buf->size >> 9);
- bio_reset(bio);
- bio->bi_bdev = ca->disk_sb.bdev;
- bio->bi_iter.bi_sector = sector + bucket_offset;
- bio->bi_iter.bi_size = sectors_read << 9;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
- bch_bio_map(bio, data);
-
- ret = submit_bio_wait(bio);
-
- if (cache_fatal_io_err_on(ret, ca,
- "journal read from sector %llu",
- sector + bucket_offset) ||
- bch_meta_read_fault("journal")) {
- ret = -EIO;
- goto err;
- }
+ bio_reset(bio);
+ bio->bi_bdev = ca->disk_sb.bdev;
+ bio->bi_iter.bi_sector = offset;
+ bio->bi_iter.bi_size = sectors_read << 9;
+ bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bch_bio_map(bio, buf->data);
- /* This function could be simpler now since we no longer write
- * journal entries that overlap bucket boundaries; this means
- * the start of a bucket will always have a valid journal entry
- * if it has any journal entries at all.
- */
+ ret = submit_bio_wait(bio);
- j = data;
- while (sectors_read) {
- ret = journal_entry_validate(c, j,
- sector + bucket_offset,
- ca->mi.bucket_size - bucket_offset,
- sectors_read);
- switch (ret) {
- case BCH_FSCK_OK:
- break;
- case JOURNAL_ENTRY_REREAD:
- goto reread;
- case JOURNAL_ENTRY_NONE:
- if (!saw_bad)
- goto out;
- blocks = 1;
- goto next_block;
- case JOURNAL_ENTRY_BAD:
- saw_bad = true;
- blocks = 1;
- goto next_block;
- default:
- goto err;
- }
+ if (cache_fatal_io_err_on(ret, ca,
+ "journal read from sector %llu",
+ offset) ||
+ bch_meta_read_fault("journal"))
+ return -EIO;
- /*
- * This happens sometimes if we don't have discards on -
- * when we've partially overwritten a bucket with new
- * journal entries. We don't need the rest of the
- * bucket:
- */
- if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
- goto out;
-
- ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
-
- ret = journal_entry_add(c, jlist, j);
- switch (ret) {
- case JOURNAL_ENTRY_ADD_OK:
- *entries_found = true;
- break;
- case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
- break;
- default:
- goto err;
+ j = buf->data;
+ }
+
+ ret = journal_entry_validate(c, j, offset,
+ end - offset, sectors_read);
+ switch (ret) {
+ case BCH_FSCK_OK:
+ break;
+ case JOURNAL_ENTRY_REREAD:
+ if (vstruct_bytes(j) > buf->size) {
+ ret = journal_read_buf_realloc(buf,
+ vstruct_bytes(j));
+ if (ret)
+ return ret;
}
+ goto reread;
+ case JOURNAL_ENTRY_NONE:
+ if (!saw_bad)
+ return 0;
+ sectors = c->sb.block_size;
+ goto next_block;
+ case JOURNAL_ENTRY_BAD:
+ saw_bad = true;
+ sectors = c->sb.block_size;
+ goto next_block;
+ default:
+ return ret;
+ }
- if (le64_to_cpu(j->seq) > *seq)
- *seq = le64_to_cpu(j->seq);
-next_block:
- blocks = __set_blocks(j, le32_to_cpu(j->u64s),
- block_bytes(c));
+ /*
+ * This happens sometimes if we don't have discards on -
+ * when we've partially overwritten a bucket with new
+ * journal entries. We don't need the rest of the
+ * bucket:
+ */
+ if (le64_to_cpu(j->seq) < ja->bucket_seq[bucket])
+ return 0;
+
+ ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
- pr_debug("next");
- bucket_offset += blocks * c->sb.block_size;
- sectors_read -= blocks * c->sb.block_size;
- j = ((void *) j) + blocks * block_bytes(c);
+ ret = journal_entry_add(c, jlist, j);
+ switch (ret) {
+ case JOURNAL_ENTRY_ADD_OK:
+ *entries_found = true;
+ break;
+ case JOURNAL_ENTRY_ADD_OUT_OF_RANGE:
+ break;
+ default:
+ return ret;
}
+
+ if (le64_to_cpu(j->seq) > *seq)
+ *seq = le64_to_cpu(j->seq);
+
+ sectors = vstruct_sectors(j, c->block_bits);
+next_block:
+ pr_debug("next");
+ offset += sectors;
+ sectors_read -= sectors;
+ j = ((void *) j) + (sectors << 9);
}
-out:
- ret = 0;
-err:
- if (data == c->journal.buf[0].data)
- mutex_unlock(&jlist->cache_set_buffer_lock);
- else
- free_pages((unsigned long) data,
- get_order(c->journal.entry_size_max));
- return ret;
+ return 0;
}
static void bch_journal_read_device(struct closure *cl)
@@ -759,15 +767,11 @@ static void bch_journal_read_device(struct closure *cl)
#define read_bucket(b) \
({ \
bool entries_found = false; \
- int ret = journal_read_bucket(ca, jlist, b, \
- &seq, &entries_found); \
+ ret = journal_read_bucket(ca, &buf, jlist, b, &seq, \
+ &entries_found); \
+ if (ret) \
+ goto err; \
__set_bit(b, bitmap); \
- if (ret) { \
- mutex_lock(&jlist->lock); \
- jlist->ret = ret; \
- mutex_unlock(&jlist->lock); \
- closure_return(cl); \
- } \
entries_found; \
})
@@ -777,24 +781,29 @@ static void bch_journal_read_device(struct closure *cl)
struct journal_list *jlist =
container_of(cl->parent, struct journal_list, cl);
struct request_queue *q = bdev_get_queue(ca->disk_sb.bdev);
+ struct journal_read_buf buf = { NULL, 0 };
- unsigned nr_buckets = bch_nr_journal_buckets(ca->disk_sb.sb);
- DECLARE_BITMAP(bitmap, nr_buckets);
+ DECLARE_BITMAP(bitmap, ja->nr);
unsigned i, l, r;
u64 seq = 0;
+ int ret;
- if (!nr_buckets)
- closure_return(cl);
+ if (!ja->nr)
+ goto out;
+
+ bitmap_zero(bitmap, ja->nr);
+ ret = journal_read_buf_realloc(&buf, PAGE_SIZE);
+ if (ret)
+ goto err;
- bitmap_zero(bitmap, nr_buckets);
- pr_debug("%u journal buckets", nr_buckets);
+ pr_debug("%u journal buckets", ja->nr);
/*
* If the device supports discard but not secure discard, we can't do
* the fancy fibonacci hash/binary search because the live journal
* entries might not form a contiguous range:
*/
- for (i = 0; i < nr_buckets; i++)
+ for (i = 0; i < ja->nr; i++)
read_bucket(i);
goto search_done;
@@ -805,8 +814,8 @@ static void bch_journal_read_device(struct closure *cl)
* Read journal buckets ordered by golden ratio hash to quickly
* find a sequence of buckets with valid journal entries
*/
- for (i = 0; i < nr_buckets; i++) {
- l = (i * 2654435769U) % nr_buckets;
+ for (i = 0; i < ja->nr; i++) {
+ l = (i * 2654435769U) % ja->nr;
if (test_bit(l, bitmap))
break;
@@ -821,18 +830,18 @@ static void bch_journal_read_device(struct closure *cl)
*/
pr_debug("falling back to linear search");
linear_scan:
- for (l = find_first_zero_bit(bitmap, nr_buckets);
- l < nr_buckets;
- l = find_next_zero_bit(bitmap, nr_buckets, l + 1))
+ for (l = find_first_zero_bit(bitmap, ja->nr);
+ l < ja->nr;
+ l = find_next_zero_bit(bitmap, ja->nr, l + 1))
if (read_bucket(l))
goto bsearch;
/* no journal entries on this device? */
- if (l == nr_buckets)
- closure_return(cl);
+ if (l == ja->nr)
+ goto out;
bsearch:
/* Binary search */
- r = find_next_bit(bitmap, nr_buckets, l + 1);
+ r = find_next_bit(bitmap, ja->nr, l + 1);
pr_debug("starting binary search, l %u r %u", l, r);
while (l + 1 < r) {
@@ -858,9 +867,9 @@ search_done:
*/
seq = 0;
- for (i = 0; i < nr_buckets; i++)
+ for (i = 0; i < ja->nr; i++)
if (ja->bucket_seq[i] >= seq &&
- ja->bucket_seq[i] != ja->bucket_seq[(i + 1) % nr_buckets]) {
+ ja->bucket_seq[i] != ja->bucket_seq[(i + 1) % ja->nr]) {
/*
* When journal_next_bucket() goes to allocate for
* the first time, it'll use the bucket after
@@ -875,20 +884,26 @@ search_done:
* reclaimed - journal reclaim will immediately reclaim whatever isn't
* pinned when it first runs:
*/
- ja->last_idx = (ja->cur_idx + 1) % nr_buckets;
+ ja->last_idx = (ja->cur_idx + 1) % ja->nr;
/*
* Read buckets in reverse order until we stop finding more journal
* entries:
*/
- for (i = (ja->cur_idx + nr_buckets - 1) % nr_buckets;
+ for (i = (ja->cur_idx + ja->nr - 1) % ja->nr;
i != ja->cur_idx;
- i = (i + nr_buckets - 1) % nr_buckets)
+ i = (i + ja->nr - 1) % ja->nr)
if (!test_bit(i, bitmap) &&
!read_bucket(i))
break;
-
+out:
+ free_pages((unsigned long) buf.data, get_order(buf.size));
closure_return(cl);
+err:
+ mutex_lock(&jlist->lock);
+ jlist->ret = ret;
+ mutex_unlock(&jlist->lock);
+ goto out;
#undef read_bucket
}
@@ -930,6 +945,19 @@ static int journal_seq_blacklist_read(struct journal *j,
return 0;
}
+static inline bool journal_has_keys(struct list_head *list)
+{
+ struct journal_replay *i;
+ struct jset_entry *entry;
+ struct bkey_i *k, *_n;
+
+ list_for_each_entry(i, list, list)
+ for_each_jset_key(k, _n, entry, &i->j)
+ return true;
+
+ return false;
+}
+
int bch_journal_read(struct cache_set *c, struct list_head *list)
{
struct jset_entry *prio_ptrs;
@@ -944,7 +972,6 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
closure_init_stack(&jlist.cl);
mutex_init(&jlist.lock);
- mutex_init(&jlist.cache_set_buffer_lock);
jlist.head = list;
jlist.ret = 0;
@@ -964,6 +991,9 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
return BCH_FSCK_REPAIR_IMPOSSIBLE;
}
+ fsck_err_on(c->sb.clean && journal_has_keys(list), c,
+ "filesystem marked clean but journal has keys to replay");
+
j = &list_entry(list->prev, struct journal_replay, list)->j;
unfixable_fsck_err_on(le64_to_cpu(j->seq) -
@@ -1057,7 +1087,7 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
struct bkey_s_c k_s_c = bkey_i_to_s_c(k);
if (btree_type_has_ptrs(type))
- __bch_btree_mark_key(c, type, k_s_c);
+ bch_btree_mark_key_initial(c, type, k_s_c);
}
}
@@ -1171,10 +1201,9 @@ static enum {
buf->data->last_seq = cpu_to_le64(last_seq(j));
j->prev_buf_sectors =
- __set_blocks(buf->data,
- le32_to_cpu(buf->data->u64s) +
- journal_entry_u64s_reserve(buf),
- block_bytes(c)) * c->sb.block_size;
+ vstruct_blocks_plus(buf->data, c->block_bits,
+ journal_entry_u64s_reserve(buf)) *
+ c->sb.block_size;
BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
@@ -1219,9 +1248,8 @@ static unsigned journal_dev_buckets_available(struct journal *j,
struct cache *ca)
{
struct journal_device *ja = &ca->journal;
- unsigned nr = bch_nr_journal_buckets(ca->disk_sb.sb);
- unsigned next = (ja->cur_idx + 1) % nr;
- unsigned available = (ja->last_idx + nr - next) % nr;
+ unsigned next = (ja->cur_idx + 1) % ja->nr;
+ unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
/*
* Hack to avoid a deadlock during journal replay:
@@ -1271,7 +1299,7 @@ static int journal_entry_sectors(struct journal *j)
* for the previous entry we have to make sure we have space for
* it too:
*/
- if (bch_extent_has_device(e.c, ca->sb.nr_this_dev)) {
+ if (bch_extent_has_device(e.c, ca->dev_idx)) {
if (j->prev_buf_sectors > ca->journal.sectors_free)
buckets_required++;
@@ -1479,17 +1507,28 @@ int bch_journal_replay(struct cache_set *c, struct list_head *list)
entries++;
}
+ if (keys) {
+ bch_btree_flush(c);
+
+ /*
+ * Write a new journal entry _before_ we start journalling new data -
+ * otherwise, we could end up with btree node bsets with journal seqs
+ * arbitrarily far in the future vs. the most recently written journal
+ * entry on disk, if we crash before writing the next journal entry:
+ */
+ ret = bch_journal_meta(&c->journal);
+ if (ret)
+ goto err;
+ }
+
bch_info(c, "journal replay done, %i keys in %i entries, seq %llu",
keys, entries, (u64) atomic64_read(&j->seq));
- fsck_err_on(c->sb.clean && keys, c,
- "filesystem marked clean, but journal had keys to replay");
-
bch_journal_set_replay_done(&c->journal);
err:
if (ret)
bch_err(c, "journal replay error: %d", ret);
-fsck_err:
+
bch_journal_entries_free(list);
return ret;
@@ -1497,28 +1536,40 @@ fsck_err:
static int bch_set_nr_journal_buckets(struct cache *ca, unsigned nr)
{
- unsigned u64s = bch_journal_buckets_offset(ca->disk_sb.sb) + nr;
+ struct journal_device *ja = &ca->journal;
+ struct bch_sb_field_journal *journal_buckets =
+ bch_sb_get_journal(ca->disk_sb.sb);
+ struct bch_sb_field *f;
u64 *p;
- int ret;
- ret = bch_super_realloc(&ca->disk_sb, u64s);
- if (ret)
- return ret;
+ p = krealloc(ja->bucket_seq, nr * sizeof(u64),
+ GFP_KERNEL|__GFP_ZERO);
+ if (!p)
+ return -ENOMEM;
+
+ ja->bucket_seq = p;
- p = krealloc(ca->journal.bucket_seq,
- nr * sizeof(u64),
+ p = krealloc(ja->buckets, nr * sizeof(u64),
GFP_KERNEL|__GFP_ZERO);
if (!p)
return -ENOMEM;
- ca->journal.bucket_seq = p;
- ca->disk_sb.sb->u64s = cpu_to_le16(u64s);
+ ja->buckets = p;
+
+ f = bch_dev_sb_field_resize(&ca->disk_sb, &journal_buckets->field, nr +
+ sizeof(*journal_buckets) / sizeof(u64));
+ if (!f)
+ return -ENOMEM;
+ f->type = BCH_SB_FIELD_journal;
+ ja->nr = nr;
return 0;
}
int bch_cache_journal_alloc(struct cache *ca)
{
+ struct journal_device *ja = &ca->journal;
+ struct bch_sb_field_journal *journal_buckets;
int ret;
unsigned i;
@@ -1540,11 +1591,15 @@ int bch_cache_journal_alloc(struct cache *ca)
if (ret)
return ret;
- for (i = 0; i < bch_nr_journal_buckets(ca->disk_sb.sb); i++) {
- unsigned long r = ca->mi.first_bucket + i;
+ journal_buckets = bch_sb_get_journal(ca->disk_sb.sb);
+
+ for (i = 0; i < ja->nr; i++) {
+ u64 bucket = ca->mi.first_bucket + i;
- bch_mark_metadata_bucket(ca, &ca->buckets[r], true);
- set_journal_bucket(ca->disk_sb.sb, i, r);
+ ja->buckets[i] = bucket;
+ journal_buckets->buckets[i] = cpu_to_le64(bucket);
+
+ bch_mark_metadata_bucket(ca, &ca->buckets[bucket], true);
}
return 0;
@@ -1749,7 +1804,7 @@ static void journal_reclaim_work(struct work_struct *work)
struct cache *ca;
struct journal_entry_pin *pin;
u64 seq_to_flush = 0;
- unsigned iter, nr, bucket_to_flush;
+ unsigned iter, bucket_to_flush;
unsigned long next_flush;
bool reclaim_lock_held = false, need_flush;
@@ -1781,13 +1836,11 @@ static void journal_reclaim_work(struct work_struct *work)
blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
blkdev_issue_discard(ca->disk_sb.bdev,
bucket_to_sector(ca,
- journal_bucket(ca->disk_sb.sb,
- ja->last_idx)),
+ ja->buckets[ja->last_idx]),
ca->mi.bucket_size, GFP_NOIO, 0);
spin_lock(&j->lock);
- ja->last_idx = (ja->last_idx + 1) %
- bch_nr_journal_buckets(ca->disk_sb.sb);
+ ja->last_idx = (ja->last_idx + 1) % ja->nr;
spin_unlock(&j->lock);
wake_up(&j->wait);
@@ -1798,8 +1851,7 @@ static void journal_reclaim_work(struct work_struct *work)
* buckets
*/
spin_lock(&j->lock);
- nr = bch_nr_journal_buckets(ca->disk_sb.sb),
- bucket_to_flush = (ja->cur_idx + (nr >> 1)) % nr;
+ bucket_to_flush = (ja->cur_idx + (ja->nr >> 1)) % ja->nr;
seq_to_flush = max_t(u64, seq_to_flush,
ja->bucket_seq[bucket_to_flush]);
spin_unlock(&j->lock);
@@ -1861,7 +1913,7 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
*/
extent_for_each_ptr_backwards(e, ptr)
if (!(ca = PTR_CACHE(c, ptr)) ||
- ca->mi.state != CACHE_ACTIVE ||
+ ca->mi.state != BCH_MEMBER_STATE_ACTIVE ||
ca->journal.sectors_free <= sectors)
__bch_extent_drop_ptr(e, ptr);
else
@@ -1875,7 +1927,6 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
*/
group_for_each_cache_rcu(ca, &j->devs, iter) {
struct journal_device *ja = &ca->journal;
- unsigned nr_buckets = bch_nr_journal_buckets(ca->disk_sb.sb);
if (replicas >= replicas_want)
break;
@@ -1884,21 +1935,20 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
* Check that we can use this device, and aren't already using
* it:
*/
- if (bch_extent_has_device(e.c, ca->sb.nr_this_dev) ||
+ if (bch_extent_has_device(e.c, ca->dev_idx) ||
!journal_dev_buckets_available(j, ca) ||
sectors > ca->mi.bucket_size)
continue;
ja->sectors_free = ca->mi.bucket_size - sectors;
- ja->cur_idx = (ja->cur_idx + 1) % nr_buckets;
+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
ja->bucket_seq[ja->cur_idx] = atomic64_read(&j->seq);
extent_ptr_append(bkey_i_to_extent(&j->key),
(struct bch_extent_ptr) {
.offset = bucket_to_sector(ca,
- journal_bucket(ca->disk_sb.sb,
- ja->cur_idx)),
- .dev = ca->sb.nr_this_dev,
+ ja->buckets[ja->cur_idx]),
+ .dev = ca->dev_idx,
});
replicas++;
@@ -1928,10 +1978,7 @@ static void journal_write_compact(struct jset *jset)
* If we wanted to be really fancy here, we could sort all the keys in
* the jset and drop keys that were overwritten - probably not worth it:
*/
- for (i = jset->start;
- i < (struct jset_entry *) bkey_idx(jset, le32_to_cpu(jset->u64s)) &&
- (next = jset_keys_next(i), true);
- i = next) {
+ vstruct_for_each_safe(jset, i, next) {
unsigned u64s = le16_to_cpu(i->u64s);
/* Empty entry: */
@@ -1945,7 +1992,7 @@ static void journal_write_compact(struct jset *jset)
JOURNAL_ENTRY_TYPE(i) == JOURNAL_ENTRY_TYPE(prev) &&
JOURNAL_ENTRY_TYPE(i) == JOURNAL_ENTRY_BTREE_KEYS &&
le16_to_cpu(prev->u64s) + u64s <= U16_MAX) {
- memmove_u64s_down(jset_keys_next(prev),
+ memmove_u64s_down(vstruct_next(prev),
i->_data,
u64s);
le16_add_cpu(&prev->u64s, u64s);
@@ -1953,12 +2000,12 @@ static void journal_write_compact(struct jset *jset)
}
/* Couldn't merge, move i into new position (after prev): */
- prev = prev ? jset_keys_next(prev) : jset->start;
+ prev = prev ? vstruct_next(prev) : jset->start;
if (i != prev)
memmove_u64s_down(prev, i, jset_u64s(u64s));
}
- prev = prev ? jset_keys_next(prev) : jset->start;
+ prev = prev ? vstruct_next(prev) : jset->start;
jset->u64s = cpu_to_le32((u64 *) prev - jset->_data);
}
@@ -2019,6 +2066,7 @@ static void journal_write(struct closure *cl)
struct cache_set *c = container_of(j, struct cache_set, journal);
struct cache *ca;
struct journal_buf *w = journal_prev_buf(j);
+ struct jset *jset = w->data;
struct bio *bio;
struct bch_extent_ptr *ptr;
unsigned i, sectors, bytes;
@@ -2036,24 +2084,27 @@ static void journal_write(struct closure *cl)
}
mutex_unlock(&c->btree_root_lock);
- journal_write_compact(w->data);
+ journal_write_compact(jset);
+
+ jset->read_clock = cpu_to_le16(c->prio_clock[READ].hand);
+ jset->write_clock = cpu_to_le16(c->prio_clock[WRITE].hand);
+ jset->magic = cpu_to_le64(jset_magic(c));
+ jset->version = cpu_to_le32(BCACHE_JSET_VERSION);
- w->data->read_clock = cpu_to_le16(c->prio_clock[READ].hand);
- w->data->write_clock = cpu_to_le16(c->prio_clock[WRITE].hand);
- w->data->magic = cpu_to_le64(jset_magic(&c->disk_sb));
- w->data->version = cpu_to_le32(BCACHE_JSET_VERSION);
+ SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN);
+ SET_JSET_CSUM_TYPE(jset, bch_meta_checksum_type(c));
- SET_JSET_BIG_ENDIAN(w->data, CPU_BIG_ENDIAN);
- SET_JSET_CSUM_TYPE(w->data, c->opts.metadata_checksum);
- w->data->csum = cpu_to_le64(__csum_set(w->data,
- le32_to_cpu(w->data->u64s),
- JSET_CSUM_TYPE(w->data)));
+ bch_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
+ jset->encrypted_start,
+ vstruct_end(jset) - (void *) jset->encrypted_start);
- sectors = __set_blocks(w->data, le32_to_cpu(w->data->u64s),
- block_bytes(c)) * c->sb.block_size;
+ jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset),
+ journal_nonce(jset), jset);
+
+ sectors = vstruct_sectors(jset, c->block_bits);
BUG_ON(sectors > j->prev_buf_sectors);
- bytes = __set_bytes(w->data, le32_to_cpu(w->data->u64s));
+ bytes = vstruct_bytes(w->data);
memset((void *) w->data + bytes, 0, (sectors << 9) - bytes);
if (journal_write_alloc(j, sectors)) {
@@ -2096,7 +2147,7 @@ static void journal_write(struct closure *cl)
bio->bi_private = ca;
bio_set_op_attrs(bio, REQ_OP_WRITE,
REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
- bch_bio_map(bio, w->data);
+ bch_bio_map(bio, jset);
trace_bcache_journal_write(bio);
closure_bio_submit_punt(bio, cl, c);
@@ -2105,7 +2156,7 @@ static void journal_write(struct closure *cl)
}
for_each_cache(ca, c, i)
- if (ca->mi.state == CACHE_ACTIVE &&
+ if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
journal_flushes_device(ca) &&
!bch_extent_has_device(bkey_i_to_s_c_extent(&j->key), i)) {
percpu_ref_get(&ca->ref);
@@ -2503,7 +2554,7 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf)
"\tnr\t\t%u\n"
"\tcur_idx\t\t%u (seq %llu)\n"
"\tlast_idx\t%u (seq %llu)\n",
- iter, bch_nr_journal_buckets(ca->disk_sb.sb),
+ iter, ja->nr,
ja->cur_idx, ja->bucket_seq[ja->cur_idx],
ja->last_idx, ja->bucket_seq[ja->last_idx]);
}
@@ -2521,7 +2572,7 @@ static bool bch_journal_writing_to_device(struct cache *ca)
spin_lock(&j->lock);
ret = bch_extent_has_device(bkey_i_to_s_c_extent(&j->key),
- ca->sb.nr_this_dev);
+ ca->dev_idx);
spin_unlock(&j->lock);
return ret;
@@ -2541,10 +2592,11 @@ static bool bch_journal_writing_to_device(struct cache *ca)
int bch_journal_move(struct cache *ca)
{
- unsigned i, nr_buckets;
u64 last_flushed_seq;
+ struct journal_device *ja = &ca->journal;
struct cache_set *c = ca->set;
struct journal *j = &c->journal;
+ unsigned i;
int ret = 0; /* Success */
if (bch_journal_writing_to_device(ca)) {
@@ -2585,10 +2637,45 @@ int bch_journal_move(struct cache *ca)
last_flushed_seq = last_seq(j);
spin_unlock(&j->lock);
- nr_buckets = bch_nr_journal_buckets(ca->disk_sb.sb);
-
- for (i = 0; i < nr_buckets; i += 1)
- BUG_ON(ca->journal.bucket_seq[i] > last_flushed_seq);
+ for (i = 0; i < ja->nr; i += 1)
+ BUG_ON(ja->bucket_seq[i] > last_flushed_seq);
return ret;
}
+
+void bch_journal_free_cache(struct cache *ca)
+{
+ kfree(ca->journal.buckets);
+ kfree(ca->journal.bucket_seq);
+}
+
+int bch_journal_init_cache(struct cache *ca)
+{
+ struct journal_device *ja = &ca->journal;
+ struct bch_sb_field_journal *journal_buckets =
+ bch_sb_get_journal(ca->disk_sb.sb);
+ unsigned i, journal_entry_pages;
+
+ journal_entry_pages =
+ DIV_ROUND_UP(1U << BCH_SB_JOURNAL_ENTRY_SIZE(ca->disk_sb.sb),
+ PAGE_SECTORS);
+
+ ja->nr = bch_nr_journal_buckets(journal_buckets);
+
+ ja->bucket_seq = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
+ if (!ja->bucket_seq)
+ return -ENOMEM;
+
+ ca->journal.bio = bio_kmalloc(GFP_KERNEL, journal_entry_pages);
+ if (!ca->journal.bio)
+ return -ENOMEM;
+
+ ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
+ if (!ja->buckets)
+ return -ENOMEM;
+
+ for (i = 0; i < ja->nr; i++)
+ ja->buckets[i] = le64_to_cpu(journal_buckets->buckets[i]);
+
+ return 0;
+}
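
bch_journal_read_device() still probes buckets "ordered by golden ratio hash": the loop now computes l = (i * 2654435769U) % ja->nr, so a short prefix of probes is spread roughly evenly across the bucket range before the code falls back to a linear scan and binary search. A runnable sketch of that probe order (nr = 16 is an arbitrary example count, not anything from the patch):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /*
             * 2654435769 is roughly 2^32 / golden ratio; multiplying by it
             * scatters consecutive i values roughly evenly over [0, nr)
             */
            const uint32_t golden = 2654435769U;
            unsigned nr = 16;       /* arbitrary bucket count for illustration */

            for (unsigned i = 0; i < nr; i++)
                    printf("probe %2u -> bucket %2u\n",
                           i, (unsigned) ((i * golden) % nr));
            return 0;
    }
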
diff --git a/libbcache/journal.h b/libbcache/journal.h
index 759ed609..9274831a 100644
--- a/libbcache/journal.h
+++ b/libbcache/journal.h
@@ -111,11 +111,7 @@
#include <linux/hash.h>
#include "journal_types.h"
-
-static inline struct jset_entry *jset_keys_next(struct jset_entry *j)
-{
- return (void *) __bkey_idx(j, le16_to_cpu(j->u64s));
-}
+//#include "super-io.h"
/*
* Only used for holding the journal entries we read in btree_journal_read()
@@ -182,7 +178,7 @@ static inline void bch_journal_add_entry_at(struct journal_buf *buf,
unsigned type, enum btree_id id,
unsigned level, unsigned offset)
{
- struct jset_entry *entry = bkey_idx(buf->data, offset);
+ struct jset_entry *entry = vstruct_idx(buf->data, offset);
entry->u64s = cpu_to_le16(u64s);
entry->btree_id = id;
@@ -336,7 +332,7 @@ static inline int bch_journal_error(struct journal *j)
static inline bool is_journal_device(struct cache *ca)
{
- return ca->mi.state == CACHE_ACTIVE && ca->mi.tier == 0;
+ return ca->mi.state == BCH_MEMBER_STATE_ACTIVE && ca->mi.tier == 0;
}
static inline bool journal_flushes_device(struct cache *ca)
@@ -367,21 +363,16 @@ ssize_t bch_journal_print_debug(struct journal *, char *);
int bch_cache_journal_alloc(struct cache *);
-static inline __le64 *__journal_buckets(struct cache_sb *sb)
-{
- return sb->_data + bch_journal_buckets_offset(sb);
-}
-
-static inline u64 journal_bucket(struct cache_sb *sb, unsigned nr)
+static inline unsigned bch_nr_journal_buckets(struct bch_sb_field_journal *j)
{
- return le64_to_cpu(__journal_buckets(sb)[nr]);
-}
-
-static inline void set_journal_bucket(struct cache_sb *sb, unsigned nr, u64 bucket)
-{
- __journal_buckets(sb)[nr] = cpu_to_le64(bucket);
+ return j
+ ? (__le64 *) vstruct_end(&j->field) - j->buckets
+ : 0;
}
int bch_journal_move(struct cache *);
+void bch_journal_free_cache(struct cache *);
+int bch_journal_init_cache(struct cache *);
+
#endif /* _BCACHE_JOURNAL_H */
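
The journal code drops its local jset_keys_next() helper in favour of the generic vstruct_*() macros from vstructs.h, which treat a jset or jset_entry as a small header whose u64s field gives the length of a trailing array of 64-bit words; bch_nr_journal_buckets() above computes the bucket count as the number of __le64s between the buckets array and vstruct_end() of the field. The sketch below is an illustrative approximation of that pattern under those assumptions (the real macros are type-generic and may differ in detail), with _example suffixes to make that clear.

    #include <stdint.h>
    #include <stddef.h>

    /* variable-length structure: a fixed header followed by u64s 64-bit words */
    struct vstruct_example {
            uint16_t        u64s;   /* size of _data[], in u64s */
            uint16_t        type;
            uint32_t        pad;
            uint64_t        _data[];
    };

    /* total size in bytes: header plus payload */
    static inline size_t vstruct_bytes_example(const struct vstruct_example *v)
    {
            return sizeof(*v) + v->u64s * sizeof(uint64_t);
    }

    /* one past the last payload word */
    static inline uint64_t *vstruct_end_example(struct vstruct_example *v)
    {
            return v->_data + v->u64s;
    }

    /* next entry in a packed sequence of variable-length structs */
    static inline struct vstruct_example *vstruct_next_example(struct vstruct_example *v)
    {
            return (struct vstruct_example *) vstruct_end_example(v);
    }
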
diff --git a/libbcache/journal_types.h b/libbcache/journal_types.h
index e3698b5a..5c95e37d 100644
--- a/libbcache/journal_types.h
+++ b/libbcache/journal_types.h
@@ -186,7 +186,7 @@ struct journal {
* ugh: need to get prio_buckets converted over to the eventual new
* transaction machinery
*/
- __le64 prio_buckets[MAX_CACHES_PER_SET];
+ __le64 prio_buckets[BCH_SB_MEMBERS_MAX];
unsigned nr_prio_buckets;
unsigned write_delay_ms;
@@ -208,7 +208,7 @@ struct journal {
/*
* Embedded in struct cache. First three fields refer to the array of journal
- * buckets, in cache_sb.
+ * buckets, in bch_sb.
*/
struct journal_device {
/*
@@ -229,6 +229,8 @@ struct journal_device {
* sufficient to read:
*/
unsigned last_idx;
+ unsigned nr;
+ u64 *buckets;
/* Bio for journal reads/writes to this device */
struct bio *bio;
diff --git a/libbcache/migrate.c b/libbcache/migrate.c
index 5a26e228..407ca17e 100644
--- a/libbcache/migrate.c
+++ b/libbcache/migrate.c
@@ -25,7 +25,7 @@ static int issue_migration_move(struct cache *ca,
return -ENOSPC;
extent_for_each_ptr(bkey_s_c_to_extent(k), ptr)
- if (ptr->dev == ca->sb.nr_this_dev)
+ if (ptr->dev == ca->dev_idx)
goto found;
BUG();
@@ -62,7 +62,7 @@ int bch_move_data_off_device(struct cache *ca)
u64 seen_key_count;
int ret = 0;
- BUG_ON(ca->mi.state == CACHE_ACTIVE);
+ BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE);
bch_move_ctxt_init(&ctxt, NULL, SECTORS_IN_FLIGHT_PER_DEVICE);
ctxt.avoid = ca;
@@ -99,7 +99,7 @@ int bch_move_data_off_device(struct cache *ca)
!(ret = btree_iter_err(k))) {
if (!bkey_extent_is_data(k.k) ||
!bch_extent_has_device(bkey_s_c_to_extent(k),
- ca->sb.nr_this_dev))
+ ca->dev_idx))
goto next;
ret = issue_migration_move(ca, &ctxt, k);
@@ -151,14 +151,14 @@ static int bch_move_btree_off(struct cache *ca, enum btree_id id)
struct btree *b;
int ret;
- BUG_ON(ca->mi.state == CACHE_ACTIVE);
+ BUG_ON(ca->mi.state == BCH_MEMBER_STATE_ACTIVE);
closure_init_stack(&cl);
for_each_btree_node(&iter, c, id, POS_MIN, 0, b) {
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
retry:
- if (!bch_extent_has_device(e, ca->sb.nr_this_dev))
+ if (!bch_extent_has_device(e, ca->dev_idx))
continue;
ret = bch_btree_node_rewrite(&iter, b, &cl);
@@ -188,7 +188,7 @@ retry:
for_each_btree_node(&iter, c, id, POS_MIN, 0, b) {
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
- BUG_ON(bch_extent_has_device(e, ca->sb.nr_this_dev));
+ BUG_ON(bch_extent_has_device(e, ca->dev_idx));
}
bch_btree_iter_unlock(&iter);
}
@@ -282,7 +282,7 @@ static int bch_flag_key_bad(struct btree_iter *iter,
e = bkey_i_to_s_extent(&tmp.key);
extent_for_each_ptr_backwards(e, ptr)
- if (ptr->dev == ca->sb.nr_this_dev)
+ if (ptr->dev == ca->dev_idx)
bch_extent_drop_ptr(e, ptr);
/*
@@ -323,7 +323,7 @@ int bch_flag_data_bad(struct cache *ca)
goto advance;
e = bkey_s_c_to_extent(k);
- if (!bch_extent_has_device(e, ca->sb.nr_this_dev))
+ if (!bch_extent_has_device(e, ca->dev_idx))
goto advance;
ret = bch_flag_key_bad(&iter, ca, e);
diff --git a/libbcache/move.c b/libbcache/move.c
index f3ab9e83..655a5233 100644
--- a/libbcache/move.c
+++ b/libbcache/move.c
@@ -5,7 +5,7 @@
#include "buckets.h"
#include "io.h"
#include "move.h"
-#include "super.h"
+#include "super-io.h"
#include "keylist.h"
#include <linux/ioprio.h>
@@ -63,7 +63,8 @@ static int bch_migrate_index_update(struct bch_write_op *op)
bkey_start_pos(&bch_keylist_front(keys)->k));
while (1) {
- struct bkey_i *insert = bch_keylist_front(keys);
+ struct bkey_s_extent insert =
+ bkey_i_to_s_extent(bch_keylist_front(keys));
struct bkey_s_c k = bch_btree_iter_peek_with_holes(&iter);
struct bch_extent_ptr *ptr;
struct bkey_s_extent e;
@@ -79,17 +80,18 @@ static int bch_migrate_index_update(struct bch_write_op *op)
bkey_reassemble(&new.k, k);
bch_cut_front(iter.pos, &new.k);
- bch_cut_back(insert->k.p, &new.k.k);
+ bch_cut_back(insert.k->p, &new.k.k);
e = bkey_i_to_s_extent(&new.k);
/* hack - promotes can race: */
if (m->promote)
- extent_for_each_ptr(bkey_i_to_s_extent(insert), ptr)
+ extent_for_each_ptr(insert, ptr)
if (bch_extent_has_device(e.c, ptr->dev))
goto nomatch;
ptr = bch_migrate_matching_ptr(m, e);
if (ptr) {
+ int nr_new_dirty = bch_extent_nr_dirty_ptrs(insert.s_c);
unsigned insert_flags =
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL;
@@ -98,17 +100,22 @@ static int bch_migrate_index_update(struct bch_write_op *op)
if (m->move)
insert_flags |= BTREE_INSERT_USE_RESERVE;
- if (m->move)
+ if (m->move) {
+ nr_new_dirty -= !ptr->cached;
__bch_extent_drop_ptr(e, ptr);
+ }
+
+ BUG_ON(nr_new_dirty < 0);
memcpy_u64s(extent_entry_last(e),
- &insert->v,
- bkey_val_u64s(&insert->k));
- e.k->u64s += bkey_val_u64s(&insert->k);
+ insert.v,
+ bkey_val_u64s(insert.k));
+ e.k->u64s += bkey_val_u64s(insert.k);
bch_extent_narrow_crcs(e);
bch_extent_drop_redundant_crcs(e);
bch_extent_normalize(c, e.s);
+ bch_extent_mark_replicas_cached(c, e, nr_new_dirty);
ret = bch_btree_insert_at(c, &op->res,
NULL, op_journal_seq(op),
@@ -148,7 +155,8 @@ void bch_migrate_write_init(struct cache_set *c,
if (move_ptr)
m->move_ptr = *move_ptr;
- if (bkey_extent_is_cached(k.k))
+ if (bkey_extent_is_cached(k.k) ||
+ (move_ptr && move_ptr->cached))
flags |= BCH_WRITE_CACHED;
bch_write_op_init(&m->op, c, &m->wbio,
@@ -160,6 +168,7 @@ void bch_migrate_write_init(struct cache_set *c,
if (m->move)
m->op.alloc_reserve = RESERVE_MOVINGGC;
+ m->op.nonce = extent_current_nonce(bkey_s_c_to_extent(k));
m->op.nr_replicas = 1;
m->op.index_update_fn = bch_migrate_index_update;
}
diff --git a/libbcache/movinggc.c b/libbcache/movinggc.c
index cb4f1654..83407eb1 100644
--- a/libbcache/movinggc.c
+++ b/libbcache/movinggc.c
@@ -28,7 +28,7 @@ static const struct bch_extent_ptr *moving_pred(struct cache *ca,
if (bkey_extent_is_data(k.k) &&
(ptr = bch_extent_has_device(bkey_s_c_to_extent(k),
- ca->sb.nr_this_dev)) &&
+ ca->dev_idx)) &&
PTR_BUCKET(ca, ptr)->mark.copygc)
return ptr;
diff --git a/libbcache/notify.c b/libbcache/notify.c
index e9b5568c..3a50f8fb 100644
--- a/libbcache/notify.c
+++ b/libbcache/notify.c
@@ -25,7 +25,7 @@ static void notify_get(struct cache_set *c)
env->envp_idx = 0;
env->buflen = 0;
- notify_var(c, "SET_UUID=%pU", c->disk_sb.user_uuid.b);
+ notify_var(c, "SET_UUID=%pU", c->sb.user_uuid.b);
}
static void notify_get_cache(struct cache *ca)
@@ -34,7 +34,7 @@ static void notify_get_cache(struct cache *ca)
char buf[BDEVNAME_SIZE];
notify_get(c);
- notify_var(c, "UUID=%pU", ca->disk_sb.sb->disk_uuid.b);
+ notify_var(c, "UUID=%pU", ca->uuid.b);
notify_var(c, "BLOCKDEV=%s", bdevname(ca->disk_sb.bdev, buf));
}
diff --git a/libbcache/opts.c b/libbcache/opts.c
index 60a2a4d1..333654eb 100644
--- a/libbcache/opts.c
+++ b/libbcache/opts.c
@@ -29,7 +29,6 @@ const char * const bch_str_hash_types[] = {
"crc32c",
"crc64",
"siphash",
- "sha1",
NULL
};
@@ -70,11 +69,11 @@ const char * const bch_uint_opt[] = {
};
enum bch_opts {
-#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
+#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
Opt_##_name,
- CACHE_SET_VISIBLE_OPTS()
-#undef CACHE_SET_OPT
+ BCH_VISIBLE_OPTS()
+#undef BCH_OPT
Opt_bad_opt,
};
@@ -144,15 +143,15 @@ static int parse_string_opt(const struct bch_option *opt, const char *s)
static struct bch_opt_result parse_one_opt(const char *opt)
{
static const struct bch_option opt_table[] = {
-#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
+#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
[Opt_##_name] = { \
.name = #_name, \
.opts = _choices, \
.min = _min, \
.max = _max, \
},
- CACHE_SET_VISIBLE_OPTS()
-#undef CACHE_SET_OPT
+ BCH_VISIBLE_OPTS()
+#undef BCH_OPT
}, *i;
for (i = opt_table;
@@ -186,13 +185,13 @@ int bch_parse_options(struct cache_set_opts *opts, int flags, char *options)
struct bch_opt_result res = parse_one_opt(p);
switch (res.opt) {
-#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
+#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
case Opt_##_name: \
opts->_name = res.val; \
break;
- CACHE_SET_VISIBLE_OPTS()
-#undef CACHE_SET_OPT
+ BCH_VISIBLE_OPTS()
+#undef BCH_OPT
case Opt_bad_opt:
return -EINVAL;
diff --git a/libbcache/opts.h b/libbcache/opts.h
index 70df232c..1d30848f 100644
--- a/libbcache/opts.h
+++ b/libbcache/opts.h
@@ -30,47 +30,47 @@ extern const char * const bch_bool_opt[];
extern const char * const bch_uint_opt[];
/* dummy option, for options that aren't stored in the superblock */
-LE64_BITMASK(NO_SB_OPT, struct cache_sb, flags, 0, 0);
-
-#define CACHE_SET_VISIBLE_OPTS() \
- CACHE_SET_OPT(verbose_recovery, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, false) \
- CACHE_SET_OPT(posix_acl, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, false) \
- CACHE_SET_OPT(journal_flush_disabled, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, true) \
- CACHE_SET_OPT(nofsck, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, true) \
- CACHE_SET_OPT(fix_errors, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, true) \
- CACHE_SET_OPT(nochanges, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, 0) \
- CACHE_SET_OPT(noreplay, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, 0) \
- CACHE_SET_OPT(norecovery, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, 0) \
- CACHE_SET_SB_OPTS()
-
-#define CACHE_SET_OPTS() \
- CACHE_SET_OPT(read_only, \
- bch_bool_opt, 0, 2, \
- NO_SB_OPT, 0) \
- CACHE_SET_VISIBLE_OPTS()
+LE64_BITMASK(NO_SB_OPT, struct bch_sb, flags[0], 0, 0);
+
+#define BCH_VISIBLE_OPTS() \
+ BCH_OPT(verbose_recovery, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, false) \
+ BCH_OPT(posix_acl, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, false) \
+ BCH_OPT(journal_flush_disabled, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, true) \
+ BCH_OPT(nofsck, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, true) \
+ BCH_OPT(fix_errors, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, true) \
+ BCH_OPT(nochanges, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, 0) \
+ BCH_OPT(noreplay, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, 0) \
+ BCH_OPT(norecovery, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, 0) \
+ BCH_SB_OPTS()
+
+#define BCH_OPTS() \
+ BCH_OPT(read_only, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, 0) \
+ BCH_VISIBLE_OPTS()
struct cache_set_opts {
-#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\
+#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\
s8 _name;
- CACHE_SET_OPTS()
-#undef CACHE_SET_OPT
+ BCH_OPTS()
+#undef BCH_OPT
};
static inline struct cache_set_opts cache_set_opts_empty(void)
@@ -85,27 +85,27 @@ static inline struct cache_set_opts cache_set_opts_empty(void)
* Initial options from superblock - here we don't want any options undefined,
* any options the superblock doesn't specify are set to 0:
*/
-static inline struct cache_set_opts cache_superblock_opts(struct cache_sb *sb)
+static inline struct cache_set_opts cache_superblock_opts(struct bch_sb *sb)
{
return (struct cache_set_opts) {
-#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\
+#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\
._name = _sb_opt##_BITS ? _sb_opt(sb) : 0,
- CACHE_SET_OPTS()
-#undef CACHE_SET_OPT
+ BCH_SB_OPTS()
+#undef BCH_OPT
};
}
static inline void cache_set_opts_apply(struct cache_set_opts *dst,
struct cache_set_opts src)
{
-#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\
+#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm)\
BUILD_BUG_ON(_max > S8_MAX); \
if (src._name >= 0) \
dst->_name = src._name;
- CACHE_SET_OPTS()
-#undef CACHE_SET_OPT
+ BCH_SB_OPTS()
+#undef BCH_OPT
}
int bch_parse_options(struct cache_set_opts *, int, char *);
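
The opts.h rewrite renames the macros but keeps the same X-macro technique: each BCH_OPT(...) line is expanded once to build the enum of option ids and again to declare the fields of struct cache_set_opts, so the option list lives in exactly one place. A self-contained sketch of the technique with a made-up two-entry option list (all *_example / Opt_example_* names are illustrative):

    #include <stdio.h>

    /* single source of truth: one line per option */
    #define EXAMPLE_OPTS()                          \
            EXAMPLE_OPT(verbose,    0, 1)           \
            EXAMPLE_OPT(read_only,  0, 1)

    /* expansion 1: an enum of option ids */
    enum example_opt_id {
    #define EXAMPLE_OPT(_name, _min, _max)  Opt_example_##_name,
            EXAMPLE_OPTS()
    #undef EXAMPLE_OPT
            Opt_example_nr,
    };

    /* expansion 2: one field per option */
    struct example_opts {
    #define EXAMPLE_OPT(_name, _min, _max)  signed char _name;
            EXAMPLE_OPTS()
    #undef EXAMPLE_OPT
    };

    int main(void)
    {
            struct example_opts opts = { .verbose = 1, .read_only = -1 };

            printf("%d options, verbose=%d\n", (int) Opt_example_nr, opts.verbose);
            return 0;
    }
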
diff --git a/libbcache/siphash.c b/libbcache/siphash.c
index 5ba80b52..3a6c9c82 100644
--- a/libbcache/siphash.c
+++ b/libbcache/siphash.c
@@ -43,19 +43,46 @@
* https://131002.net/siphash/
*/
-//#include <sys/param.h>
-//#include <sys/systm.h>
-
#include <asm/byteorder.h>
+#include <asm/unaligned.h>
+#include <linux/bitops.h>
#include <linux/string.h>
#include "siphash.h"
-static void SipHash_CRounds(SIPHASH_CTX *, int);
-static void SipHash_Rounds(SIPHASH_CTX *, int);
+static void SipHash_Rounds(SIPHASH_CTX *ctx, int rounds)
+{
+ while (rounds--) {
+ ctx->v[0] += ctx->v[1];
+ ctx->v[2] += ctx->v[3];
+ ctx->v[1] = rol64(ctx->v[1], 13);
+ ctx->v[3] = rol64(ctx->v[3], 16);
+
+ ctx->v[1] ^= ctx->v[0];
+ ctx->v[3] ^= ctx->v[2];
+ ctx->v[0] = rol64(ctx->v[0], 32);
+
+ ctx->v[2] += ctx->v[1];
+ ctx->v[0] += ctx->v[3];
+ ctx->v[1] = rol64(ctx->v[1], 17);
+ ctx->v[3] = rol64(ctx->v[3], 21);
+
+ ctx->v[1] ^= ctx->v[2];
+ ctx->v[3] ^= ctx->v[0];
+ ctx->v[2] = rol64(ctx->v[2], 32);
+ }
+}
+
+static void SipHash_CRounds(SIPHASH_CTX *ctx, const void *ptr, int rounds)
+{
+ u64 m = get_unaligned_le64(ptr);
-void
-SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key)
+ ctx->v[3] ^= m;
+ SipHash_Rounds(ctx, rounds);
+ ctx->v[0] ^= m;
+}
+
+void SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key)
{
u64 k0, k1;
@@ -71,8 +98,8 @@ SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key)
ctx->bytes = 0;
}
-void
-SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf, const void *src, size_t len)
+void SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf,
+ const void *src, size_t len)
{
const u8 *ptr = src;
size_t left, used;
@@ -88,7 +115,7 @@ SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf, const void *src, size_t len)
if (len >= left) {
memcpy(&ctx->buf[used], ptr, left);
- SipHash_CRounds(ctx, rc);
+ SipHash_CRounds(ctx, ctx->buf, rc);
len -= left;
ptr += left;
} else {
@@ -98,8 +125,7 @@ SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf, const void *src, size_t len)
}
while (len >= sizeof(ctx->buf)) {
- memcpy(ctx->buf, ptr, sizeof(ctx->buf));
- SipHash_CRounds(ctx, rc);
+ SipHash_CRounds(ctx, ptr, rc);
len -= sizeof(ctx->buf);
ptr += sizeof(ctx->buf);
}
@@ -108,8 +134,7 @@ SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf, const void *src, size_t len)
memcpy(&ctx->buf[used], ptr, len);
}
-void
-SipHash_Final(void *dst, SIPHASH_CTX *ctx, int rc, int rf)
+void SipHash_Final(void *dst, SIPHASH_CTX *ctx, int rc, int rf)
{
u64 r;
@@ -118,8 +143,7 @@ SipHash_Final(void *dst, SIPHASH_CTX *ctx, int rc, int rf)
*((__le64 *) dst) = cpu_to_le64(r);
}
-u64
-SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
+u64 SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
{
u64 r;
size_t left, used;
@@ -129,7 +153,7 @@ SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
memset(&ctx->buf[used], 0, left - 1);
ctx->buf[7] = ctx->bytes;
- SipHash_CRounds(ctx, rc);
+ SipHash_CRounds(ctx, ctx->buf, rc);
ctx->v[2] ^= 0xff;
SipHash_Rounds(ctx, rf);
@@ -138,48 +162,11 @@ SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
return (r);
}
-u64
-SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len)
+u64 SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len)
{
SIPHASH_CTX ctx;
SipHash_Init(&ctx, key);
SipHash_Update(&ctx, rc, rf, src, len);
- return (SipHash_End(&ctx, rc, rf));
-}
-
-#define SIP_ROTL(x, b) ((x) << (b)) | ( (x) >> (64 - (b)))
-
-static void
-SipHash_Rounds(SIPHASH_CTX *ctx, int rounds)
-{
- while (rounds--) {
- ctx->v[0] += ctx->v[1];
- ctx->v[2] += ctx->v[3];
- ctx->v[1] = SIP_ROTL(ctx->v[1], 13);
- ctx->v[3] = SIP_ROTL(ctx->v[3], 16);
-
- ctx->v[1] ^= ctx->v[0];
- ctx->v[3] ^= ctx->v[2];
- ctx->v[0] = SIP_ROTL(ctx->v[0], 32);
-
- ctx->v[2] += ctx->v[1];
- ctx->v[0] += ctx->v[3];
- ctx->v[1] = SIP_ROTL(ctx->v[1], 17);
- ctx->v[3] = SIP_ROTL(ctx->v[3], 21);
-
- ctx->v[1] ^= ctx->v[2];
- ctx->v[3] ^= ctx->v[0];
- ctx->v[2] = SIP_ROTL(ctx->v[2], 32);
- }
-}
-
-static void
-SipHash_CRounds(SIPHASH_CTX *ctx, int rounds)
-{
- u64 m = le64_to_cpu(*((__le64 *)ctx->buf));
-
- ctx->v[3] ^= m;
- SipHash_Rounds(ctx, rounds);
- ctx->v[0] ^= m;
+ return SipHash_End(&ctx, rc, rf);
}
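
The siphash.c cleanup moves the round functions ahead of their users and has SipHash_CRounds() read its message word with get_unaligned_le64() instead of always staging bytes through ctx->buf. The calling convention is unchanged: the rc/rf arguments select the variant, with 2 and 4 giving SipHash-2-4. A short usage sketch of the streaming and one-shot interfaces declared in siphash.h, assuming that header can be included on its own; the key bytes are arbitrary example values, not a real secret.

    #include <string.h>
    #include <stdio.h>
    #include "siphash.h"

    int main(void)
    {
            SIPHASH_KEY key;
            SIPHASH_CTX ctx;
            const char *msg = "some dirent name";

            memset(&key, 0x42, sizeof(key));        /* example key only */

            SipHash_Init(&ctx, &key);
            SipHash_Update(&ctx, 2, 4, msg, strlen(msg));   /* SipHash-2-4 */
            printf("siphash24 = %016llx\n",
                   (unsigned long long) SipHash_End(&ctx, 2, 4));

            /* the one-shot form gives the same result */
            printf("one-shot  = %016llx\n",
                   (unsigned long long) SipHash(&key, 2, 4, msg, strlen(msg)));
            return 0;
    }
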
diff --git a/libbcache/str_hash.h b/libbcache/str_hash.h
index a489304c..b14d05c9 100644
--- a/libbcache/str_hash.h
+++ b/libbcache/str_hash.h
@@ -3,37 +3,74 @@
#include "btree_iter.h"
#include "checksum.h"
+#include "inode.h"
#include "siphash.h"
#include "super.h"
-#include <crypto/sha1_base.h>
#include <linux/crc32c.h>
+#include <crypto/hash.h>
-static const SIPHASH_KEY bch_siphash_key = {
- .k0 = cpu_to_le64(0x5a9585fd80087730ULL),
- .k1 = cpu_to_le64(0xc8de666d50b45664ULL ),
+struct bch_hash_info {
+ u8 type;
+ union {
+ __le64 crc_key;
+ SIPHASH_KEY siphash_key;
+ };
};
+static inline struct bch_hash_info
+bch_hash_info_init(const struct bch_inode_unpacked *bi)
+{
+ /* XXX ick */
+ struct bch_hash_info info = {
+ .type = (bi->i_flags >> INODE_STR_HASH_OFFSET) &
+ ~(~0 << INODE_STR_HASH_BITS)
+ };
+
+ switch (info.type) {
+ case BCH_STR_HASH_CRC32C:
+ case BCH_STR_HASH_CRC64:
+ info.crc_key = bi->i_hash_seed;
+ break;
+ case BCH_STR_HASH_SIPHASH: {
+ SHASH_DESC_ON_STACK(desc, bch_sha256);
+ u8 digest[crypto_shash_digestsize(bch_sha256)];
+
+ desc->tfm = bch_sha256;
+ desc->flags = 0;
+
+ crypto_shash_digest(desc, (void *) &bi->i_hash_seed,
+ sizeof(bi->i_hash_seed), digest);
+ memcpy(&info.siphash_key, digest, sizeof(info.siphash_key));
+ break;
+ }
+ default:
+ BUG();
+ }
+
+ return info;
+}
+
struct bch_str_hash_ctx {
union {
- u32 crc32c;
- u64 crc64;
- SIPHASH_CTX siphash;
+ u32 crc32c;
+ u64 crc64;
+ SIPHASH_CTX siphash;
};
};
static inline void bch_str_hash_init(struct bch_str_hash_ctx *ctx,
- enum bch_str_hash_type type)
+ const struct bch_hash_info *info)
{
- switch (type) {
+ switch (info->type) {
case BCH_STR_HASH_CRC32C:
- ctx->crc32c = ~0;
+ ctx->crc32c = crc32c(~0, &info->crc_key, sizeof(info->crc_key));
break;
case BCH_STR_HASH_CRC64:
- ctx->crc64 = ~0;
+ ctx->crc64 = bch_crc64_update(~0, &info->crc_key, sizeof(info->crc_key));
break;
case BCH_STR_HASH_SIPHASH:
- SipHash24_Init(&ctx->siphash, &bch_siphash_key);
+ SipHash24_Init(&ctx->siphash, &info->siphash_key);
break;
default:
BUG();
@@ -41,10 +78,10 @@ static inline void bch_str_hash_init(struct bch_str_hash_ctx *ctx,
}
static inline void bch_str_hash_update(struct bch_str_hash_ctx *ctx,
- enum bch_str_hash_type type,
- const void *data, size_t len)
+ const struct bch_hash_info *info,
+ const void *data, size_t len)
{
- switch (type) {
+ switch (info->type) {
case BCH_STR_HASH_CRC32C:
ctx->crc32c = crc32c(ctx->crc32c, data, len);
break;
@@ -60,9 +97,9 @@ static inline void bch_str_hash_update(struct bch_str_hash_ctx *ctx,
}
static inline u64 bch_str_hash_end(struct bch_str_hash_ctx *ctx,
- enum bch_str_hash_type type)
+ const struct bch_hash_info *info)
{
- switch (type) {
+ switch (info->type) {
case BCH_STR_HASH_CRC32C:
return ctx->crc32c;
case BCH_STR_HASH_CRC64:
@@ -74,19 +111,6 @@ static inline u64 bch_str_hash_end(struct bch_str_hash_ctx *ctx,
}
}
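A minimal usage sketch (not part of this patch) of the reworked interface: a caller such as the dirent or xattr code derives a bch_hash_info from the unpacked inode once, then threads it through init/update/end. example_name_hash is a hypothetical helper name:

	static u64 example_name_hash(const struct bch_inode_unpacked *bi,
				     const char *name, size_t len)
	{
		struct bch_hash_info info = bch_hash_info_init(bi);
		struct bch_str_hash_ctx ctx;

		bch_str_hash_init(&ctx, &info);
		bch_str_hash_update(&ctx, &info, name, len);
		return bch_str_hash_end(&ctx, &info);
	}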
-struct bch_hash_info {
- u64 seed;
- u8 type;
-};
-
-static inline struct bch_hash_info bch_hash_info_init(const struct bch_inode *bi)
-{
- return (struct bch_hash_info) {
- .seed = le64_to_cpu(bi->i_hash_seed),
- .type = INODE_STR_HASH_TYPE(bi),
- };
-}
-
struct bch_hash_desc {
enum btree_id btree_id;
u8 key_type;
diff --git a/libbcache/super-io.c b/libbcache/super-io.c
new file mode 100644
index 00000000..66338a1c
--- /dev/null
+++ b/libbcache/super-io.c
@@ -0,0 +1,798 @@
+
+#include "bcache.h"
+#include "blockdev.h"
+#include "checksum.h"
+#include "error.h"
+#include "io.h"
+#include "journal.h"
+#include "super-io.h"
+#include "super.h"
+#include "vstructs.h"
+
+#include <linux/backing-dev.h>
+
+static inline void __bch_sb_layout_size_assert(void)
+{
+ BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512);
+}
+
+struct bch_sb_field *bch_sb_field_get(struct bch_sb *sb,
+ enum bch_sb_field_types type)
+{
+ struct bch_sb_field *f;
+
+ /* XXX: need locking around superblock to access optional fields */
+
+ vstruct_for_each(sb, f)
+ if (le32_to_cpu(f->type) == type)
+ return f;
+ return NULL;
+}
+
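For illustration only, an optional field can also be looked up directly by type; the typed wrappers generated in super-io.h (bch_sb_get_members() etc.) are the intended interface:

	struct bch_sb_field *f = bch_sb_field_get(sb, BCH_SB_FIELD_members);

	if (f)
		pr_debug("members field: %u u64s", le32_to_cpu(f->u64s));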
+void bch_free_super(struct bcache_superblock *sb)
+{
+ if (sb->bio)
+ bio_put(sb->bio);
+ if (!IS_ERR_OR_NULL(sb->bdev))
+ blkdev_put(sb->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+
+ free_pages((unsigned long) sb->sb, sb->page_order);
+ memset(sb, 0, sizeof(*sb));
+}
+
+static int __bch_super_realloc(struct bcache_superblock *sb, unsigned order)
+{
+ struct bch_sb *new_sb;
+ struct bio *bio;
+
+ if (sb->page_order >= order && sb->sb)
+ return 0;
+
+ if (dynamic_fault("bcache:add:super_realloc"))
+ return -ENOMEM;
+
+ bio = bio_kmalloc(GFP_KERNEL, 1 << order);
+ if (!bio)
+ return -ENOMEM;
+
+ if (sb->bio)
+ bio_put(sb->bio);
+ sb->bio = bio;
+
+ new_sb = (void *) __get_free_pages(GFP_KERNEL, order);
+ if (!new_sb)
+ return -ENOMEM;
+
+ if (sb->sb)
+ memcpy(new_sb, sb->sb, PAGE_SIZE << sb->page_order);
+
+ free_pages((unsigned long) sb->sb, sb->page_order);
+ sb->sb = new_sb;
+
+ sb->page_order = order;
+
+ return 0;
+}
+
+int bch_dev_sb_realloc(struct bcache_superblock *sb, unsigned u64s)
+{
+ u64 new_bytes = __vstruct_bytes(struct bch_sb, u64s);
+ u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
+
+ if (new_bytes > max_bytes) {
+ char buf[BDEVNAME_SIZE];
+
+ pr_err("%s: superblock too big: want %llu but have %llu",
+ bdevname(sb->bdev, buf), new_bytes, max_bytes);
+ return -ENOSPC;
+ }
+
+ return __bch_super_realloc(sb, get_order(new_bytes));
+}
+
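For example, with layout.sb_max_size_bits == 7 the on-disk budget per superblock copy is 512 << 7 = 65536 bytes, so a resize whose __vstruct_bytes() result would exceed 64KiB is refused with -ENOSPC rather than allowed to overrun the next copy.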
+static int bch_fs_sb_realloc(struct cache_set *c, unsigned u64s)
+{
+ u64 bytes = __vstruct_bytes(struct bch_sb, u64s);
+ struct bch_sb *sb;
+ unsigned order = get_order(bytes);
+
+ if (c->disk_sb && order <= c->disk_sb_order)
+ return 0;
+
+ sb = (void *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ if (!sb)
+ return -ENOMEM;
+
+ if (c->disk_sb)
+ memcpy(sb, c->disk_sb, PAGE_SIZE << c->disk_sb_order);
+
+ free_pages((unsigned long) c->disk_sb, c->disk_sb_order);
+
+ c->disk_sb = sb;
+ c->disk_sb_order = order;
+ return 0;
+}
+
+static struct bch_sb_field *__bch_sb_field_resize(struct bch_sb *sb,
+ struct bch_sb_field *f,
+ unsigned u64s)
+{
+ unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0;
+
+ if (!f) {
+ f = vstruct_last(sb);
+ memset(f, 0, sizeof(u64) * u64s);
+ f->u64s = cpu_to_le32(u64s);
+ f->type = 0;
+ } else {
+ void *src, *dst;
+
+ src = vstruct_end(f);
+ f->u64s = cpu_to_le32(u64s);
+ dst = vstruct_end(f);
+
+ memmove(dst, src, vstruct_end(sb) - src);
+
+ if (dst > src)
+ memset(src, 0, dst - src);
+ }
+
+ le32_add_cpu(&sb->u64s, u64s - old_u64s);
+
+	return f;
+}
+
+struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *c,
+ struct bch_sb_field *f,
+ unsigned u64s)
+{
+ ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
+ ssize_t d = -old_u64s + u64s;
+ struct cache *ca;
+ unsigned i;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ if (bch_fs_sb_realloc(c, le32_to_cpu(c->disk_sb->u64s) + d))
+ return NULL;
+
+ for_each_cache(ca, c, i) {
+ struct bcache_superblock *sb = &ca->disk_sb;
+
+ if (bch_dev_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d)) {
+ percpu_ref_put(&ca->ref);
+ return NULL;
+ }
+ }
+
+ return __bch_sb_field_resize(c->disk_sb, f, u64s);
+}
+
+struct bch_sb_field *bch_dev_sb_field_resize(struct bcache_superblock *sb,
+ struct bch_sb_field *f,
+ unsigned u64s)
+{
+ ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
+ ssize_t d = -old_u64s + u64s;
+
+ if (bch_dev_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
+ return NULL;
+
+ return __bch_sb_field_resize(sb->sb, f, u64s);
+}
+
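A rough sketch of growing an existing optional field, e.g. the per-device journal bucket list; new_u64s (the desired total field size in u64s) and the surrounding error handling are hypothetical:

	struct bch_sb_field_journal *j = bch_sb_get_journal(sb->sb);
	struct bch_sb_field *f;

	f = bch_dev_sb_field_resize(sb, j ? &j->field : NULL, new_u64s);
	if (!f)
		return -ENOMEM;	/* superblock could not be grown */
	j = container_of(f, struct bch_sb_field_journal, field);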
+static const char *validate_sb_layout(struct bch_sb_layout *layout)
+{
+ u64 offset, prev_offset, max_sectors;
+ unsigned i;
+
+ if (uuid_le_cmp(layout->magic, BCACHE_MAGIC))
+ return "Not a bcache superblock layout";
+
+ if (layout->layout_type != 0)
+ return "Invalid superblock layout type";
+
+ if (!layout->nr_superblocks)
+ return "Invalid superblock layout: no superblocks";
+
+ if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset))
+ return "Invalid superblock layout: too many superblocks";
+
+ max_sectors = 1 << layout->sb_max_size_bits;
+
+ prev_offset = le64_to_cpu(layout->sb_offset[0]);
+
+ if (prev_offset != BCH_SB_SECTOR)
+ return "Invalid superblock layout: doesn't have default superblock location";
+
+ for (i = 1; i < layout->nr_superblocks; i++) {
+ offset = le64_to_cpu(layout->sb_offset[i]);
+
+ if (offset < prev_offset + max_sectors)
+ return "Invalid superblock layout: superblocks overlap";
+ prev_offset = offset;
+ }
+
+ return NULL;
+}
+
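E.g. with nr_superblocks == 2 and sb_max_size_bits == 7, the first copy must sit at BCH_SB_SECTOR and the second at least 128 sectors later, so the copies cannot overlap even when both grow to the full 64KiB.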
+const char *bch_validate_cache_super(struct bcache_superblock *disk_sb)
+{
+ struct bch_sb *sb = disk_sb->sb;
+ struct bch_sb_field *f;
+ struct bch_sb_field_members *sb_mi;
+ struct bch_sb_field_journal *journal;
+ struct cache_member_cpu mi;
+ const char *err;
+ u16 block_size;
+ unsigned i;
+
+ switch (le64_to_cpu(sb->version)) {
+ case BCACHE_SB_VERSION_CDEV_V4:
+ break;
+ default:
+		return "Unsupported superblock version";
+ }
+
+ if (BCH_SB_INITIALIZED(sb) &&
+ le64_to_cpu(sb->version) != BCACHE_SB_VERSION_CDEV_V4)
+ return "Unsupported superblock version";
+
+ block_size = le16_to_cpu(sb->block_size);
+
+ if (!is_power_of_2(block_size) ||
+ block_size > PAGE_SECTORS)
+ return "Bad block size";
+
+ if (bch_is_zero(sb->user_uuid.b, sizeof(uuid_le)))
+ return "Bad user UUID";
+
+ if (bch_is_zero(sb->uuid.b, sizeof(uuid_le)))
+ return "Bad internal UUID";
+
+ if (!sb->nr_devices ||
+ sb->nr_devices <= sb->dev_idx ||
+ sb->nr_devices > BCH_SB_MEMBERS_MAX)
+ return "Bad cache device number in set";
+
+ if (!BCH_SB_META_REPLICAS_WANT(sb) ||
+ BCH_SB_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
+ return "Invalid number of metadata replicas";
+
+ if (!BCH_SB_META_REPLICAS_HAVE(sb) ||
+ BCH_SB_META_REPLICAS_HAVE(sb) >
+ BCH_SB_META_REPLICAS_WANT(sb))
+ return "Invalid number of metadata replicas";
+
+ if (!BCH_SB_DATA_REPLICAS_WANT(sb) ||
+ BCH_SB_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
+ return "Invalid number of data replicas";
+
+ if (!BCH_SB_DATA_REPLICAS_HAVE(sb) ||
+ BCH_SB_DATA_REPLICAS_HAVE(sb) >
+ BCH_SB_DATA_REPLICAS_WANT(sb))
+ return "Invalid number of data replicas";
+
+ if (!BCH_SB_BTREE_NODE_SIZE(sb))
+ return "Btree node size not set";
+
+ if (!is_power_of_2(BCH_SB_BTREE_NODE_SIZE(sb)))
+ return "Btree node size not a power of two";
+
+ if (BCH_SB_BTREE_NODE_SIZE(sb) > BTREE_NODE_SIZE_MAX)
+ return "Btree node size too large";
+
+ if (BCH_SB_GC_RESERVE(sb) < 5)
+ return "gc reserve percentage too small";
+
+ if (1U << BCH_SB_JOURNAL_ENTRY_SIZE(sb) < block_size)
+ return "max journal entry size too small";
+
+ /* 4 mb max: */
+ if (512U << BCH_SB_JOURNAL_ENTRY_SIZE(sb) > JOURNAL_ENTRY_SIZE_MAX)
+ return "max journal entry size too big";
+
+ if (!sb->time_precision ||
+ le32_to_cpu(sb->time_precision) > NSEC_PER_SEC)
+ return "invalid time precision";
+
+ /* validate layout */
+ err = validate_sb_layout(&sb->layout);
+ if (err)
+ return err;
+
+ vstruct_for_each(sb, f) {
+ if (!f->u64s)
+ return "Invalid superblock: invalid optional field";
+
+ if (vstruct_next(f) > vstruct_last(sb))
+ return "Invalid superblock: invalid optional field";
+
+ if (le32_to_cpu(f->type) >= BCH_SB_FIELD_NR)
+ return "Invalid superblock: unknown optional field type";
+ }
+
+ /* Validate member info: */
+ sb_mi = bch_sb_get_members(sb);
+ if (!sb_mi)
+ return "Invalid superblock: member info area missing";
+
+ if ((void *) (sb_mi->members + sb->nr_devices) >
+ vstruct_end(&sb_mi->field))
+ return "Invalid superblock: bad member info";
+
+ mi = cache_mi_to_cpu_mi(sb_mi->members + sb->dev_idx);
+
+ for (i = 0; i < sb->layout.nr_superblocks; i++) {
+ u64 offset = le64_to_cpu(sb->layout.sb_offset[i]);
+ u64 max_size = 1 << sb->layout.sb_max_size_bits;
+
+ if (offset + max_size > mi.first_bucket * mi.bucket_size)
+ return "Invalid superblock: first bucket comes before end of super";
+ }
+
+ if (mi.nbuckets > LONG_MAX)
+ return "Too many buckets";
+
+ if (mi.nbuckets - mi.first_bucket < 1 << 10)
+ return "Not enough buckets";
+
+ if (!is_power_of_2(mi.bucket_size) ||
+ mi.bucket_size < PAGE_SECTORS ||
+ mi.bucket_size < block_size)
+ return "Bad bucket size";
+
+ if (get_capacity(disk_sb->bdev->bd_disk) <
+ mi.bucket_size * mi.nbuckets)
+ return "Invalid superblock: device too small";
+
+ /* Validate journal buckets: */
+ journal = bch_sb_get_journal(sb);
+ if (journal) {
+ for (i = 0; i < bch_nr_journal_buckets(journal); i++) {
+ u64 b = le64_to_cpu(journal->buckets[i]);
+
+ if (b < mi.first_bucket || b >= mi.nbuckets)
+ return "bad journal bucket";
+ }
+ }
+
+ return NULL;
+}
+
+/* device open: */
+
+static bool bch_is_open_cache(struct block_device *bdev)
+{
+ struct cache_set *c;
+ struct cache *ca;
+ unsigned i;
+
+ rcu_read_lock();
+ list_for_each_entry(c, &bch_cache_sets, list)
+ for_each_cache_rcu(ca, c, i)
+ if (ca->disk_sb.bdev == bdev) {
+ rcu_read_unlock();
+ return true;
+ }
+ rcu_read_unlock();
+ return false;
+}
+
+static bool bch_is_open(struct block_device *bdev)
+{
+ lockdep_assert_held(&bch_register_lock);
+
+ return bch_is_open_cache(bdev) || bch_is_open_backing_dev(bdev);
+}
+
+static const char *bch_blkdev_open(const char *path, void *holder,
+ struct cache_set_opts opts,
+ struct block_device **ret)
+{
+ struct block_device *bdev;
+ fmode_t mode = opts.nochanges > 0
+ ? FMODE_READ
+ : FMODE_READ|FMODE_WRITE|FMODE_EXCL;
+ const char *err;
+
+ *ret = NULL;
+ bdev = blkdev_get_by_path(path, mode, holder);
+
+ if (bdev == ERR_PTR(-EBUSY)) {
+ bdev = lookup_bdev(path);
+ if (IS_ERR(bdev))
+ return "device busy";
+
+ err = bch_is_open(bdev)
+ ? "device already registered"
+ : "device busy";
+
+ bdput(bdev);
+ return err;
+ }
+
+ if (IS_ERR(bdev))
+ return "failed to open device";
+
+ bdev_get_queue(bdev)->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES;
+
+ *ret = bdev;
+ return NULL;
+}
+
+/* Update cached mi: */
+int bch_cache_set_mi_update(struct cache_set *c,
+ struct bch_member *mi,
+ unsigned nr_devices)
+{
+ struct cache_member_rcu *new, *old;
+ struct cache *ca;
+ unsigned i;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ new = kzalloc(sizeof(struct cache_member_rcu) +
+ sizeof(struct cache_member_cpu) * nr_devices,
+ GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ new->nr_devices = nr_devices;
+
+ for (i = 0; i < nr_devices; i++)
+ new->m[i] = cache_mi_to_cpu_mi(&mi[i]);
+
+ rcu_read_lock();
+ for_each_cache(ca, c, i)
+ ca->mi = new->m[i];
+ rcu_read_unlock();
+
+ old = rcu_dereference_protected(c->members,
+ lockdep_is_held(&c->sb_lock));
+
+ rcu_assign_pointer(c->members, new);
+ if (old)
+ kfree_rcu(old, rcu);
+
+ return 0;
+}
+
+static void bch_sb_update(struct cache_set *c)
+{
+ struct bch_sb *src = c->disk_sb;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ c->sb.uuid = src->uuid;
+ c->sb.user_uuid = src->user_uuid;
+ c->sb.block_size = le16_to_cpu(src->block_size);
+ c->sb.btree_node_size = BCH_SB_BTREE_NODE_SIZE(src);
+ c->sb.nr_devices = src->nr_devices;
+ c->sb.clean = BCH_SB_CLEAN(src);
+ c->sb.meta_replicas_have= BCH_SB_META_REPLICAS_HAVE(src);
+ c->sb.data_replicas_have= BCH_SB_DATA_REPLICAS_HAVE(src);
+ c->sb.str_hash_type = BCH_SB_STR_HASH_TYPE(src);
+ c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src);
+ c->sb.time_base_lo = le64_to_cpu(src->time_base_lo);
+ c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);
+ c->sb.time_precision = le32_to_cpu(src->time_precision);
+}
+
+/* copies member info; doesn't copy the journal field: */
+static void __copy_super(struct bch_sb *dst, struct bch_sb *src)
+{
+ struct bch_sb_field *src_f, *dst_f;
+
+ dst->version = src->version;
+ dst->seq = src->seq;
+ dst->uuid = src->uuid;
+ dst->user_uuid = src->user_uuid;
+ memcpy(dst->label, src->label, sizeof(dst->label));
+
+ dst->block_size = src->block_size;
+ dst->nr_devices = src->nr_devices;
+
+ dst->time_base_lo = src->time_base_lo;
+ dst->time_base_hi = src->time_base_hi;
+ dst->time_precision = src->time_precision;
+
+ memcpy(dst->flags, src->flags, sizeof(dst->flags));
+ memcpy(dst->features, src->features, sizeof(dst->features));
+ memcpy(dst->compat, src->compat, sizeof(dst->compat));
+
+ vstruct_for_each(src, src_f) {
+ if (src_f->type == BCH_SB_FIELD_journal)
+ continue;
+
+ dst_f = bch_sb_field_get(dst, src_f->type);
+ dst_f = __bch_sb_field_resize(dst, dst_f,
+ le32_to_cpu(src_f->u64s));
+
+ memcpy(dst_f, src_f, vstruct_bytes(src_f));
+ }
+}
+
+int bch_sb_to_cache_set(struct cache_set *c, struct bch_sb *src)
+{
+ struct bch_sb_field_members *members =
+ bch_sb_get_members(src);
+ struct bch_sb_field_journal *journal_buckets =
+ bch_sb_get_journal(src);
+ unsigned journal_u64s = journal_buckets
+ ? le32_to_cpu(journal_buckets->field.u64s)
+ : 0;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ if (bch_fs_sb_realloc(c, le32_to_cpu(src->u64s) - journal_u64s))
+ return -ENOMEM;
+
+ if (bch_cache_set_mi_update(c, members->members, src->nr_devices))
+ return -ENOMEM;
+
+ __copy_super(c->disk_sb, src);
+ bch_sb_update(c);
+
+ return 0;
+}
+
+int bch_sb_from_cache_set(struct cache_set *c, struct cache *ca)
+{
+ struct bch_sb *src = c->disk_sb, *dst = ca->disk_sb.sb;
+ struct bch_sb_field_journal *journal_buckets =
+ bch_sb_get_journal(dst);
+ unsigned journal_u64s = journal_buckets
+ ? le32_to_cpu(journal_buckets->field.u64s)
+ : 0;
+ unsigned u64s = le32_to_cpu(src->u64s) + journal_u64s;
+ int ret;
+
+ ret = bch_dev_sb_realloc(&ca->disk_sb, u64s);
+ if (ret)
+ return ret;
+
+ __copy_super(dst, src);
+
+ return 0;
+}
+
+/* read superblock: */
+
+static const char *read_one_super(struct bcache_superblock *sb, u64 offset)
+{
+ struct bch_csum csum;
+ size_t bytes;
+ unsigned order;
+reread:
+ bio_reset(sb->bio);
+ sb->bio->bi_bdev = sb->bdev;
+	sb->bio->bi_iter.bi_sector = offset;
+ sb->bio->bi_iter.bi_size = PAGE_SIZE << sb->page_order;
+ bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
+ bch_bio_map(sb->bio, sb->sb);
+
+ if (submit_bio_wait(sb->bio))
+ return "IO error";
+
+ if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC))
+ return "Not a bcache superblock";
+
+ if (le64_to_cpu(sb->sb->version) != BCACHE_SB_VERSION_CDEV_V4)
+ return "Unsupported superblock version";
+
+ bytes = vstruct_bytes(sb->sb);
+
+ if (bytes > 512 << sb->sb->layout.sb_max_size_bits)
+ return "Bad superblock: too big";
+
+ order = get_order(bytes);
+ if (order > sb->page_order) {
+ if (__bch_super_realloc(sb, order))
+ return "cannot allocate memory";
+ goto reread;
+ }
+
+ if (BCH_SB_CSUM_TYPE(sb->sb) >= BCH_CSUM_NR)
+ return "unknown csum type";
+
+ /* XXX: verify MACs */
+ csum = csum_vstruct(NULL, BCH_SB_CSUM_TYPE(sb->sb),
+ (struct nonce) { 0 }, sb->sb);
+
+ if (bch_crc_cmp(csum, sb->sb->csum))
+ return "bad checksum reading superblock";
+
+ return NULL;
+}
+
+const char *bch_read_super(struct bcache_superblock *sb,
+ struct cache_set_opts opts,
+ const char *path)
+{
+ struct bch_sb_layout layout;
+ const char *err;
+ unsigned i;
+
+ lockdep_assert_held(&bch_register_lock);
+
+ memset(sb, 0, sizeof(*sb));
+
+ err = bch_blkdev_open(path, &sb, opts, &sb->bdev);
+ if (err)
+ return err;
+
+ err = "cannot allocate memory";
+ if (__bch_super_realloc(sb, 0))
+ goto err;
+
+ err = "dynamic fault";
+ if (cache_set_init_fault("read_super"))
+ goto err;
+
+ err = read_one_super(sb, BCH_SB_SECTOR);
+ if (!err)
+ goto got_super;
+
+ pr_err("error reading default super: %s", err);
+
+ /*
+ * Error reading primary superblock - read location of backup
+ * superblocks:
+ */
+ bio_reset(sb->bio);
+ sb->bio->bi_bdev = sb->bdev;
+ sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
+ sb->bio->bi_iter.bi_size = sizeof(struct bch_sb_layout);
+ bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
+ /*
+ * use sb buffer to read layout, since sb buffer is page aligned but
+ * layout won't be:
+ */
+ bch_bio_map(sb->bio, sb->sb);
+
+ err = "IO error";
+ if (submit_bio_wait(sb->bio))
+ goto err;
+
+ memcpy(&layout, sb->sb, sizeof(layout));
+ err = validate_sb_layout(&layout);
+ if (err)
+ goto err;
+
+ for (i = 0; i < layout.nr_superblocks; i++) {
+ u64 offset = le64_to_cpu(layout.sb_offset[i]);
+
+ if (offset == BCH_SB_SECTOR)
+ continue;
+
+ err = read_one_super(sb, offset);
+ if (!err)
+ goto got_super;
+ }
+ goto err;
+got_super:
+ pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
+ le64_to_cpu(sb->sb->version),
+ le64_to_cpu(sb->sb->flags),
+ le64_to_cpu(sb->sb->seq),
+ le16_to_cpu(sb->sb->u64s));
+
+ err = "Superblock block size smaller than device block size";
+ if (le16_to_cpu(sb->sb->block_size) << 9 <
+ bdev_logical_block_size(sb->bdev))
+ goto err;
+
+ return NULL;
+err:
+ bch_free_super(sb);
+ return err;
+}
+
+/* write superblock: */
+
+static void write_super_endio(struct bio *bio)
+{
+ struct cache *ca = bio->bi_private;
+
+ /* XXX: return errors directly */
+
+ cache_fatal_io_err_on(bio->bi_error, ca, "superblock write");
+
+ bch_account_io_completion(ca);
+
+ closure_put(&ca->set->sb_write);
+ percpu_ref_put(&ca->ref);
+}
+
+static bool write_one_super(struct cache_set *c, struct cache *ca, unsigned idx)
+{
+ struct bch_sb *sb = ca->disk_sb.sb;
+ struct bio *bio = ca->disk_sb.bio;
+
+ if (idx >= sb->layout.nr_superblocks)
+ return false;
+
+ sb->offset = sb->layout.sb_offset[idx];
+
+ SET_BCH_SB_CSUM_TYPE(sb, c->opts.metadata_checksum);
+ sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
+ (struct nonce) { 0 }, sb);
+
+ bio_reset(bio);
+ bio->bi_bdev = ca->disk_sb.bdev;
+ bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
+ bio->bi_iter.bi_size =
+ roundup(vstruct_bytes(sb),
+ bdev_logical_block_size(ca->disk_sb.bdev));
+ bio->bi_end_io = write_super_endio;
+ bio->bi_private = ca;
+ bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
+ bch_bio_map(bio, sb);
+
+ percpu_ref_get(&ca->ref);
+ closure_bio_submit_punt(bio, &c->sb_write, c);
+
+ return true;
+}
+
+void bch_write_super(struct cache_set *c)
+{
+ struct bch_sb_field_members *members =
+ bch_sb_get_members(c->disk_sb);
+ struct closure *cl = &c->sb_write;
+ struct cache *ca;
+ unsigned i, super_idx = 0;
+ bool wrote;
+
+ lockdep_assert_held(&c->sb_lock);
+
+ closure_init_stack(cl);
+
+ le64_add_cpu(&c->disk_sb->seq, 1);
+
+ for_each_cache(ca, c, i)
+ bch_sb_from_cache_set(c, ca);
+
+ do {
+ wrote = false;
+ for_each_cache(ca, c, i)
+ if (write_one_super(c, ca, super_idx))
+ wrote = true;
+
+ closure_sync(cl);
+ super_idx++;
+ } while (wrote);
+
+ /* Make new options visible after they're persistent: */
+ bch_cache_set_mi_update(c, members->members, c->sb.nr_devices);
+ bch_sb_update(c);
+}
+
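Callers are expected to hold c->sb_lock across both the modification of c->disk_sb and the write; this is the pattern used repeatedly in super.c below (c is the cache_set and ca a member device), shown here for reference:

	struct bch_sb_field_members *mi;

	mutex_lock(&c->sb_lock);
	mi = bch_sb_get_members(c->disk_sb);
	SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx], BCH_MEMBER_STATE_RO);
	bch_write_super(c);
	mutex_unlock(&c->sb_lock);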
+void bch_check_mark_super_slowpath(struct cache_set *c, const struct bkey_i *k,
+ bool meta)
+{
+ struct bch_member *mi;
+ struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
+ const struct bch_extent_ptr *ptr;
+
+ mutex_lock(&c->sb_lock);
+
+ /* recheck, might have raced */
+ if (bch_check_super_marked(c, k, meta)) {
+ mutex_unlock(&c->sb_lock);
+ return;
+ }
+
+ mi = bch_sb_get_members(c->disk_sb)->members;
+
+ extent_for_each_ptr(e, ptr)
+ if (!ptr->cached)
+ (meta
+ ? SET_BCH_MEMBER_HAS_METADATA
+ : SET_BCH_MEMBER_HAS_DATA)(mi + ptr->dev, true);
+
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
diff --git a/libbcache/super-io.h b/libbcache/super-io.h
new file mode 100644
index 00000000..1eda57bc
--- /dev/null
+++ b/libbcache/super-io.h
@@ -0,0 +1,141 @@
+#ifndef _BCACHE_SUPER_IO_H
+#define _BCACHE_SUPER_IO_H
+
+#include "extents.h"
+#include "super_types.h"
+
+#include <asm/byteorder.h>
+
+struct bch_sb_field *bch_sb_field_get(struct bch_sb *, enum bch_sb_field_types);
+
+#define BCH_SB_FIELD_TYPE(_name) \
+static inline struct bch_sb_field_##_name * \
+bch_sb_get_##_name(struct bch_sb *sb) \
+{ \
+ struct bch_sb_field *f = \
+ bch_sb_field_get(sb, BCH_SB_FIELD_##_name); \
+ \
+ return container_of_or_null(f, struct bch_sb_field_##_name, field);\
+}
+
+BCH_SB_FIELD_TYPE(journal);
+BCH_SB_FIELD_TYPE(members);
+BCH_SB_FIELD_TYPE(crypt);
+
+static inline bool bch_sb_test_feature(struct bch_sb *sb,
+ enum bch_sb_features f)
+{
+ unsigned w = f / 64;
+ unsigned b = f % 64;
+
+ return le64_to_cpu(sb->features[w]) & (1ULL << b);
+}
+
+static inline void bch_sb_set_feature(struct bch_sb *sb,
+ enum bch_sb_features f)
+{
+ if (!bch_sb_test_feature(sb, f)) {
+ unsigned w = f / 64;
+ unsigned b = f % 64;
+
+ le64_add_cpu(&sb->features[w], 1ULL << b);
+ }
+}
+
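For example, feature bit 70 lands in word 70 / 64 == 1, bit 70 % 64 == 6; because the le64_add_cpu() is guarded by the test above, adding 1ULL << 6 there is equivalent to OR-ing that bit into features[1].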
+static inline __le64 bch_sb_magic(struct cache_set *c)
+{
+ __le64 ret;
+ memcpy(&ret, &c->sb.uuid, sizeof(ret));
+ return ret;
+}
+
+static inline __u64 jset_magic(struct cache_set *c)
+{
+ return __le64_to_cpu(bch_sb_magic(c) ^ JSET_MAGIC);
+}
+
+static inline __u64 pset_magic(struct cache_set *c)
+{
+ return __le64_to_cpu(bch_sb_magic(c) ^ PSET_MAGIC);
+}
+
+static inline __u64 bset_magic(struct cache_set *c)
+{
+ return __le64_to_cpu(bch_sb_magic(c) ^ BSET_MAGIC);
+}
+
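In other words, the per-filesystem magic is the first 64 bits of the internal UUID, and the journal (jset), prio (pset) and btree (bset) magics are that value XORed with a per-structure constant, so each filesystem gets its own distinguishable set of on-disk magic numbers.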
+static inline struct cache_member_cpu cache_mi_to_cpu_mi(struct bch_member *mi)
+{
+ return (struct cache_member_cpu) {
+ .nbuckets = le64_to_cpu(mi->nbuckets),
+ .first_bucket = le16_to_cpu(mi->first_bucket),
+ .bucket_size = le16_to_cpu(mi->bucket_size),
+ .state = BCH_MEMBER_STATE(mi),
+ .tier = BCH_MEMBER_TIER(mi),
+ .has_metadata = BCH_MEMBER_HAS_METADATA(mi),
+ .has_data = BCH_MEMBER_HAS_DATA(mi),
+ .replacement = BCH_MEMBER_REPLACEMENT(mi),
+ .discard = BCH_MEMBER_DISCARD(mi),
+ .valid = !bch_is_zero(mi->uuid.b, sizeof(uuid_le)),
+ };
+}
+
+int bch_cache_set_mi_update(struct cache_set *, struct bch_member *, unsigned);
+
+int bch_sb_to_cache_set(struct cache_set *, struct bch_sb *);
+int bch_sb_from_cache_set(struct cache_set *, struct cache *);
+
+struct bch_sb_field *bch_fs_sb_field_resize(struct cache_set *,
+ struct bch_sb_field *, unsigned);
+struct bch_sb_field *bch_dev_sb_field_resize(struct bcache_superblock *,
+ struct bch_sb_field *, unsigned);
+
+void bch_free_super(struct bcache_superblock *);
+int bch_super_realloc(struct bcache_superblock *, unsigned);
+
+const char *bch_validate_cache_super(struct bcache_superblock *);
+
+const char *bch_read_super(struct bcache_superblock *,
+ struct cache_set_opts, const char *);
+void bch_write_super(struct cache_set *);
+
+void bch_check_mark_super_slowpath(struct cache_set *,
+ const struct bkey_i *, bool);
+
+#define cache_member_info_get(_c) \
+ (rcu_read_lock(), rcu_dereference((_c)->members))
+
+#define cache_member_info_put() rcu_read_unlock()
+
+static inline bool bch_check_super_marked(struct cache_set *c,
+ const struct bkey_i *k, bool meta)
+{
+ struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
+ const struct bch_extent_ptr *ptr;
+ struct cache_member_cpu *mi = cache_member_info_get(c)->m;
+ bool ret = true;
+
+ extent_for_each_ptr(e, ptr)
+ if (!ptr->cached &&
+ !(meta
+ ? mi[ptr->dev].has_metadata
+ : mi[ptr->dev].has_data)) {
+ ret = false;
+ break;
+ }
+
+ cache_member_info_put();
+
+ return ret;
+}
+
+static inline void bch_check_mark_super(struct cache_set *c,
+ const struct bkey_i *k, bool meta)
+{
+ if (bch_check_super_marked(c, k, meta))
+ return;
+
+ bch_check_mark_super_slowpath(c, k, meta);
+}
+
+#endif /* _BCACHE_SUPER_IO_H */
diff --git a/libbcache/super.c b/libbcache/super.c
index 296700b3..c026c0dd 100644
--- a/libbcache/super.c
+++ b/libbcache/super.c
@@ -31,12 +31,14 @@
#include "notify.h"
#include "stats.h"
#include "super.h"
+#include "super-io.h"
#include "tier.h"
#include "writeback.h"
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/debugfs.h>
+#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/idr.h>
#include <linux/kthread.h>
@@ -69,70 +71,11 @@ static struct device *bch_chardev;
static DEFINE_IDR(bch_chardev_minor);
static DECLARE_WAIT_QUEUE_HEAD(bch_read_only_wait);
struct workqueue_struct *bcache_io_wq;
-struct crypto_shash *bch_sha1;
+struct crypto_shash *bch_sha256;
static void bch_cache_stop(struct cache *);
static int bch_cache_online(struct cache *);
-static bool bch_is_open_cache(struct block_device *bdev)
-{
- struct cache_set *c;
- struct cache *ca;
- unsigned i;
-
- rcu_read_lock();
- list_for_each_entry(c, &bch_cache_sets, list)
- for_each_cache_rcu(ca, c, i)
- if (ca->disk_sb.bdev == bdev) {
- rcu_read_unlock();
- return true;
- }
- rcu_read_unlock();
- return false;
-}
-
-static bool bch_is_open(struct block_device *bdev)
-{
- lockdep_assert_held(&bch_register_lock);
-
- return bch_is_open_cache(bdev) || bch_is_open_backing_dev(bdev);
-}
-
-static const char *bch_blkdev_open(const char *path, void *holder,
- struct cache_set_opts opts,
- struct block_device **ret)
-{
- struct block_device *bdev;
- fmode_t mode = opts.nochanges > 0
- ? FMODE_READ
- : FMODE_READ|FMODE_WRITE|FMODE_EXCL;
- const char *err;
-
- *ret = NULL;
- bdev = blkdev_get_by_path(path, mode, holder);
-
- if (bdev == ERR_PTR(-EBUSY)) {
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
- return "device busy";
-
- err = bch_is_open(bdev)
- ? "device already registered"
- : "device busy";
-
- bdput(bdev);
- return err;
- }
-
- if (IS_ERR(bdev))
- return "failed to open device";
-
- bdev_get_queue(bdev)->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES;
-
- *ret = bdev;
- return NULL;
-}
-
static int bch_congested_fn(void *data, int bdi_bits)
{
struct backing_dev_info *bdi;
@@ -168,520 +111,6 @@ static int bch_congested_fn(void *data, int bdi_bits)
return ret;
}
-/* Superblock */
-
-static struct cache_member_cpu cache_mi_to_cpu_mi(struct cache_member *mi)
-{
- return (struct cache_member_cpu) {
- .nbuckets = le64_to_cpu(mi->nbuckets),
- .first_bucket = le16_to_cpu(mi->first_bucket),
- .bucket_size = le16_to_cpu(mi->bucket_size),
- .state = CACHE_STATE(mi),
- .tier = CACHE_TIER(mi),
- .replication_set= CACHE_REPLICATION_SET(mi),
- .has_metadata = CACHE_HAS_METADATA(mi),
- .has_data = CACHE_HAS_DATA(mi),
- .replacement = CACHE_REPLACEMENT(mi),
- .discard = CACHE_DISCARD(mi),
- .valid = !bch_is_zero(mi->uuid.b, sizeof(uuid_le)),
- };
-}
-
-static const char *validate_cache_super(struct bcache_superblock *disk_sb)
-{
- struct cache_sb *sb = disk_sb->sb;
- struct cache_member_cpu mi;
- u16 block_size;
- unsigned i;
-
- switch (le64_to_cpu(sb->version)) {
- case BCACHE_SB_VERSION_CDEV_V0:
- case BCACHE_SB_VERSION_CDEV_WITH_UUID:
- case BCACHE_SB_VERSION_CDEV_V2:
- case BCACHE_SB_VERSION_CDEV_V3:
- break;
- default:
- return"Unsupported superblock version";
- }
-
- if (CACHE_SET_SYNC(sb) &&
- le64_to_cpu(sb->version) != BCACHE_SB_VERSION_CDEV_V3)
- return "Unsupported superblock version";
-
- block_size = le16_to_cpu(sb->block_size);
-
- if (!is_power_of_2(block_size) ||
- block_size > PAGE_SECTORS)
- return "Bad block size";
-
- if (bch_is_zero(sb->disk_uuid.b, sizeof(uuid_le)))
- return "Bad disk UUID";
-
- if (bch_is_zero(sb->user_uuid.b, sizeof(uuid_le)))
- return "Bad user UUID";
-
- if (bch_is_zero(sb->set_uuid.b, sizeof(uuid_le)))
- return "Bad set UUID";
-
- if (!sb->nr_in_set ||
- sb->nr_in_set <= sb->nr_this_dev ||
- sb->nr_in_set > MAX_CACHES_PER_SET)
- return "Bad cache device number in set";
-
- if (!CACHE_SET_META_REPLICAS_WANT(sb) ||
- CACHE_SET_META_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
- return "Invalid number of metadata replicas";
-
- if (!CACHE_SET_META_REPLICAS_HAVE(sb) ||
- CACHE_SET_META_REPLICAS_HAVE(sb) >
- CACHE_SET_META_REPLICAS_WANT(sb))
- return "Invalid number of metadata replicas";
-
- if (!CACHE_SET_DATA_REPLICAS_WANT(sb) ||
- CACHE_SET_DATA_REPLICAS_WANT(sb) >= BCH_REPLICAS_MAX)
- return "Invalid number of data replicas";
-
- if (!CACHE_SET_DATA_REPLICAS_HAVE(sb) ||
- CACHE_SET_DATA_REPLICAS_HAVE(sb) >
- CACHE_SET_DATA_REPLICAS_WANT(sb))
- return "Invalid number of data replicas";
-
- if (CACHE_SB_CSUM_TYPE(sb) >= BCH_CSUM_NR)
- return "Invalid checksum type";
-
- if (!CACHE_SET_BTREE_NODE_SIZE(sb))
- return "Btree node size not set";
-
- if (!is_power_of_2(CACHE_SET_BTREE_NODE_SIZE(sb)))
- return "Btree node size not a power of two";
-
- if (CACHE_SET_BTREE_NODE_SIZE(sb) > BTREE_NODE_SIZE_MAX)
- return "Btree node size too large";
-
- /* Default value, for old filesystems: */
- if (!CACHE_SET_GC_RESERVE(sb))
- SET_CACHE_SET_GC_RESERVE(sb, 10);
-
- if (CACHE_SET_GC_RESERVE(sb) < 5)
- return "gc reserve percentage too small";
-
- if (!CACHE_SET_JOURNAL_ENTRY_SIZE(sb))
- SET_CACHE_SET_JOURNAL_ENTRY_SIZE(sb, 9);
-
- /* 4 mb max: */
- if (512U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb) > JOURNAL_ENTRY_SIZE_MAX)
- return "max journal entry size too big";
-
- if (le16_to_cpu(sb->u64s) < bch_journal_buckets_offset(sb))
- return "Invalid superblock: member info area missing";
-
- mi = cache_mi_to_cpu_mi(sb->members + sb->nr_this_dev);
-
- if (mi.nbuckets > LONG_MAX)
- return "Too many buckets";
-
- if (mi.nbuckets < 1 << 8)
- return "Not enough buckets";
-
- if (!is_power_of_2(mi.bucket_size) ||
- mi.bucket_size < PAGE_SECTORS ||
- mi.bucket_size < block_size)
- return "Bad bucket size";
-
- if (get_capacity(disk_sb->bdev->bd_disk) <
- mi.bucket_size * mi.nbuckets)
- return "Invalid superblock: device too small";
-
- if (le64_to_cpu(sb->offset) +
- (__set_blocks(sb, le16_to_cpu(sb->u64s),
- block_size << 9) * block_size) >
- mi.first_bucket * mi.bucket_size)
- return "Invalid superblock: first bucket comes before end of super";
-
- for (i = 0; i < bch_nr_journal_buckets(sb); i++)
- if (journal_bucket(sb, i) < mi.first_bucket ||
- journal_bucket(sb, i) >= mi.nbuckets)
- return "bad journal bucket";
-
- return NULL;
-}
-
-void free_super(struct bcache_superblock *sb)
-{
- if (sb->bio)
- bio_put(sb->bio);
- if (!IS_ERR_OR_NULL(sb->bdev))
- blkdev_put(sb->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-
- free_pages((unsigned long) sb->sb, sb->page_order);
- memset(sb, 0, sizeof(*sb));
-}
-
-static int __bch_super_realloc(struct bcache_superblock *sb, unsigned order)
-{
- struct cache_sb *new_sb;
- struct bio *bio;
-
- if (sb->page_order >= order && sb->sb)
- return 0;
-
- new_sb = (void *) __get_free_pages(GFP_KERNEL, order);
- if (!new_sb)
- return -ENOMEM;
-
- bio = (dynamic_fault("bcache:add:super_realloc")
- ? NULL
- : bio_kmalloc(GFP_KERNEL, 1 << order));
- if (!bio) {
- free_pages((unsigned long) new_sb, order);
- return -ENOMEM;
- }
-
- if (sb->sb)
- memcpy(new_sb, sb->sb, PAGE_SIZE << sb->page_order);
-
- free_pages((unsigned long) sb->sb, sb->page_order);
- sb->sb = new_sb;
-
- if (sb->bio)
- bio_put(sb->bio);
- sb->bio = bio;
-
- sb->page_order = order;
-
- return 0;
-}
-
-int bch_super_realloc(struct bcache_superblock *sb, unsigned u64s)
-{
- struct cache_member *mi = sb->sb->members + sb->sb->nr_this_dev;
- char buf[BDEVNAME_SIZE];
- size_t bytes = __set_bytes((struct cache_sb *) NULL, u64s);
- u64 want = bytes + (SB_SECTOR << 9);
-
- u64 first_bucket_offset = (u64) le16_to_cpu(mi->first_bucket) *
- ((u64) le16_to_cpu(mi->bucket_size) << 9);
-
- if (want > first_bucket_offset) {
- pr_err("%s: superblock too big: want %llu but have %llu",
- bdevname(sb->bdev, buf), want, first_bucket_offset);
- return -ENOSPC;
- }
-
- return __bch_super_realloc(sb, get_order(bytes));
-}
-
-static const char *read_super(struct bcache_superblock *sb,
- struct cache_set_opts opts,
- const char *path)
-{
- const char *err;
- unsigned order = 0;
-
- lockdep_assert_held(&bch_register_lock);
-
- memset(sb, 0, sizeof(*sb));
-
- err = bch_blkdev_open(path, &sb, opts, &sb->bdev);
- if (err)
- return err;
-retry:
- err = "cannot allocate memory";
- if (__bch_super_realloc(sb, order))
- goto err;
-
- err = "dynamic fault";
- if (cache_set_init_fault("read_super"))
- goto err;
-
- bio_reset(sb->bio);
- sb->bio->bi_bdev = sb->bdev;
- sb->bio->bi_iter.bi_sector = SB_SECTOR;
- sb->bio->bi_iter.bi_size = PAGE_SIZE << sb->page_order;
- bio_set_op_attrs(sb->bio, REQ_OP_READ, REQ_SYNC|REQ_META);
- bch_bio_map(sb->bio, sb->sb);
-
- err = "IO error";
- if (submit_bio_wait(sb->bio))
- goto err;
-
- err = "Not a bcache superblock";
- if (uuid_le_cmp(sb->sb->magic, BCACHE_MAGIC))
- goto err;
-
- err = "Superblock has incorrect offset";
- if (le64_to_cpu(sb->sb->offset) != SB_SECTOR)
- goto err;
-
- pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
- le64_to_cpu(sb->sb->version),
- le64_to_cpu(sb->sb->flags),
- le64_to_cpu(sb->sb->seq),
- le16_to_cpu(sb->sb->u64s));
-
- err = "Superblock block size smaller than device block size";
- if (le16_to_cpu(sb->sb->block_size) << 9 <
- bdev_logical_block_size(sb->bdev))
- goto err;
-
- order = get_order(__set_bytes(sb->sb, le16_to_cpu(sb->sb->u64s)));
- if (order > sb->page_order)
- goto retry;
-
- err = "bad checksum reading superblock";
- if (le64_to_cpu(sb->sb->csum) !=
- __csum_set(sb->sb, le16_to_cpu(sb->sb->u64s),
- le64_to_cpu(sb->sb->version) <
- BCACHE_SB_VERSION_CDEV_V3
- ? BCH_CSUM_CRC64
- : CACHE_SB_CSUM_TYPE(sb->sb)))
- goto err;
-
- return NULL;
-err:
- free_super(sb);
- return err;
-}
-
-void __write_super(struct cache_set *c, struct bcache_superblock *disk_sb)
-{
- struct cache_sb *sb = disk_sb->sb;
- struct bio *bio = disk_sb->bio;
-
- bio->bi_bdev = disk_sb->bdev;
- bio->bi_iter.bi_sector = SB_SECTOR;
- bio->bi_iter.bi_size =
- roundup(__set_bytes(sb, le16_to_cpu(sb->u64s)),
- bdev_logical_block_size(disk_sb->bdev));
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
- bch_bio_map(bio, sb);
-
- pr_debug("ver %llu, flags %llu, seq %llu",
- le64_to_cpu(sb->version),
- le64_to_cpu(sb->flags),
- le64_to_cpu(sb->seq));
-
- bch_generic_make_request(bio, c);
-}
-
-static void write_super_endio(struct bio *bio)
-{
- struct cache *ca = bio->bi_private;
-
- /* XXX: return errors directly */
-
- cache_fatal_io_err_on(bio->bi_error, ca, "superblock write");
-
- bch_account_io_completion(ca);
-
- closure_put(&ca->set->sb_write);
- percpu_ref_put(&ca->ref);
-}
-
-static void bcache_write_super_unlock(struct closure *cl)
-{
- struct cache_set *c = container_of(cl, struct cache_set, sb_write);
-
- up(&c->sb_write_mutex);
-}
-
-/* Update cached mi: */
-static int cache_set_mi_update(struct cache_set *c,
- struct cache_member *mi,
- unsigned nr_in_set)
-{
- struct cache_member_rcu *new, *old;
- struct cache *ca;
- unsigned i;
-
- mutex_lock(&c->mi_lock);
-
- new = kzalloc(sizeof(struct cache_member_rcu) +
- sizeof(struct cache_member_cpu) * nr_in_set,
- GFP_KERNEL);
- if (!new) {
- mutex_unlock(&c->mi_lock);
- return -ENOMEM;
- }
-
- new->nr_in_set = nr_in_set;
-
- for (i = 0; i < nr_in_set; i++)
- new->m[i] = cache_mi_to_cpu_mi(&mi[i]);
-
- rcu_read_lock();
- for_each_cache(ca, c, i)
- ca->mi = new->m[i];
- rcu_read_unlock();
-
- old = rcu_dereference_protected(c->members,
- lockdep_is_held(&c->mi_lock));
-
- rcu_assign_pointer(c->members, new);
- if (old)
- kfree_rcu(old, rcu);
-
- mutex_unlock(&c->mi_lock);
- return 0;
-}
-
-/* doesn't copy member info */
-static void __copy_super(struct cache_sb *dst, struct cache_sb *src)
-{
- dst->version = src->version;
- dst->seq = src->seq;
- dst->user_uuid = src->user_uuid;
- dst->set_uuid = src->set_uuid;
- memcpy(dst->label, src->label, SB_LABEL_SIZE);
- dst->flags = src->flags;
- dst->flags2 = src->flags2;
- dst->nr_in_set = src->nr_in_set;
- dst->block_size = src->block_size;
-}
-
-static int cache_sb_to_cache_set(struct cache_set *c, struct cache_sb *src)
-{
- struct cache_member *new;
-
- lockdep_assert_held(&bch_register_lock);
-
- new = kzalloc(sizeof(struct cache_member) * src->nr_in_set,
- GFP_KERNEL);
- if (!new)
- return -ENOMEM;
-
- memcpy(new, src->members,
- src->nr_in_set * sizeof(struct cache_member));
-
- if (cache_set_mi_update(c, new, src->nr_in_set)) {
- kfree(new);
- return -ENOMEM;
- }
-
- kfree(c->disk_mi);
- c->disk_mi = new;
-
- __copy_super(&c->disk_sb, src);
-
- c->sb.block_size = le16_to_cpu(src->block_size);
- c->sb.btree_node_size = CACHE_SET_BTREE_NODE_SIZE(src);
- c->sb.nr_in_set = src->nr_in_set;
- c->sb.clean = CACHE_SET_CLEAN(src);
- c->sb.meta_replicas_have= CACHE_SET_META_REPLICAS_HAVE(src);
- c->sb.data_replicas_have= CACHE_SET_DATA_REPLICAS_HAVE(src);
- c->sb.str_hash_type = CACHE_SET_STR_HASH_TYPE(src);
-
- return 0;
-}
-
-static int cache_sb_from_cache_set(struct cache_set *c, struct cache *ca)
-{
- struct cache_sb *src = &c->disk_sb, *dst = ca->disk_sb.sb;
-
- if (src->nr_in_set != dst->nr_in_set) {
- /*
- * We have to preserve the list of journal buckets on the
- * cache's superblock:
- */
- unsigned old_offset = bch_journal_buckets_offset(dst);
- unsigned u64s = bch_journal_buckets_offset(src)
- + bch_nr_journal_buckets(dst);
- int ret = bch_super_realloc(&ca->disk_sb, u64s);
-
- if (ret)
- return ret;
-
- dst->nr_in_set = src->nr_in_set;
- dst->u64s = cpu_to_le16(u64s);
-
- memmove(dst->_data + bch_journal_buckets_offset(dst),
- dst->_data + old_offset,
- bch_nr_journal_buckets(dst) * sizeof(u64));
- }
-
- memcpy(dst->_data,
- c->disk_mi,
- src->nr_in_set * sizeof(struct cache_member));
-
- __copy_super(dst, src);
-
- return 0;
-}
-
-static void __bcache_write_super(struct cache_set *c)
-{
- struct closure *cl = &c->sb_write;
- struct cache *ca;
- unsigned i;
-
- cache_set_mi_update(c, c->disk_mi, c->sb.nr_in_set);
-
- closure_init(cl, &c->cl);
-
- if (c->opts.nochanges)
- goto no_io;
-
- le64_add_cpu(&c->disk_sb.seq, 1);
-
- for_each_cache(ca, c, i) {
- struct cache_sb *sb = ca->disk_sb.sb;
- struct bio *bio = ca->disk_sb.bio;
-
- cache_sb_from_cache_set(c, ca);
-
- SET_CACHE_SB_CSUM_TYPE(sb, c->opts.metadata_checksum);
- sb->csum = cpu_to_le64(__csum_set(sb,
- le16_to_cpu(sb->u64s),
- CACHE_SB_CSUM_TYPE(sb)));
-
- bio_reset(bio);
- bio->bi_bdev = ca->disk_sb.bdev;
- bio->bi_end_io = write_super_endio;
- bio->bi_private = ca;
-
- closure_get(cl);
- percpu_ref_get(&ca->ref);
- __write_super(c, &ca->disk_sb);
- }
-no_io:
- closure_return_with_destructor(cl, bcache_write_super_unlock);
-}
-
-void bcache_write_super(struct cache_set *c)
-{
- down(&c->sb_write_mutex);
- __bcache_write_super(c);
-}
-
-void bch_check_mark_super_slowpath(struct cache_set *c, const struct bkey_i *k,
- bool meta)
-{
- struct cache_member *mi;
- struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
- const struct bch_extent_ptr *ptr;
-
- if (!CACHE_SET_SYNC(&c->disk_sb))
- return;
-
- down(&c->sb_write_mutex);
-
- /* recheck, might have raced */
- if (bch_check_super_marked(c, k, meta)) {
- up(&c->sb_write_mutex);
- return;
- }
-
- mi = c->disk_mi;
-
- extent_for_each_ptr(e, ptr)
- if (bch_extent_ptr_is_dirty(c, e, ptr))
- (meta
- ? SET_CACHE_HAS_METADATA
- : SET_CACHE_HAS_DATA)(mi + ptr->dev, true);
-
- __bcache_write_super(c);
-}
-
/* Cache set RO/RW: */
/*
@@ -768,8 +197,10 @@ static void bch_cache_set_read_only_work(struct work_struct *work)
if (!bch_journal_error(&c->journal) &&
!test_bit(CACHE_SET_ERROR, &c->flags)) {
- SET_CACHE_SET_CLEAN(&c->disk_sb, true);
- bcache_write_super(c);
+ mutex_lock(&c->sb_lock);
+ SET_BCH_SB_CLEAN(c->disk_sb, true);
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
}
} else {
/*
@@ -848,7 +279,7 @@ static const char *__bch_cache_set_read_write(struct cache_set *c)
err = "error starting allocator thread";
for_each_cache(ca, c, i)
- if (ca->mi.state == CACHE_ACTIVE &&
+ if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
bch_cache_allocator_start(ca)) {
percpu_ref_put(&ca->ref);
goto err;
@@ -859,7 +290,7 @@ static const char *__bch_cache_set_read_write(struct cache_set *c)
goto err;
for_each_cache(ca, c, i) {
- if (ca->mi.state != CACHE_ACTIVE)
+ if (ca->mi.state != BCH_MEMBER_STATE_ACTIVE)
continue;
err = "error starting moving GC thread";
@@ -913,6 +344,7 @@ static void cache_set_free(struct cache_set *c)
cancel_work_sync(&c->bio_submit_work);
cancel_work_sync(&c->read_retry_work);
+ bch_cache_set_encryption_free(c);
bch_btree_cache_free(c);
bch_journal_free(&c->journal);
bch_io_clock_exit(&c->io_clock[WRITE]);
@@ -939,7 +371,7 @@ static void cache_set_free(struct cache_set *c)
destroy_workqueue(c->wq);
kfree_rcu(rcu_dereference_protected(c->members, 1), rcu); /* shutting down */
- kfree(c->disk_mi);
+ free_pages((unsigned long) c->disk_sb, c->disk_sb_order);
kfree(c);
module_put(THIS_MODULE);
}
@@ -1043,15 +475,18 @@ void bch_cache_set_unregister(struct cache_set *c)
static unsigned cache_set_nr_devices(struct cache_set *c)
{
+ struct bch_sb_field_members *mi;
unsigned i, nr = 0;
- struct cache_member *mi = c->disk_mi;
- lockdep_assert_held(&bch_register_lock);
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
- for (i = 0; i < c->disk_sb.nr_in_set; i++)
- if (!bch_is_zero(mi[i].uuid.b, sizeof(uuid_le)))
+ for (i = 0; i < c->disk_sb->nr_devices; i++)
+ if (!bch_is_zero(mi->members[i].uuid.b, sizeof(uuid_le)))
nr++;
+ mutex_unlock(&c->sb_lock);
+
return nr;
}
@@ -1059,7 +494,7 @@ static unsigned cache_set_nr_online_devices(struct cache_set *c)
{
unsigned i, nr = 0;
- for (i = 0; i < c->sb.nr_in_set; i++)
+ for (i = 0; i < c->sb.nr_devices; i++)
if (c->cache[i])
nr++;
@@ -1069,7 +504,7 @@ static unsigned cache_set_nr_online_devices(struct cache_set *c)
#define alloc_bucket_pages(gfp, ca) \
((void *) __get_free_pages(__GFP_ZERO|gfp, ilog2(bucket_pages(ca))))
-static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
+static struct cache_set *bch_cache_set_alloc(struct bch_sb *sb,
struct cache_set_opts opts)
{
struct cache_set *c;
@@ -1083,13 +518,12 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
c->minor = -1;
- sema_init(&c->sb_write_mutex, 1);
+ mutex_init(&c->sb_lock);
INIT_RADIX_TREE(&c->devices, GFP_KERNEL);
mutex_init(&c->btree_cache_lock);
mutex_init(&c->bucket_lock);
mutex_init(&c->btree_root_lock);
INIT_WORK(&c->read_only_work, bch_cache_set_read_only_work);
- mutex_init(&c->mi_lock);
init_rwsem(&c->gc_lock);
@@ -1146,10 +580,16 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
mutex_init(&c->uevent_lock);
- if (cache_sb_to_cache_set(c, sb))
+ mutex_lock(&c->sb_lock);
+
+ if (bch_sb_to_cache_set(c, sb)) {
+ mutex_unlock(&c->sb_lock);
goto err;
+ }
+
+ mutex_unlock(&c->sb_lock);
- scnprintf(c->name, sizeof(c->name), "%pU", &c->disk_sb.user_uuid);
+ scnprintf(c->name, sizeof(c->name), "%pU", &c->sb.user_uuid);
c->opts = cache_superblock_opts(sb);
cache_set_opts_apply(&c->opts, opts);
@@ -1165,7 +605,7 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
iter_size = (btree_blocks(c) + 1) * 2 *
sizeof(struct btree_node_iter_set);
- journal_entry_bytes = 512U << CACHE_SET_JOURNAL_ENTRY_SIZE(sb);
+ journal_entry_bytes = 512U << BCH_SB_JOURNAL_ENTRY_SIZE(sb);
if (!(c->wq = alloc_workqueue("bcache",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
@@ -1185,7 +625,7 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
mempool_init_page_pool(&c->bio_bounce_pages,
max_t(unsigned,
c->sb.btree_node_size,
- CRC32_EXTENT_SIZE_MAX) /
+ BCH_ENCODED_EXTENT_MAX) /
PAGE_SECTORS, 0) ||
!(c->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache_set)) ||
lg_lock_init(&c->bucket_stats_lock) ||
@@ -1196,7 +636,9 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
bch_io_clock_init(&c->io_clock[WRITE]) ||
bch_journal_alloc(&c->journal, journal_entry_bytes) ||
bch_btree_cache_alloc(c) ||
- bch_compress_init(c))
+ bch_cache_set_encryption_init(c) ||
+ bch_compress_init(c) ||
+ bch_check_set_has_compressed_data(c, c->opts.compression))
goto err;
c->bdi.ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
@@ -1247,7 +689,7 @@ static int bch_cache_set_online(struct cache_set *c)
if (IS_ERR(c->chardev))
return PTR_ERR(c->chardev);
- if (kobject_add(&c->kobj, NULL, "%pU", c->disk_sb.user_uuid.b) ||
+ if (kobject_add(&c->kobj, NULL, "%pU", c->sb.user_uuid.b) ||
kobject_add(&c->internal, &c->kobj, "internal") ||
kobject_add(&c->opts_dir, &c->kobj, "options") ||
kobject_add(&c->time_stats, &c->kobj, "time_stats") ||
@@ -1267,6 +709,7 @@ static int bch_cache_set_online(struct cache_set *c)
static const char *run_cache_set(struct cache_set *c)
{
const char *err = "cannot allocate memory";
+ struct bch_sb_field_members *mi;
struct cache *ca;
unsigned i, id;
time64_t now;
@@ -1285,15 +728,9 @@ static const char *run_cache_set(struct cache_set *c)
* we start testing it.
*/
for_each_cache(ca, c, i)
- cache_sb_from_cache_set(c, ca);
+ bch_sb_from_cache_set(c, ca);
- /*
- * CACHE_SET_SYNC is true if the cache set has already been run
- * and potentially has data.
- * It is false if it is the first time it is run.
- */
-
- if (CACHE_SET_SYNC(&c->disk_sb)) {
+ if (BCH_SB_INITIALIZED(c->disk_sb)) {
ret = bch_journal_read(c, &journal);
if (ret)
goto err;
@@ -1363,7 +800,7 @@ static const char *run_cache_set(struct cache_set *c)
err = "error starting allocator thread";
for_each_cache(ca, c, i)
- if (ca->mi.state == CACHE_ACTIVE &&
+ if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
bch_cache_allocator_start(ca)) {
percpu_ref_put(&ca->ref);
goto err;
@@ -1381,25 +818,16 @@ static const char *run_cache_set(struct cache_set *c)
if (c->opts.norecovery)
goto recovery_done;
- /*
- * Write a new journal entry _before_ we start journalling new
- * data - otherwise, we could end up with btree node bsets with
- * journal seqs arbitrarily far in the future vs. the most
- * recently written journal entry on disk, if we crash before
- * writing the next journal entry:
- */
- err = "error writing journal entry";
- if (bch_journal_meta(&c->journal))
- goto err;
-
bch_verbose(c, "starting fsck:");
err = "error in fsck";
ret = bch_fsck(c, !c->opts.nofsck);
if (ret)
goto err;
+
bch_verbose(c, "fsck done");
} else {
- struct bkey_i_inode inode;
+ struct bch_inode_unpacked inode;
+ struct bkey_inode_buf packed_inode;
struct closure cl;
closure_init_stack(&cl);
@@ -1424,7 +852,7 @@ static const char *run_cache_set(struct cache_set *c)
err = "error starting allocator thread";
for_each_cache(ca, c, i)
- if (ca->mi.state == CACHE_ACTIVE &&
+ if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
bch_cache_allocator_start(ca)) {
percpu_ref_put(&ca->ref);
goto err;
@@ -1442,10 +870,13 @@ static const char *run_cache_set(struct cache_set *c)
bch_inode_init(c, &inode, 0, 0,
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
- inode.k.p.inode = BCACHE_ROOT_INO;
+ inode.inum = BCACHE_ROOT_INO;
+
+ bch_inode_pack(&packed_inode, &inode);
err = "error creating root directory";
- if (bch_btree_insert(c, BTREE_ID_INODES, &inode.k_i,
+ if (bch_btree_insert(c, BTREE_ID_INODES,
+ &packed_inode.inode.k_i,
NULL, NULL, NULL, 0))
goto err;
@@ -1462,16 +893,21 @@ recovery_done:
goto err;
}
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
now = ktime_get_seconds();
+
rcu_read_lock();
for_each_cache_rcu(ca, c, i)
- c->disk_mi[ca->sb.nr_this_dev].last_mount = cpu_to_le64(now);
+ mi->members[ca->dev_idx].last_mount = cpu_to_le64(now);
rcu_read_unlock();
- /* Mark cache set as initialized: */
- SET_CACHE_SET_SYNC(&c->disk_sb, true);
- SET_CACHE_SET_CLEAN(&c->disk_sb, false);
- bcache_write_super(c);
+ SET_BCH_SB_INITIALIZED(c->disk_sb, true);
+ SET_BCH_SB_CLEAN(c->disk_sb, false);
+ c->disk_sb->version = BCACHE_SB_VERSION_CDEV;
+
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
err = "dynamic fault";
if (cache_set_init_fault("run_cache_set"))
@@ -1527,41 +963,46 @@ err:
goto out;
}
-static const char *can_add_cache(struct cache_sb *sb,
+static const char *can_add_cache(struct bch_sb *sb,
struct cache_set *c)
{
+ struct bch_sb_field_members *sb_mi;
+
+ sb_mi = bch_sb_get_members(sb);
+ if (!sb_mi)
+ return "Invalid superblock: member info area missing";
+
if (le16_to_cpu(sb->block_size) != c->sb.block_size)
return "mismatched block size";
- if (le16_to_cpu(sb->members[sb->nr_this_dev].bucket_size) <
- CACHE_SET_BTREE_NODE_SIZE(&c->disk_sb))
+ if (le16_to_cpu(sb_mi->members[sb->dev_idx].bucket_size) <
+ BCH_SB_BTREE_NODE_SIZE(c->disk_sb))
return "new cache bucket_size is too small";
return NULL;
}
-static const char *can_attach_cache(struct cache_sb *sb, struct cache_set *c)
+static const char *can_attach_cache(struct bch_sb *sb, struct cache_set *c)
{
+ struct bch_sb_field_members *mi = bch_sb_get_members(c->disk_sb);
+ struct bch_sb_field_members *dev_mi = bch_sb_get_members(sb);
+ uuid_le dev_uuid = dev_mi->members[sb->dev_idx].uuid;
const char *err;
- bool match;
err = can_add_cache(sb, c);
if (err)
return err;
+ if (bch_is_zero(&dev_uuid, sizeof(dev_uuid)))
+ return "device has been removed";
+
/*
* When attaching an existing device, the cache set superblock must
* already contain member_info with a matching UUID
*/
- match = le64_to_cpu(sb->seq) <= le64_to_cpu(c->disk_sb.seq)
- ? (sb->nr_this_dev < c->disk_sb.nr_in_set &&
- !memcmp(&c->disk_mi[sb->nr_this_dev].uuid,
- &sb->disk_uuid, sizeof(uuid_le)))
- : (sb->nr_this_dev < sb->nr_in_set &&
- !memcmp(&sb->members[sb->nr_this_dev].uuid,
- &sb->disk_uuid, sizeof(uuid_le)));
-
- if (!match)
+ if (sb->dev_idx >= c->disk_sb->nr_devices ||
+ memcmp(&mi->members[sb->dev_idx].uuid,
+ &dev_uuid, sizeof(uuid_le)))
return "cache sb does not match set";
return NULL;
@@ -1572,13 +1013,14 @@ static const char *can_attach_cache(struct cache_sb *sb, struct cache_set *c)
bool bch_cache_read_only(struct cache *ca)
{
struct cache_set *c = ca->set;
+ struct bch_sb_field_members *mi;
char buf[BDEVNAME_SIZE];
bdevname(ca->disk_sb.bdev, buf);
lockdep_assert_held(&bch_register_lock);
- if (ca->mi.state != CACHE_ACTIVE)
+ if (ca->mi.state != BCH_MEMBER_STATE_ACTIVE)
return false;
if (!bch_cache_may_remove(ca)) {
@@ -1609,8 +1051,12 @@ bool bch_cache_read_only(struct cache *ca)
bch_notice(c, "%s read only", bdevname(ca->disk_sb.bdev, buf));
bch_notify_cache_read_only(ca);
- SET_CACHE_STATE(&c->disk_mi[ca->sb.nr_this_dev], CACHE_RO);
- bcache_write_super(c);
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
+ SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx],
+ BCH_MEMBER_STATE_RO);
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
return true;
}
@@ -1618,7 +1064,7 @@ static const char *__bch_cache_read_write(struct cache_set *c, struct cache *ca)
{
lockdep_assert_held(&bch_register_lock);
- if (ca->mi.state == CACHE_ACTIVE)
+ if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE)
return NULL;
if (test_bit(CACHE_DEV_REMOVING, &ca->flags))
@@ -1645,14 +1091,19 @@ static const char *__bch_cache_read_write(struct cache_set *c, struct cache *ca)
const char *bch_cache_read_write(struct cache *ca)
{
struct cache_set *c = ca->set;
+ struct bch_sb_field_members *mi;
const char *err;
err = __bch_cache_read_write(c, ca);
if (err)
return err;
- SET_CACHE_STATE(&c->disk_mi[ca->sb.nr_this_dev], CACHE_ACTIVE);
- bcache_write_super(c);
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
+ SET_BCH_MEMBER_STATE(&mi->members[ca->dev_idx],
+ BCH_MEMBER_STATE_ACTIVE);
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
return NULL;
}
@@ -1681,14 +1132,14 @@ static void bch_cache_free_work(struct work_struct *work)
if (c && c->kobj.state_in_sysfs) {
char buf[12];
- sprintf(buf, "cache%u", ca->sb.nr_this_dev);
+ sprintf(buf, "cache%u", ca->dev_idx);
sysfs_remove_link(&c->kobj, buf);
}
if (ca->kobj.state_in_sysfs)
kobject_del(&ca->kobj);
- free_super(&ca->disk_sb);
+ bch_free_super(&ca->disk_sb);
/*
* bch_cache_stop can be called in the middle of initialization
@@ -1697,10 +1148,10 @@ static void bch_cache_free_work(struct work_struct *work)
* However, they were zeroed when the object was allocated.
*/
+ bch_journal_free_cache(ca);
free_percpu(ca->sectors_written);
bioset_exit(&ca->replica_set);
free_percpu(ca->bucket_stats_percpu);
- kfree(ca->journal.bucket_seq);
free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca)));
kfree(ca->prio_buckets);
kfree(ca->bio_prio);
@@ -1754,8 +1205,8 @@ static void bch_cache_stop(struct cache *ca)
lockdep_assert_held(&bch_register_lock);
if (c) {
- BUG_ON(rcu_access_pointer(c->cache[ca->sb.nr_this_dev]) != ca);
- rcu_assign_pointer(c->cache[ca->sb.nr_this_dev], NULL);
+ BUG_ON(rcu_access_pointer(c->cache[ca->dev_idx]) != ca);
+ rcu_assign_pointer(c->cache[ca->dev_idx], NULL);
}
call_rcu(&ca->free_rcu, bch_cache_free_rcu);
@@ -1764,10 +1215,11 @@ static void bch_cache_stop(struct cache *ca)
static void bch_cache_remove_work(struct work_struct *work)
{
struct cache *ca = container_of(work, struct cache, remove_work);
+ struct bch_sb_field_members *mi;
struct cache_set *c = ca->set;
char name[BDEVNAME_SIZE];
bool force = test_bit(CACHE_DEV_FORCE_REMOVE, &ca->flags);
- unsigned dev = ca->sb.nr_this_dev;
+ unsigned dev_idx = ca->dev_idx;
bdevname(ca->disk_sb.bdev, name);
@@ -1780,17 +1232,21 @@ static void bch_cache_remove_work(struct work_struct *work)
if (!ca->mi.has_data) {
/* Nothing to do: */
} else if (!bch_move_data_off_device(ca)) {
- lockdep_assert_held(&bch_register_lock);
- SET_CACHE_HAS_DATA(&c->disk_mi[ca->sb.nr_this_dev], false);
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
+ SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false);
- bcache_write_super(c);
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
} else if (force) {
bch_flag_data_bad(ca);
- lockdep_assert_held(&bch_register_lock);
- SET_CACHE_HAS_DATA(&c->disk_mi[ca->sb.nr_this_dev], false);
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
+ SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], false);
- bcache_write_super(c);
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
} else {
bch_err(c, "Remove of %s failed, unable to migrate data off",
name);
@@ -1803,10 +1259,12 @@ static void bch_cache_remove_work(struct work_struct *work)
if (!ca->mi.has_metadata) {
/* Nothing to do: */
} else if (!bch_move_meta_data_off_device(ca)) {
- lockdep_assert_held(&bch_register_lock);
- SET_CACHE_HAS_METADATA(&c->disk_mi[ca->sb.nr_this_dev], false);
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
+ SET_BCH_MEMBER_HAS_METADATA(&mi->members[ca->dev_idx], false);
- bcache_write_super(c);
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
} else {
bch_err(c, "Remove of %s failed, unable to migrate metadata off",
name);
@@ -1821,7 +1279,7 @@ static void bch_cache_remove_work(struct work_struct *work)
bch_notify_cache_removed(ca);
spin_lock(&c->journal.lock);
- c->journal.prio_buckets[dev] = 0;
+ c->journal.prio_buckets[dev_idx] = 0;
spin_unlock(&c->journal.lock);
bch_journal_meta(&c->journal);
@@ -1844,12 +1302,16 @@ static void bch_cache_remove_work(struct work_struct *work)
lockdep_assert_held(&bch_register_lock);
/*
- * Free this device's slot in the cache_member array - all pointers to
+ * Free this device's slot in the bch_member array - all pointers to
* this device must be gone:
*/
- memset(&c->disk_mi[dev].uuid, 0, sizeof(c->disk_mi[dev].uuid));
+ mutex_lock(&c->sb_lock);
+ mi = bch_sb_get_members(c->disk_sb);
+ memset(&mi->members[dev_idx].uuid, 0, sizeof(mi->members[dev_idx].uuid));
+
+ bch_write_super(c);
+ mutex_unlock(&c->sb_lock);
- bcache_write_super(c);
mutex_unlock(&bch_register_lock);
closure_put(&c->cl);
@@ -1891,7 +1353,7 @@ static int bch_cache_online(struct cache *ca)
lockdep_assert_held(&bch_register_lock);
- sprintf(buf, "cache%u", ca->sb.nr_this_dev);
+ sprintf(buf, "cache%u", ca->dev_idx);
if (kobject_add(&ca->kobj,
&part_to_dev(ca->disk_sb.bdev->bd_part)->kobj,
@@ -1907,13 +1369,14 @@ static const char *cache_alloc(struct bcache_superblock *sb,
struct cache_set *c,
struct cache **ret)
{
+ struct bch_member *member;
size_t reserve_none, movinggc_reserve, free_inc_reserve, total_reserve;
size_t heap_size;
- unsigned i, journal_entry_pages;
+ unsigned i;
const char *err = "cannot allocate memory";
struct cache *ca;
- if (c->sb.nr_in_set == 1)
+ if (c->sb.nr_devices == 1)
bdevname(sb->bdev, c->name);
if (cache_set_init_fault("cache_alloc"))
@@ -1934,7 +1397,7 @@ static const char *cache_alloc(struct bcache_superblock *sb,
spin_lock_init(&ca->self.lock);
ca->self.nr_devices = 1;
rcu_assign_pointer(ca->self.d[0].dev, ca);
- ca->sb.nr_this_dev = sb->sb->nr_this_dev;
+ ca->dev_idx = sb->sb->dev_idx;
INIT_WORK(&ca->free_work, bch_cache_free_work);
INIT_WORK(&ca->remove_work, bch_cache_remove_work);
@@ -1953,8 +1416,11 @@ static const char *cache_alloc(struct bcache_superblock *sb,
if (cache_set_init_fault("cache_alloc"))
goto err;
- ca->mi = cache_mi_to_cpu_mi(ca->disk_sb.sb->members +
- ca->disk_sb.sb->nr_this_dev);
+ member = bch_sb_get_members(ca->disk_sb.sb)->members +
+ ca->disk_sb.sb->dev_idx;
+
+ ca->mi = cache_mi_to_cpu_mi(member);
+ ca->uuid = member->uuid;
ca->bucket_bits = ilog2(ca->mi.bucket_size);
/* XXX: tune these */
@@ -1968,10 +1434,6 @@ static const char *cache_alloc(struct bcache_superblock *sb,
free_inc_reserve = movinggc_reserve / 2;
heap_size = movinggc_reserve * 8;
- journal_entry_pages =
- DIV_ROUND_UP(1U << CACHE_SET_JOURNAL_ENTRY_SIZE(ca->disk_sb.sb),
- PAGE_SECTORS);
-
if (!init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_BTREE], BTREE_NODE_RESERVE, GFP_KERNEL) ||
!init_fifo(&ca->free[RESERVE_MOVINGGC],
@@ -1987,13 +1449,11 @@ static const char *cache_alloc(struct bcache_superblock *sb,
2, GFP_KERNEL)) ||
!(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) ||
!(ca->bucket_stats_percpu = alloc_percpu(struct bucket_stats_cache)) ||
- !(ca->journal.bucket_seq = kcalloc(bch_nr_journal_buckets(ca->disk_sb.sb),
- sizeof(u64), GFP_KERNEL)) ||
- !(ca->journal.bio = bio_kmalloc(GFP_KERNEL, journal_entry_pages)) ||
- !(ca->bio_prio = bio_kmalloc(GFP_KERNEL, bucket_pages(ca))) ||
+ !(ca->bio_prio = bio_kmalloc(GFP_NOIO, bucket_pages(ca))) ||
bioset_init(&ca->replica_set, 4,
offsetof(struct bch_write_bio, bio)) ||
- !(ca->sectors_written = alloc_percpu(*ca->sectors_written)))
+ !(ca->sectors_written = alloc_percpu(*ca->sectors_written)) ||
+ bch_journal_init_cache(ca))
goto err;
ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca);
@@ -2006,15 +1466,6 @@ static const char *cache_alloc(struct bcache_superblock *sb,
ca->copygc_write_point.group = &ca->self;
ca->tiering_write_point.group = &ca->self;
- kobject_get(&c->kobj);
- ca->set = c;
-
- kobject_get(&ca->kobj);
- rcu_assign_pointer(c->cache[ca->sb.nr_this_dev], ca);
-
- if (le64_to_cpu(ca->disk_sb.sb->seq) > le64_to_cpu(c->disk_sb.seq))
- cache_sb_to_cache_set(c, ca->disk_sb.sb);
-
/*
* Increase journal write timeout if flushes to this device are
* expensive:
@@ -2024,6 +1475,19 @@ static const char *cache_alloc(struct bcache_superblock *sb,
c->journal.write_delay_ms =
max(c->journal.write_delay_ms, 1000U);
+ kobject_get(&c->kobj);
+ ca->set = c;
+
+ kobject_get(&ca->kobj);
+ rcu_assign_pointer(c->cache[ca->dev_idx], ca);
+
+ mutex_lock(&c->sb_lock);
+
+ if (le64_to_cpu(ca->disk_sb.sb->seq) > le64_to_cpu(c->disk_sb->seq))
+ bch_sb_to_cache_set(c, ca->disk_sb.sb);
+
+ mutex_unlock(&c->sb_lock);
+
err = "error creating kobject";
if (c->kobj.state_in_sysfs &&
bch_cache_online(ca))
@@ -2046,7 +1510,7 @@ static struct cache_set *cache_set_lookup(uuid_le uuid)
lockdep_assert_held(&bch_register_lock);
list_for_each_entry(c, &bch_cache_sets, list)
- if (!memcmp(&c->disk_sb.set_uuid, &uuid, sizeof(uuid_le)))
+ if (!memcmp(&c->disk_sb->uuid, &uuid, sizeof(uuid_le)))
return c;
return NULL;
@@ -2060,13 +1524,13 @@ static const char *register_cache(struct bcache_superblock *sb,
struct cache_set *c;
bool allocated_cache_set = false;
- err = validate_cache_super(sb);
+ err = bch_validate_cache_super(sb);
if (err)
return err;
bdevname(sb->bdev, name);
- c = cache_set_lookup(sb->sb->set_uuid);
+ c = cache_set_lookup(sb->sb->uuid);
if (c) {
err = can_attach_cache(sb->sb, c);
if (err)
@@ -2106,20 +1570,23 @@ int bch_cache_set_add_cache(struct cache_set *c, const char *path)
struct bcache_superblock sb;
const char *err;
struct cache *ca;
- struct cache_member *new_mi = NULL;
- struct cache_member mi;
- unsigned nr_this_dev, nr_in_set, u64s;
+ struct bch_sb_field *f;
+ struct bch_sb_field_members *mi, *dev_mi;
+ struct bch_member saved_mi;
+ unsigned dev_idx, nr_devices, u64s;
int ret = -EINVAL;
mutex_lock(&bch_register_lock);
- err = read_super(&sb, c->opts, path);
+ err = bch_read_super(&sb, c->opts, path);
if (err)
- goto err_unlock;
+ goto err_unlock_register;
- err = validate_cache_super(&sb);
+ err = bch_validate_cache_super(&sb);
if (err)
- goto err_unlock;
+ goto err_unlock_register;
+
+ mutex_lock(&c->sb_lock);
err = can_add_cache(sb.sb, c);
if (err)
@@ -2129,8 +1596,9 @@ int bch_cache_set_add_cache(struct cache_set *c, const char *path)
* Preserve the old cache member information (esp. tier)
* before we start bashing the disk stuff.
*/
- mi = sb.sb->members[sb.sb->nr_this_dev];
- mi.last_mount = cpu_to_le64(ktime_get_seconds());
+ dev_mi = bch_sb_get_members(sb.sb);
+ saved_mi = dev_mi->members[sb.sb->dev_idx];
+ saved_mi.last_mount = cpu_to_le64(ktime_get_seconds());
down_read(&c->gc_lock);
@@ -2140,9 +1608,10 @@ int bch_cache_set_add_cache(struct cache_set *c, const char *path)
if (test_bit(CACHE_SET_GC_FAILURE, &c->flags))
goto no_slot;
- for (nr_this_dev = 0; nr_this_dev < MAX_CACHES_PER_SET; nr_this_dev++)
- if (nr_this_dev >= c->sb.nr_in_set ||
- bch_is_zero(c->disk_mi[nr_this_dev].uuid.b,
+ mi = bch_sb_get_members(c->disk_sb);
+ for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++)
+ if (dev_idx >= c->sb.nr_devices ||
+ bch_is_zero(mi->members[dev_idx].uuid.b,
sizeof(uuid_le)))
goto have_slot;
no_slot:
@@ -2153,52 +1622,46 @@ no_slot:
goto err_unlock;
have_slot:
- nr_in_set = max_t(unsigned, nr_this_dev + 1, c->sb.nr_in_set);
up_read(&c->gc_lock);
- u64s = nr_in_set * (sizeof(struct cache_member) / sizeof(u64));
+ nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
+ u64s = (sizeof(struct bch_sb_field_members) +
+ sizeof(struct bch_member) * nr_devices) / sizeof(u64);
err = "no space in superblock for member info";
- if (bch_super_realloc(&sb, u64s))
+
+ f = bch_fs_sb_field_resize(c, &mi->field, u64s);
+ if (!f)
goto err_unlock;
- new_mi = dynamic_fault("bcache:add:member_info_realloc")
- ? NULL
- : kmalloc(sizeof(struct cache_member) * nr_in_set,
- GFP_KERNEL);
- if (!new_mi) {
- err = "cannot allocate memory";
- ret = -ENOMEM;
+ mi = container_of(f, struct bch_sb_field_members, field);
+
+ f = bch_dev_sb_field_resize(&sb, &dev_mi->field, u64s);
+ if (!f)
goto err_unlock;
- }
- memcpy(new_mi, c->disk_mi,
- sizeof(struct cache_member) * nr_in_set);
- new_mi[nr_this_dev] = mi;
+ dev_mi = container_of(f, struct bch_sb_field_members, field);
+ memcpy(dev_mi, mi, u64s * sizeof(u64));
+ dev_mi->members[dev_idx] = saved_mi;
- sb.sb->nr_this_dev = nr_this_dev;
- sb.sb->nr_in_set = nr_in_set;
- sb.sb->u64s = cpu_to_le16(u64s);
- memcpy(sb.sb->members, new_mi,
- sizeof(struct cache_member) * nr_in_set);
+ sb.sb->dev_idx = dev_idx;
+ sb.sb->nr_devices = nr_devices;
- if (cache_set_mi_update(c, new_mi, nr_in_set)) {
+ if (bch_cache_set_mi_update(c, dev_mi->members, nr_devices)) {
err = "cannot allocate memory";
ret = -ENOMEM;
goto err_unlock;
}
/* commit new member info */
- swap(c->disk_mi, new_mi);
- kfree(new_mi);
- new_mi = NULL;
- c->disk_sb.nr_in_set = nr_in_set;
- c->sb.nr_in_set = nr_in_set;
+ memcpy(mi, dev_mi, u64s * sizeof(u64));
+ c->disk_sb->nr_devices = nr_devices;
+ c->sb.nr_devices = nr_devices;
err = cache_alloc(&sb, c, &ca);
if (err)
goto err_unlock;
- bcache_write_super(c);
+ bch_write_super(c);
err = "journal alloc failed";
if (bch_cache_journal_alloc(ca))
@@ -2206,21 +1669,23 @@ have_slot:
bch_notify_cache_added(ca);
- if (ca->mi.state == CACHE_ACTIVE) {
+ if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE) {
err = __bch_cache_read_write(c, ca);
if (err)
goto err_put;
}
kobject_put(&ca->kobj);
+ mutex_unlock(&c->sb_lock);
mutex_unlock(&bch_register_lock);
return 0;
err_put:
bch_cache_stop(ca);
err_unlock:
- kfree(new_mi);
- free_super(&sb);
+ mutex_unlock(&c->sb_lock);
+err_unlock_register:
mutex_unlock(&bch_register_lock);
+ bch_free_super(&sb);
bch_err(c, "Unable to add device: %s", err);
return ret ?: -EINVAL;
@@ -2250,14 +1715,14 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices,
goto err;
/*
- * read_super() needs to happen under register_lock, so that the
+ * bch_read_super() needs to happen under register_lock, so that the
* exclusive open is atomic with adding the new cache set to the list of
* cache sets:
*/
mutex_lock(&bch_register_lock);
for (i = 0; i < nr_devices; i++) {
- err = read_super(&sb[i], opts, devices[i]);
+ err = bch_read_super(&sb[i], opts, devices[i]);
if (err)
goto err_unlock;
@@ -2265,13 +1730,13 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices,
if (__SB_IS_BDEV(le64_to_cpu(sb[i].sb->version)))
goto err_unlock;
- err = validate_cache_super(&sb[i]);
+ err = bch_validate_cache_super(&sb[i]);
if (err)
goto err_unlock;
}
err = "cache set already registered";
- if (cache_set_lookup(sb->sb->set_uuid))
+ if (cache_set_lookup(sb->sb->uuid))
goto err_unlock;
err = "cannot allocate memory";
@@ -2317,7 +1782,7 @@ err_unlock:
mutex_unlock(&bch_register_lock);
err:
for (i = 0; i < nr_devices; i++)
- free_super(&sb[i]);
+ bch_free_super(&sb[i]);
goto out;
}
@@ -2329,7 +1794,7 @@ const char *bch_register_one(const char *path)
mutex_lock(&bch_register_lock);
- err = read_super(&sb, opts, path);
+ err = bch_read_super(&sb, opts, path);
if (err)
goto err;
@@ -2338,7 +1803,7 @@ const char *bch_register_one(const char *path)
else
err = register_cache(&sb, opts);
- free_super(&sb);
+ bch_free_super(&sb);
err:
mutex_unlock(&bch_register_lock);
return err;
@@ -2440,8 +1905,8 @@ static void bcache_exit(void)
class_destroy(bch_chardev_class);
if (bch_chardev_major > 0)
unregister_chrdev(bch_chardev_major, "bcache");
- if (!IS_ERR_OR_NULL(bch_sha1))
- crypto_free_shash(bch_sha1);
+ if (!IS_ERR_OR_NULL(bch_sha256))
+ crypto_free_shash(bch_sha256);
unregister_reboot_notifier(&reboot);
}
@@ -2459,8 +1924,8 @@ static int __init bcache_init(void)
closure_debug_init();
bkey_pack_test();
- bch_sha1 = crypto_alloc_shash("sha1", 0, 0);
- if (IS_ERR(bch_sha1))
+ bch_sha256 = crypto_alloc_shash("sha256", 0, 0);
+ if (IS_ERR(bch_sha256))
goto err;
bch_chardev_major = register_chrdev(0, "bcache-ctl", &bch_chardev_fops);
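
The super.c hunks above repeat one pattern: per-member superblock state moves out of c->disk_mi[] into a bch_sb_field_members section, and every update now happens under c->sb_lock followed by bch_write_super() instead of bcache_write_super() under bch_register_lock. A minimal sketch of that sequence follows; the wrapper function itself is illustrative and not part of the patch, but every call in it appears in the hunks above.

	static void example_set_member_has_data(struct cache_set *c,
						struct cache *ca, bool v)
	{
		struct bch_sb_field_members *mi;

		mutex_lock(&c->sb_lock);		/* serializes all superblock edits */
		mi = bch_sb_get_members(c->disk_sb);	/* variable-length members section */
		SET_BCH_MEMBER_HAS_DATA(&mi->members[ca->dev_idx], v);
		bch_write_super(c);			/* replaces bcache_write_super() */
		mutex_unlock(&c->sb_lock);
	}
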
diff --git a/libbcache/super.h b/libbcache/super.h
index 635e1a6f..014d7aed 100644
--- a/libbcache/super.h
+++ b/libbcache/super.h
@@ -18,17 +18,12 @@ static inline sector_t bucket_remainder(const struct cache *ca, sector_t s)
return s & (ca->mi.bucket_size - 1);
}
-#define cache_member_info_get(_c) \
- (rcu_read_lock(), rcu_dereference((_c)->members))
-
-#define cache_member_info_put() rcu_read_unlock()
-
static inline struct cache *bch_next_cache_rcu(struct cache_set *c,
unsigned *iter)
{
struct cache *ret = NULL;
- while (*iter < c->sb.nr_in_set &&
+ while (*iter < c->sb.nr_devices &&
!(ret = rcu_dereference(c->cache[*iter])))
(*iter)++;
@@ -59,40 +54,6 @@ static inline struct cache *bch_get_next_cache(struct cache_set *c,
(ca = bch_get_next_cache(c, &(iter))); \
percpu_ref_put(&ca->ref), (iter)++)
-void bch_check_mark_super_slowpath(struct cache_set *,
- const struct bkey_i *, bool);
-
-static inline bool bch_check_super_marked(struct cache_set *c,
- const struct bkey_i *k, bool meta)
-{
- struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
- const struct bch_extent_ptr *ptr;
- struct cache_member_cpu *mi = cache_member_info_get(c)->m;
- bool ret = true;
-
- extent_for_each_ptr(e, ptr)
- if (!(meta
- ? mi[ptr->dev].has_metadata
- : mi[ptr->dev].has_data) &&
- bch_extent_ptr_is_dirty(c, e, ptr)) {
- ret = false;
- break;
- }
-
- cache_member_info_put();
-
- return ret;
-}
-
-static inline void bch_check_mark_super(struct cache_set *c,
- const struct bkey_i *k, bool meta)
-{
- if (bch_check_super_marked(c, k, meta))
- return;
-
- bch_check_mark_super_slowpath(c, k, meta);
-}
-
static inline bool bch_cache_may_remove(struct cache *ca)
{
struct cache_set *c = ca->set;
@@ -119,11 +80,6 @@ static inline bool bch_cache_may_remove(struct cache *ca)
rcu_access_pointer(tier->d[0].dev) != ca;
}
-void free_super(struct bcache_superblock *);
-int bch_super_realloc(struct bcache_superblock *, unsigned);
-void bcache_write_super(struct cache_set *);
-void __write_super(struct cache_set *, struct bcache_superblock *);
-
void bch_cache_set_release(struct kobject *);
void bch_cache_release(struct kobject *);
@@ -149,7 +105,7 @@ extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;
extern struct idr bch_cache_set_minor;
extern struct workqueue_struct *bcache_io_wq;
-extern struct crypto_shash *bch_sha1;
+extern struct crypto_shash *bch_sha256;
extern struct kobj_type bch_cache_set_ktype;
extern struct kobj_type bch_cache_set_internal_ktype;
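
Device iteration in super.h is now bounded by c->sb.nr_devices rather than nr_in_set. Only the tail of the iteration macro is visible in the hunk above, so the for_each_cache() name below is an assumption, as is the helper wrapping it; the fields it touches (dev_idx, mi.state, BCH_MEMBER_STATE_ACTIVE) all appear elsewhere in this patch.

	static unsigned example_count_active_devices(struct cache_set *c)
	{
		struct cache *ca;
		unsigned iter, nr = 0;

		for_each_cache(ca, c, iter)	/* takes/drops a percpu ref per device */
			if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE)
				nr++;
		return nr;
	}
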
diff --git a/libbcache/super_types.h b/libbcache/super_types.h
index d89f780f..41eaf0dd 100644
--- a/libbcache/super_types.h
+++ b/libbcache/super_types.h
@@ -2,7 +2,7 @@
#define _BCACHE_SUPER_TYPES_H
struct bcache_superblock {
- struct cache_sb *sb;
+ struct bch_sb *sb;
struct block_device *bdev;
struct bio *bio;
unsigned page_order;
diff --git a/libbcache/sysfs.c b/libbcache/sysfs.c
index 58a71259..57b7dd9d 100644
--- a/libbcache/sysfs.c
+++ b/libbcache/sysfs.c
@@ -8,9 +8,11 @@
#include "bcache.h"
#include "alloc.h"
#include "blockdev.h"
+#include "compress.h"
#include "sysfs.h"
#include "btree_cache.h"
#include "btree_iter.h"
+#include "btree_update.h"
#include "btree_gc.h"
#include "buckets.h"
#include "inode.h"
@@ -19,6 +21,7 @@
#include "move.h"
#include "opts.h"
#include "request.h"
+#include "super-io.h"
#include "writeback.h"
#include <linux/blkdev.h>
@@ -139,14 +142,14 @@ read_attribute(tier);
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
-#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
+#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
static struct attribute sysfs_opt_##_name = { \
.name = #_name, \
.mode = S_IRUGO|(_perm ? S_IWUSR : 0) \
};
- CACHE_SET_VISIBLE_OPTS()
-#undef CACHE_SET_OPT
+ BCH_VISIBLE_OPTS()
+#undef BCH_OPT
#define BCH_TIME_STAT(name, frequency_units, duration_units) \
sysfs_time_stats_attribute(name, frequency_units, duration_units);
@@ -193,8 +196,8 @@ SHOW(bch_cached_dev)
sysfs_print(state, states[BDEV_STATE(dc->disk_sb.sb)]);
if (attr == &sysfs_label) {
- memcpy(buf, dc->disk_sb.sb->label, SB_LABEL_SIZE);
- buf[SB_LABEL_SIZE + 1] = '\0';
+ memcpy(buf, dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
+ buf[BCH_SB_LABEL_SIZE + 1] = '\0';
strcat(buf, "\n");
return strlen(buf);
}
@@ -248,24 +251,25 @@ STORE(__cached_dev)
u64 journal_seq = 0;
int ret = 0;
- if (size > SB_LABEL_SIZE)
+ if (size > BCH_SB_LABEL_SIZE)
return -EINVAL;
mutex_lock(&dc->disk.inode_lock);
memcpy(dc->disk_sb.sb->label, buf, size);
- if (size < SB_LABEL_SIZE)
+ if (size < BCH_SB_LABEL_SIZE)
dc->disk_sb.sb->label[size] = '\0';
if (size && dc->disk_sb.sb->label[size - 1] == '\n')
dc->disk_sb.sb->label[size - 1] = '\0';
memcpy(dc->disk.inode.v.i_label,
- dc->disk_sb.sb->label, SB_LABEL_SIZE);
+ dc->disk_sb.sb->label, BCH_SB_LABEL_SIZE);
bch_write_bdev_super(dc, NULL);
if (dc->disk.c)
- ret = bch_inode_update(dc->disk.c, &dc->disk.inode.k_i,
+ ret = bch_btree_update(dc->disk.c, BTREE_ID_INODES,
+ &dc->disk.inode.k_i,
&journal_seq);
mutex_unlock(&dc->disk.inode_lock);
@@ -367,8 +371,8 @@ SHOW(bch_blockdev_volume)
sysfs_hprint(size, le64_to_cpu(d->inode.v.i_size));
if (attr == &sysfs_label) {
- memcpy(buf, d->inode.v.i_label, SB_LABEL_SIZE);
- buf[SB_LABEL_SIZE + 1] = '\0';
+ memcpy(buf, d->inode.v.i_label, BCH_SB_LABEL_SIZE);
+ buf[BCH_SB_LABEL_SIZE + 1] = '\0';
strcat(buf, "\n");
return strlen(buf);
}
@@ -397,7 +401,8 @@ STORE(__bch_blockdev_volume)
}
}
d->inode.v.i_size = cpu_to_le64(v);
- ret = bch_inode_update(d->c, &d->inode.k_i, &journal_seq);
+ ret = bch_btree_update(d->c, BTREE_ID_INODES,
+ &d->inode.k_i, &journal_seq);
mutex_unlock(&d->inode_lock);
@@ -417,8 +422,9 @@ STORE(__bch_blockdev_volume)
mutex_lock(&d->inode_lock);
- memcpy(d->inode.v.i_label, buf, SB_LABEL_SIZE);
- ret = bch_inode_update(d->c, &d->inode.k_i, &journal_seq);
+ memcpy(d->inode.v.i_label, buf, BCH_SB_LABEL_SIZE);
+ ret = bch_btree_update(d->c, BTREE_ID_INODES,
+ &d->inode.k_i, &journal_seq);
mutex_unlock(&d->inode_lock);
@@ -677,10 +683,8 @@ SHOW(bch_cache_set)
sysfs_print(tiering_percent, c->tiering_percent);
sysfs_pd_controller_show(tiering, &c->tiering_pd);
- sysfs_printf(meta_replicas_have, "%llu",
- CACHE_SET_META_REPLICAS_HAVE(&c->disk_sb));
- sysfs_printf(data_replicas_have, "%llu",
- CACHE_SET_DATA_REPLICAS_HAVE(&c->disk_sb));
+ sysfs_printf(meta_replicas_have, "%u", c->sb.meta_replicas_have);
+ sysfs_printf(data_replicas_have, "%u", c->sb.data_replicas_have);
/* Debugging: */
@@ -705,7 +709,7 @@ SHOW(bch_cache_set)
if (attr == &sysfs_compression_stats)
return bch_compression_stats(c, buf);
- sysfs_printf(internal_uuid, "%pU", c->disk_sb.set_uuid.b);
+ sysfs_printf(internal_uuid, "%pU", c->sb.uuid.b);
return 0;
}
@@ -945,15 +949,15 @@ SHOW(bch_cache_set_opts_dir)
{
struct cache_set *c = container_of(kobj, struct cache_set, opts_dir);
-#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
+#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
if (attr == &sysfs_opt_##_name) \
return _choices == bch_bool_opt || _choices == bch_uint_opt\
? snprintf(buf, PAGE_SIZE, "%i\n", c->opts._name)\
: bch_snprint_string_list(buf, PAGE_SIZE, \
_choices, c->opts._name);\
- CACHE_SET_VISIBLE_OPTS()
-#undef CACHE_SET_OPT
+ BCH_VISIBLE_OPTS()
+#undef BCH_OPT
return 0;
}
@@ -962,7 +966,7 @@ STORE(bch_cache_set_opts_dir)
{
struct cache_set *c = container_of(kobj, struct cache_set, opts_dir);
-#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
+#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
if (attr == &sysfs_opt_##_name) { \
ssize_t v = (_choices == bch_bool_opt || \
_choices == bch_uint_opt) \
@@ -972,18 +976,28 @@ STORE(bch_cache_set_opts_dir)
if (v < 0) \
return v; \
\
- c->opts._name = v; \
+ mutex_lock(&c->sb_lock); \
+ if (attr == &sysfs_opt_compression) { \
+ int ret = bch_check_set_has_compressed_data(c, v);\
+ if (ret) { \
+ mutex_unlock(&c->sb_lock); \
+ return ret; \
+ } \
+ } \
\
- if (_sb_opt##_BITS && v != _sb_opt(&c->disk_sb)) { \
- SET_##_sb_opt(&c->disk_sb, v); \
- bcache_write_super(c); \
+ if (_sb_opt##_BITS && v != _sb_opt(c->disk_sb)) { \
+ SET_##_sb_opt(c->disk_sb, v); \
+ bch_write_super(c); \
} \
\
+ c->opts._name = v; \
+ mutex_unlock(&c->sb_lock); \
+ \
return size; \
}
- CACHE_SET_VISIBLE_OPTS()
-#undef CACHE_SET_OPT
+ BCH_VISIBLE_OPTS()
+#undef BCH_OPT
return size;
}
@@ -993,11 +1007,11 @@ static void bch_cache_set_opts_dir_release(struct kobject *k)
}
static struct attribute *bch_cache_set_opts_dir_files[] = {
-#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
+#define BCH_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
&sysfs_opt_##_name,
- CACHE_SET_VISIBLE_OPTS()
-#undef CACHE_SET_OPT
+ BCH_VISIBLE_OPTS()
+#undef BCH_OPT
NULL
};
@@ -1176,7 +1190,7 @@ SHOW(bch_cache)
struct cache_set *c = ca->set;
struct bucket_stats_cache stats = bch_bucket_stats_read_cache(ca);
- sysfs_printf(uuid, "%pU\n", ca->disk_sb.sb->disk_uuid.b);
+ sysfs_printf(uuid, "%pU\n", ca->uuid.b);
sysfs_hprint(bucket_size, bucket_bytes(ca));
sysfs_print(bucket_size_bytes, bucket_bytes(ca));
@@ -1242,17 +1256,21 @@ STORE(__bch_cache)
{
struct cache *ca = container_of(kobj, struct cache, kobj);
struct cache_set *c = ca->set;
- struct cache_member *mi = &c->disk_mi[ca->sb.nr_this_dev];
+ struct bch_member *mi;
sysfs_pd_controller_store(copy_gc, &ca->moving_gc_pd);
if (attr == &sysfs_discard) {
bool v = strtoul_or_return(buf);
- if (v != CACHE_DISCARD(mi)) {
- SET_CACHE_DISCARD(mi, v);
- bcache_write_super(c);
+ mutex_lock(&c->sb_lock);
+ mi = &bch_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+
+ if (v != BCH_MEMBER_DISCARD(mi)) {
+ SET_BCH_MEMBER_DISCARD(mi, v);
+ bch_write_super(c);
}
+ mutex_unlock(&c->sb_lock);
}
if (attr == &sysfs_cache_replacement_policy) {
@@ -1261,10 +1279,14 @@ STORE(__bch_cache)
if (v < 0)
return v;
- if ((unsigned) v != CACHE_REPLACEMENT(mi)) {
- SET_CACHE_REPLACEMENT(mi, v);
- bcache_write_super(c);
+ mutex_lock(&c->sb_lock);
+ mi = &bch_sb_get_members(c->disk_sb)->members[ca->dev_idx];
+
+ if ((unsigned) v != BCH_MEMBER_REPLACEMENT(mi)) {
+ SET_BCH_MEMBER_REPLACEMENT(mi, v);
+ bch_write_super(c);
}
+ mutex_unlock(&c->sb_lock);
}
if (attr == &sysfs_state_rw) {
@@ -1279,14 +1301,14 @@ STORE(__bch_cache)
return size;
switch (v) {
- case CACHE_ACTIVE:
+ case BCH_MEMBER_STATE_ACTIVE:
err = bch_cache_read_write(ca);
break;
- case CACHE_RO:
+ case BCH_MEMBER_STATE_RO:
bch_cache_read_only(ca);
break;
- case CACHE_FAILED:
- case CACHE_SPARE:
+ case BCH_MEMBER_STATE_FAILED:
+ case BCH_MEMBER_STATE_SPARE:
/*
* XXX: need to migrate data off and set correct state
*/
diff --git a/libbcache/tier.c b/libbcache/tier.c
index 39b04f7b..46864594 100644
--- a/libbcache/tier.c
+++ b/libbcache/tier.c
@@ -8,6 +8,7 @@
#include "io.h"
#include "keylist.h"
#include "move.h"
+#include "super-io.h"
#include "tier.h"
#include <linux/freezer.h>
@@ -40,7 +41,7 @@ static bool tiering_pred(struct cache_set *c,
mi = cache_member_info_get(c);
extent_for_each_ptr(e, ptr)
- if (ptr->dev < mi->nr_in_set &&
+ if (ptr->dev < mi->nr_devices &&
mi->m[ptr->dev].tier >= s->tier_idx)
replicas++;
cache_member_info_put();
diff --git a/libbcache/vstructs.h b/libbcache/vstructs.h
new file mode 100644
index 00000000..ce2cece0
--- /dev/null
+++ b/libbcache/vstructs.h
@@ -0,0 +1,62 @@
+#ifndef _VSTRUCTS_H
+#define _VSTRUCTS_H
+
+#include "util.h"
+
+/*
+ * NOTE: we can't differentiate between __le64 and u64 with type_is - this
+ * assumes u64 is little endian:
+ */
+#define __vstruct_u64s(_s) \
+({ \
+ ( type_is((_s)->u64s, u64) ? le64_to_cpu((_s)->u64s) \
+ : type_is((_s)->u64s, u32) ? le32_to_cpu((_s)->u64s) \
+ : type_is((_s)->u64s, u16) ? le16_to_cpu((_s)->u64s) \
+ : ((_s)->u64s)); \
+})
+
+#define __vstruct_bytes(_type, _u64s) \
+({ \
+ BUILD_BUG_ON(offsetof(_type, _data) % sizeof(u64)); \
+ \
+ (offsetof(_type, _data) + (_u64s) * sizeof(u64)); \
+})
+
+#define vstruct_bytes(_s) \
+ __vstruct_bytes(typeof(*(_s)), __vstruct_u64s(_s))
+
+#define __vstruct_blocks(_type, _sector_block_bits, _u64s) \
+ (round_up(__vstruct_bytes(_type, _u64s), \
+ 512 << (_sector_block_bits)) >> (9 + (_sector_block_bits)))
+
+#define vstruct_blocks(_s, _sector_block_bits) \
+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, __vstruct_u64s(_s))
+
+#define vstruct_blocks_plus(_s, _sector_block_bits, _u64s) \
+ __vstruct_blocks(typeof(*(_s)), _sector_block_bits, \
+ __vstruct_u64s(_s) + (_u64s))
+
+#define vstruct_sectors(_s, _sector_block_bits) \
+ (round_up(vstruct_bytes(_s), 512 << (_sector_block_bits)) >> 9)
+
+#define vstruct_next(_s) \
+ ((typeof(_s)) ((_s)->_data + __vstruct_u64s(_s)))
+#define vstruct_last(_s) \
+ ((typeof(&(_s)->start[0])) ((_s)->_data + __vstruct_u64s(_s)))
+#define vstruct_end(_s) \
+ ((void *) ((_s)->_data + __vstruct_u64s(_s)))
+
+#define vstruct_for_each(_s, _i) \
+ for (_i = (_s)->start; \
+ _i < vstruct_last(_s); \
+ _i = vstruct_next(_i))
+
+#define vstruct_for_each_safe(_s, _i, _t) \
+ for (_i = (_s)->start; \
+ _i < vstruct_last(_s) && (_t = vstruct_next(_i), true); \
+ _i = _t)
+
+#define vstruct_idx(_s, _idx) \
+ ((typeof(&(_s)->start[0])) ((_s)->_data + (_idx)))
+
+#endif /* _VSTRUCTS_H */
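
A sketch of how the vstruct helpers above are meant to be used. Both structures below are hypothetical stand-ins; in the patch the real users are the variable-length on-disk structures (superblock sections, journal and btree entries) that carry a u64s count followed by a _data[] flexible array.

	struct example_entry {
		__le16		u64s;		/* size of _data, in u64s */
		__u8		type;
		__u8		pad[5];
		__u64		_data[0];
	};

	struct example_container {
		__le32		u64s;		/* total payload size, in u64s */
		__le32		pad;
		union {
			struct example_entry	start[0];
			__u64			_data[0];
		};
	};

	static unsigned example_count_entries(struct example_container *s)
	{
		struct example_entry *i;
		unsigned nr = 0;

		/* vstruct_bytes(s) == offsetof(..., _data) + u64s * sizeof(u64) */
		vstruct_for_each(s, i)		/* i advances by i->u64s each step */
			nr++;
		return nr;
	}
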
diff --git a/libbcache/xattr.c b/libbcache/xattr.c
index e9e0a9a7..56a8e8f8 100644
--- a/libbcache/xattr.c
+++ b/libbcache/xattr.c
@@ -9,7 +9,6 @@
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
-#include <crypto/hash.h>
struct xattr_search_key {
u8 type;
@@ -22,37 +21,13 @@ struct xattr_search_key {
static u64 bch_xattr_hash(const struct bch_hash_info *info,
const struct xattr_search_key *key)
{
- switch (info->type) {
- case BCH_STR_HASH_SHA1: {
- SHASH_DESC_ON_STACK(desc, bch_sha1);
- u8 digest[SHA1_DIGEST_SIZE];
- u64 ret;
+ struct bch_str_hash_ctx ctx;
- desc->tfm = bch_sha1;
- desc->flags = 0;
- crypto_shash_init(desc);
+ bch_str_hash_init(&ctx, info);
+ bch_str_hash_update(&ctx, info, &key->type, sizeof(key->type));
+ bch_str_hash_update(&ctx, info, key->name.name, key->name.len);
- crypto_shash_update(desc, (void *) &info->seed, sizeof(info->seed));
-
- crypto_shash_update(desc, (void *) &key->type, sizeof(key->type));
- crypto_shash_update(desc, (void *) key->name.name, key->name.len);
-
- crypto_shash_final(desc, digest);
- memcpy(&ret, &digest, sizeof(ret));
- return ret >> 1;
- }
- default: {
- struct bch_str_hash_ctx ctx;
-
- bch_str_hash_init(&ctx, info->type);
- bch_str_hash_update(&ctx, info->type, &info->seed, sizeof(info->seed));
-
- bch_str_hash_update(&ctx, info->type, &key->type, sizeof(key->type));
- bch_str_hash_update(&ctx, info->type, key->name.name, key->name.len);
-
- return bch_str_hash_end(&ctx, info->type);
- }
- }
+ return bch_str_hash_end(&ctx, info);
}
#define xattr_val(_xattr) ((_xattr)->x_name + (_xattr)->x_name_len)
diff --git a/linux/crypto/algapi.c b/linux/crypto/algapi.c
deleted file mode 100644
index 5e8e97b9..00000000
--- a/linux/crypto/algapi.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/*
- * Cryptographic API for algorithms (i.e., low-level API).
- *
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <linux/byteorder.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/printk.h>
-#include <linux/rtnetlink.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-
-#include "internal.h"
-
-static inline int crypto_set_driver_name(struct crypto_alg *alg)
-{
- static const char suffix[] = "-generic";
- char *driver_name = alg->cra_driver_name;
- int len;
-
- if (*driver_name)
- return 0;
-
- len = strlcpy(driver_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
- if (len + sizeof(suffix) > CRYPTO_MAX_ALG_NAME)
- return -ENAMETOOLONG;
-
- memcpy(driver_name + len, suffix, sizeof(suffix));
- return 0;
-}
-
-static int crypto_check_alg(struct crypto_alg *alg)
-{
- if (alg->cra_alignmask & (alg->cra_alignmask + 1))
- return -EINVAL;
-
- if (alg->cra_blocksize > PAGE_SIZE / 8)
- return -EINVAL;
-
- if (alg->cra_priority < 0)
- return -EINVAL;
-
- atomic_set(&alg->cra_refcnt, 1);
-
- return crypto_set_driver_name(alg);
-}
-
-static int __crypto_register_alg(struct crypto_alg *alg)
-{
- struct crypto_alg *q;
- int ret = -EAGAIN;
-
- INIT_LIST_HEAD(&alg->cra_users);
-
- ret = -EEXIST;
-
- list_for_each_entry(q, &crypto_alg_list, cra_list) {
- if (q == alg)
- goto err;
-
- if (!strcmp(q->cra_driver_name, alg->cra_name) ||
- !strcmp(q->cra_name, alg->cra_driver_name))
- goto err;
- }
-
- list_add(&alg->cra_list, &crypto_alg_list);
- return 0;
-err:
- return ret;
-}
-
-void crypto_remove_final(struct list_head *list)
-{
- struct crypto_alg *alg;
- struct crypto_alg *n;
-
- list_for_each_entry_safe(alg, n, list, cra_list) {
- list_del_init(&alg->cra_list);
- crypto_alg_put(alg);
- }
-}
-
-int crypto_register_alg(struct crypto_alg *alg)
-{
- int err;
-
- err = crypto_check_alg(alg);
- if (err)
- return err;
-
- down_write(&crypto_alg_sem);
- err = __crypto_register_alg(alg);
- up_write(&crypto_alg_sem);
-
- return err;
-}
-
-static int crypto_remove_alg(struct crypto_alg *alg, struct list_head *list)
-{
- if (unlikely(list_empty(&alg->cra_list)))
- return -ENOENT;
-
- list_del_init(&alg->cra_list);
- return 0;
-}
-
-int crypto_unregister_alg(struct crypto_alg *alg)
-{
- int ret;
- LIST_HEAD(list);
-
- down_write(&crypto_alg_sem);
- ret = crypto_remove_alg(alg, &list);
- up_write(&crypto_alg_sem);
-
- if (ret)
- return ret;
-
- BUG_ON(atomic_read(&alg->cra_refcnt) != 1);
- if (alg->cra_destroy)
- alg->cra_destroy(alg);
-
- crypto_remove_final(&list);
- return 0;
-}
-
-int crypto_register_algs(struct crypto_alg *algs, int count)
-{
- int i, ret;
-
- for (i = 0; i < count; i++) {
- ret = crypto_register_alg(&algs[i]);
- if (ret)
- goto err;
- }
-
- return 0;
-
-err:
- for (--i; i >= 0; --i)
- crypto_unregister_alg(&algs[i]);
-
- return ret;
-}
-
-int crypto_unregister_algs(struct crypto_alg *algs, int count)
-{
- int i, ret;
-
- for (i = 0; i < count; i++) {
- ret = crypto_unregister_alg(&algs[i]);
- if (ret)
- pr_err("Failed to unregister %s %s: %d\n",
- algs[i].cra_driver_name, algs[i].cra_name, ret);
- }
-
- return 0;
-}
-
-struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb)
-{
- struct rtattr *rta = tb[0];
- struct crypto_attr_type *algt;
-
- if (!rta)
- return ERR_PTR(-ENOENT);
- if (RTA_PAYLOAD(rta) < sizeof(*algt))
- return ERR_PTR(-EINVAL);
- if (rta->rta_type != CRYPTOA_TYPE)
- return ERR_PTR(-EINVAL);
-
- algt = RTA_DATA(rta);
-
- return algt;
-}
-
-int crypto_check_attr_type(struct rtattr **tb, u32 type)
-{
- struct crypto_attr_type *algt;
-
- algt = crypto_get_attr_type(tb);
- if (IS_ERR(algt))
- return PTR_ERR(algt);
-
- if ((algt->type ^ type) & algt->mask)
- return -EINVAL;
-
- return 0;
-}
-
-const char *crypto_attr_alg_name(struct rtattr *rta)
-{
- struct crypto_attr_alg *alga;
-
- if (!rta)
- return ERR_PTR(-ENOENT);
- if (RTA_PAYLOAD(rta) < sizeof(*alga))
- return ERR_PTR(-EINVAL);
- if (rta->rta_type != CRYPTOA_ALG)
- return ERR_PTR(-EINVAL);
-
- alga = RTA_DATA(rta);
- alga->name[CRYPTO_MAX_ALG_NAME - 1] = 0;
-
- return alga->name;
-}
-
-struct crypto_alg *crypto_attr_alg2(struct rtattr *rta,
- const struct crypto_type *frontend,
- u32 type, u32 mask)
-{
- const char *name;
-
- name = crypto_attr_alg_name(rta);
- if (IS_ERR(name))
- return ERR_CAST(name);
-
- return crypto_find_alg(name, frontend, type, mask);
-}
-
-int crypto_attr_u32(struct rtattr *rta, u32 *num)
-{
- struct crypto_attr_u32 *nu32;
-
- if (!rta)
- return -ENOENT;
- if (RTA_PAYLOAD(rta) < sizeof(*nu32))
- return -EINVAL;
- if (rta->rta_type != CRYPTOA_U32)
- return -EINVAL;
-
- nu32 = RTA_DATA(rta);
- *num = nu32->num;
-
- return 0;
-}
-
-static inline void crypto_inc_byte(u8 *a, unsigned int size)
-{
- u8 *b = (a + size);
- u8 c;
-
- for (; size; size--) {
- c = *--b + 1;
- *b = c;
- if (c)
- break;
- }
-}
-
-void crypto_inc(u8 *a, unsigned int size)
-{
- __be32 *b = (__be32 *)(a + size);
- u32 c;
-
- for (; size >= 4; size -= 4) {
- c = be32_to_cpu(*--b) + 1;
- *b = cpu_to_be32(c);
- if (c)
- return;
- }
-
- crypto_inc_byte(a, size);
-}
-
-static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size)
-{
- for (; size; size--)
- *a++ ^= *b++;
-}
-
-void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
-{
- u32 *a = (u32 *)dst;
- u32 *b = (u32 *)src;
-
- for (; size >= 4; size -= 4)
- *a++ ^= *b++;
-
- crypto_xor_byte((u8 *)a, (u8 *)b, size);
-}
-
-unsigned int crypto_alg_extsize(struct crypto_alg *alg)
-{
- return alg->cra_ctxsize +
- (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1));
-}
-
-int crypto_type_has_alg(const char *name, const struct crypto_type *frontend,
- u32 type, u32 mask)
-{
- int ret = 0;
- struct crypto_alg *alg = crypto_find_alg(name, frontend, type, mask);
-
- if (!IS_ERR(alg)) {
- crypto_alg_put(alg);
- ret = 1;
- }
-
- return ret;
-}
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Cryptographic algorithms API");
diff --git a/linux/crypto/api.c b/linux/crypto/api.c
index 513a48aa..2d24630e 100644
--- a/linux/crypto/api.c
+++ b/linux/crypto/api.c
@@ -1,12 +1,7 @@
/*
- * Scatterlist Cryptographic API.
+ * Cryptographic API for algorithms (i.e., low-level API).
*
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- * Copyright (c) 2002 David S. Miller (davem@redhat.com)
- * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * Portions derived from Cryptoapi, by Alexander Kjeldaas <astor@fast.no>
- * and Nettle, by Niels Möller.
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
@@ -19,183 +14,80 @@
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/param.h>
-#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/string.h>
+
+#include <crypto/algapi.h>
#include "internal.h"
-LIST_HEAD(crypto_alg_list);
-DECLARE_RWSEM(crypto_alg_sem);
+static LIST_HEAD(crypto_alg_list);
+static DECLARE_RWSEM(crypto_alg_sem);
-static struct crypto_alg *__crypto_alg_lookup(const char *name, u32 type,
- u32 mask)
+static unsigned crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask)
{
- struct crypto_alg *q, *alg = NULL;
- int best = -2;
-
- list_for_each_entry(q, &crypto_alg_list, cra_list) {
- int exact, fuzzy;
-
- if ((q->cra_flags ^ type) & mask)
- continue;
-
- exact = !strcmp(q->cra_driver_name, name);
- fuzzy = !strcmp(q->cra_name, name);
- if (!exact && !(fuzzy && q->cra_priority > best))
- continue;
-
- if (unlikely(!crypto_alg_get(q)))
- continue;
-
- best = q->cra_priority;
- if (alg)
- crypto_alg_put(alg);
- alg = q;
-
- if (exact)
- break;
- }
+ return alg->cra_type->ctxsize(alg, type, mask);
+}
- return alg;
+unsigned crypto_alg_extsize(struct crypto_alg *alg)
+{
+ return alg->cra_ctxsize;
}
struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
{
struct crypto_alg *alg;
- /*
- * If the internal flag is set for a cipher, require a caller to
- * to invoke the cipher with the internal flag to use that cipher.
- * Also, if a caller wants to allocate a cipher that may or may
- * not be an internal cipher, use type | CRYPTO_ALG_INTERNAL and
- * !(mask & CRYPTO_ALG_INTERNAL).
- */
- if (!((type | mask) & CRYPTO_ALG_INTERNAL))
- mask |= CRYPTO_ALG_INTERNAL;
-
down_read(&crypto_alg_sem);
- alg = __crypto_alg_lookup(name, type, mask);
- up_read(&crypto_alg_sem);
+ list_for_each_entry(alg, &crypto_alg_list, cra_list)
+ if (!((alg->cra_flags ^ type) & mask) &&
+ !strcmp(alg->cra_name, name))
+ goto found;
- return alg ?: ERR_PTR(-ENOENT);
-}
-
-static int crypto_init_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
-{
- const struct crypto_type *type_obj = tfm->__crt_alg->cra_type;
-
- if (type_obj)
- return type_obj->init(tfm, type, mask);
-
- switch (crypto_tfm_alg_type(tfm)) {
- case CRYPTO_ALG_TYPE_CIPHER:
- return crypto_init_cipher_ops(tfm);
- default:
- break;
- }
+ alg = ERR_PTR(-ENOENT);
+found:
+ up_read(&crypto_alg_sem);
- BUG();
- return -EINVAL;
+ return alg;
}
static void crypto_exit_ops(struct crypto_tfm *tfm)
{
- const struct crypto_type *type = tfm->__crt_alg->cra_type;
-
- if (type) {
- if (tfm->exit)
- tfm->exit(tfm);
- return;
- }
-
- switch (crypto_tfm_alg_type(tfm)) {
- case CRYPTO_ALG_TYPE_CIPHER:
- crypto_exit_cipher_ops(tfm);
- break;
-
- default:
- BUG();
- }
-}
-
-static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask)
-{
- const struct crypto_type *type_obj = alg->cra_type;
- unsigned int len;
-
- len = alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1);
- if (type_obj)
- return len + type_obj->ctxsize(alg, type, mask);
-
- switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
- default:
- BUG();
-
- case CRYPTO_ALG_TYPE_CIPHER:
- len += crypto_cipher_ctxsize(alg);
- break;
- }
-
- return len;
+ if (tfm->exit)
+ tfm->exit(tfm);
}
-struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
- u32 mask)
+static struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg,
+ u32 type, u32 mask)
{
struct crypto_tfm *tfm = NULL;
- unsigned int tfm_size;
+ unsigned tfm_size;
int err = -ENOMEM;
tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask);
tfm = kzalloc(tfm_size, GFP_KERNEL);
if (tfm == NULL)
- goto out_err;
+ return ERR_PTR(-ENOMEM);
tfm->__crt_alg = alg;
- err = crypto_init_ops(tfm, type, mask);
+ err = alg->cra_type->init(tfm, type, mask);
if (err)
goto out_free_tfm;
if (!tfm->exit && alg->cra_init && (err = alg->cra_init(tfm)))
goto cra_init_failed;
- goto out;
+ return tfm;
cra_init_failed:
crypto_exit_ops(tfm);
out_free_tfm:
kfree(tfm);
-out_err:
- tfm = ERR_PTR(err);
-out:
- return tfm;
+ return ERR_PTR(err);
}
-/*
- * crypto_alloc_base - Locate algorithm and allocate transform
- * @alg_name: Name of algorithm
- * @type: Type of algorithm
- * @mask: Mask for type comparison
- *
- * This function should not be used by new algorithm types.
- * Please use crypto_alloc_tfm instead.
- *
- * crypto_alloc_base() will first attempt to locate an already loaded
- * algorithm. If that fails and the kernel supports dynamically loadable
- * modules, it will then attempt to load a module of the same name or
- * alias. If that fails it will send a query to any loaded crypto manager
- * to construct an algorithm on the fly. A refcount is grabbed on the
- * algorithm which is then associated with the new transform.
- *
- * The returned transform is of a non-determinate type. Most people
- * should use one of the more specific allocation functions such as
- * crypto_alloc_blkcipher.
- *
- * In case of error the return value is an error pointer.
- */
struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask)
{
struct crypto_alg *alg;
@@ -208,31 +100,29 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask)
}
tfm = __crypto_alloc_tfm(alg, type, mask);
- if (IS_ERR(tfm)) {
- crypto_alg_put(alg);
+ if (IS_ERR(tfm))
return tfm;
- }
return tfm;
}
-void *crypto_create_tfm(struct crypto_alg *alg,
- const struct crypto_type *frontend)
+static void *crypto_create_tfm(struct crypto_alg *alg,
+ const struct crypto_type *frontend)
{
- char *mem;
struct crypto_tfm *tfm = NULL;
- unsigned int tfmsize;
- unsigned int total;
+ unsigned tfmsize;
+ unsigned total;
+ void *mem;
int err = -ENOMEM;
tfmsize = frontend->tfmsize;
total = tfmsize + sizeof(*tfm) + frontend->extsize(alg);
mem = kzalloc(total, GFP_KERNEL);
- if (mem == NULL)
+ if (!mem)
goto out_err;
- tfm = (struct crypto_tfm *)(mem + tfmsize);
+ tfm = mem + tfmsize;
tfm->__crt_alg = alg;
err = frontend->init_tfm(tfm);
@@ -254,28 +144,23 @@ out:
return mem;
}
-struct crypto_alg *crypto_find_alg(const char *alg_name,
- const struct crypto_type *frontend,
- u32 type, u32 mask)
+static struct crypto_alg *crypto_find_alg(const char *alg_name,
+ const struct crypto_type *frontend,
+ u32 type, u32 mask)
{
- struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask) =
- crypto_alg_mod_lookup;
-
if (frontend) {
type &= frontend->maskclear;
mask &= frontend->maskclear;
type |= frontend->type;
mask |= frontend->maskset;
-
- if (frontend->lookup)
- lookup = frontend->lookup;
}
- return lookup(alg_name, type, mask);
+ return crypto_alg_mod_lookup(alg_name, type, mask);
}
void *crypto_alloc_tfm(const char *alg_name,
- const struct crypto_type *frontend, u32 type, u32 mask)
+ const struct crypto_type *frontend,
+ u32 type, u32 mask)
{
struct crypto_alg *alg;
void *tfm;
@@ -285,10 +170,8 @@ void *crypto_alloc_tfm(const char *alg_name,
return ERR_CAST(alg);
tfm = crypto_create_tfm(alg, frontend);
- if (IS_ERR(tfm)) {
- crypto_alg_put(alg);
+ if (IS_ERR(tfm))
return tfm;
- }
return tfm;
}
@@ -305,22 +188,16 @@ void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm)
if (!tfm->exit && alg->cra_exit)
alg->cra_exit(tfm);
crypto_exit_ops(tfm);
- crypto_alg_put(alg);
kzfree(mem);
}
-int crypto_has_alg(const char *name, u32 type, u32 mask)
+int crypto_register_alg(struct crypto_alg *alg)
{
- int ret = 0;
- struct crypto_alg *alg = crypto_alg_mod_lookup(name, type, mask);
+ INIT_LIST_HEAD(&alg->cra_users);
- if (!IS_ERR(alg)) {
- crypto_alg_put(alg);
- ret = 1;
- }
+ down_write(&crypto_alg_sem);
+ list_add(&alg->cra_list, &crypto_alg_list);
+ up_write(&crypto_alg_sem);
- return ret;
+ return 0;
}
-
-MODULE_DESCRIPTION("Cryptographic core API");
-MODULE_LICENSE("GPL");
diff --git a/linux/crypto/blkcipher.c b/linux/crypto/blkcipher.c
new file mode 100644
index 00000000..31f91418
--- /dev/null
+++ b/linux/crypto/blkcipher.c
@@ -0,0 +1,47 @@
+/*
+ * Block chaining cipher operations.
+ *
+ * Generic encrypt/decrypt wrapper for ciphers, handles operations across
+ * multiple page boundaries by using temporary blocks. In user context,
+ * the kernel is given a chance to schedule us once per page.
+ *
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <crypto/algapi.h>
+#include "internal.h"
+
+static unsigned crypto_blkcipher_ctxsize(struct crypto_alg *alg,
+ u32 type, u32 mask)
+{
+ return alg->cra_ctxsize;
+}
+
+static int crypto_init_blkcipher_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
+{
+ struct blkcipher_tfm *crt = &tfm->crt_blkcipher;
+ struct blkcipher_alg *alg = &tfm->__crt_alg->cra_blkcipher;
+
+ BUG_ON((mask & CRYPTO_ALG_TYPE_MASK) != CRYPTO_ALG_TYPE_MASK);
+
+ crt->setkey = alg->setkey;
+ crt->encrypt = alg->encrypt;
+ crt->decrypt = alg->decrypt;
+ return 0;
+}
+
+const struct crypto_type crypto_blkcipher_type = {
+ .ctxsize = crypto_blkcipher_ctxsize,
+ .init = crypto_init_blkcipher_ops,
+};
diff --git a/linux/crypto/chacha20_generic.c b/linux/crypto/chacha20_generic.c
new file mode 100644
index 00000000..7ac68321
--- /dev/null
+++ b/linux/crypto/chacha20_generic.c
@@ -0,0 +1,99 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/byteorder.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/scatterlist.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/chacha20.h>
+
+#include <sodium/crypto_stream_chacha20.h>
+
+struct chacha20_ctx {
+ u32 key[8];
+};
+
+static int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keysize)
+{
+ struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm);
+ int i;
+
+ if (keysize != CHACHA20_KEY_SIZE)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+ ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+ return 0;
+}
+
+static int crypto_chacha20_crypt(struct blkcipher_desc *desc,
+ struct scatterlist *dst,
+ struct scatterlist *src,
+ unsigned nbytes)
+{
+ struct chacha20_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct scatterlist *sg = src;
+ u32 iv[4];
+ int ret;
+
+ BUG_ON(src != dst);
+
+ memcpy(iv, desc->info, sizeof(iv));
+
+ while (1) {
+ ret = crypto_stream_chacha20_xor_ic(sg_virt(sg),
+ sg_virt(sg),
+ sg->length,
+ (void *) &iv[2],
+ iv[0] | ((u64) iv[1] << 32),
+ (void *) ctx->key);
+ BUG_ON(ret);
+
+ nbytes -= sg->length;
+
+ if (sg_is_last(sg))
+ break;
+
+ BUG_ON(sg->length % CHACHA20_BLOCK_SIZE);
+ iv[0] += sg->length / CHACHA20_BLOCK_SIZE;
+ sg = sg_next(sg);
+ };
+
+ BUG_ON(nbytes);
+
+ return 0;
+}
+
+static struct crypto_alg alg = {
+ .cra_name = "chacha20",
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_ctxsize = sizeof(struct chacha20_ctx),
+ .cra_u = {
+ .blkcipher = {
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = crypto_chacha20_crypt,
+ .decrypt = crypto_chacha20_crypt,
+ },
+ },
+};
+
+__attribute__((constructor(110)))
+static int chacha20_generic_mod_init(void)
+{
+ return crypto_register_alg(&alg);
+}
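
crypto_chacha20_crypt() above reads a 16-byte IV from desc->info: iv[0..1] hold the starting 64-bit block counter (advanced by sg->length / CHACHA20_BLOCK_SIZE per segment) and iv[2..3] hold the nonce passed straight to libsodium. A sketch of filling that layout; the helper and its arguments are illustrative only.

	static void example_fill_chacha20_iv(struct blkcipher_desc *desc, u32 iv[4],
					     u64 block_counter, const u8 nonce[8])
	{
		iv[0] = (u32) block_counter;		/* low 32 bits of block counter */
		iv[1] = (u32) (block_counter >> 32);	/* high 32 bits */
		memcpy(&iv[2], nonce, 8);		/* 8-byte nonce */
		desc->info = iv;			/* consumed by the crypt path above */
	}
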
diff --git a/linux/crypto/cipher.c b/linux/crypto/cipher.c
deleted file mode 100644
index 6f47ac6c..00000000
--- a/linux/crypto/cipher.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Cipher operations.
- *
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/crypto.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include "internal.h"
-
-static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct cipher_alg *cia = &tfm->__crt_alg->cra_cipher;
- unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
- int ret;
- u8 *buffer, *alignbuffer;
- unsigned long absize;
-
- absize = keylen + alignmask;
- buffer = kmalloc(absize, GFP_ATOMIC);
- if (!buffer)
- return -ENOMEM;
-
- alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
- memcpy(alignbuffer, key, keylen);
- ret = cia->cia_setkey(tfm, alignbuffer, keylen);
- memset(alignbuffer, 0, keylen);
- kfree(buffer);
- return ret;
-
-}
-
-static int setkey_default(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct cipher_alg *cia = &tfm->__crt_alg->cra_cipher;
- unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
-
- tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- if (keylen < cia->cia_min_keysize || keylen > cia->cia_max_keysize) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
-
- if ((unsigned long)key & alignmask)
- return setkey_unaligned(tfm, key, keylen);
-
- return cia->cia_setkey(tfm, key, keylen);
-}
-
-static void cipher_crypt_unaligned(void (*fn)(struct crypto_tfm *, u8 *,
- const u8 *),
- struct crypto_tfm *tfm,
- u8 *dst, const u8 *src)
-{
- unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
- unsigned int size = crypto_tfm_alg_blocksize(tfm);
- u8 buffer[size + alignmask];
- u8 *tmp = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
-
- memcpy(tmp, src, size);
- fn(tfm, tmp, tmp);
- memcpy(dst, tmp, size);
-}
-
-static void cipher_encrypt_unaligned(struct crypto_tfm *tfm,
- u8 *dst, const u8 *src)
-{
- unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
- struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
-
- if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) {
- cipher_crypt_unaligned(cipher->cia_encrypt, tfm, dst, src);
- return;
- }
-
- cipher->cia_encrypt(tfm, dst, src);
-}
-
-static void cipher_decrypt_unaligned(struct crypto_tfm *tfm,
- u8 *dst, const u8 *src)
-{
- unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
- struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
-
- if (unlikely(((unsigned long)dst | (unsigned long)src) & alignmask)) {
- cipher_crypt_unaligned(cipher->cia_decrypt, tfm, dst, src);
- return;
- }
-
- cipher->cia_decrypt(tfm, dst, src);
-}
-
-int crypto_init_cipher_ops(struct crypto_tfm *tfm)
-{
- struct cipher_tfm *ops = &tfm->crt_cipher;
- struct cipher_alg *cipher = &tfm->__crt_alg->cra_cipher;
-
- ops->cit_setkey = setkey_default;
- ops->cit_encrypt_one = crypto_tfm_alg_alignmask(tfm) ?
- cipher_encrypt_unaligned : cipher->cia_encrypt;
- ops->cit_decrypt_one = crypto_tfm_alg_alignmask(tfm) ?
- cipher_decrypt_unaligned : cipher->cia_decrypt;
-
- return 0;
-}
-
-void crypto_exit_cipher_ops(struct crypto_tfm *tfm)
-{
-}
diff --git a/linux/crypto/internal.h b/linux/crypto/internal.h
index b00dcea2..5b21f836 100644
--- a/linux/crypto/internal.h
+++ b/linux/crypto/internal.h
@@ -13,66 +13,11 @@
#ifndef _CRYPTO_INTERNAL_H
#define _CRYPTO_INTERNAL_H
-#include <crypto/algapi.h>
-#include <linux/completion.h>
-#include <linux/mm.h>
-#include <linux/list.h>
-#include <linux/kernel.h>
-#include <linux/notifier.h>
-#include <linux/rwsem.h>
-#include <linux/slab.h>
+struct crypto_type;
+struct crypto_alg;
-struct crypto_instance;
-struct crypto_template;
-
-struct crypto_larval {
- struct crypto_alg alg;
- struct crypto_alg *adult;
- struct completion completion;
- u32 mask;
-};
-
-extern struct list_head crypto_alg_list;
-extern struct rw_semaphore crypto_alg_sem;
-
-static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg)
-{
- return alg->cra_ctxsize;
-}
-
-int crypto_init_cipher_ops(struct crypto_tfm *tfm);
-void crypto_exit_cipher_ops(struct crypto_tfm *tfm);
-
-void crypto_remove_final(struct list_head *list);
-struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type,
- u32 mask);
-void *crypto_create_tfm(struct crypto_alg *alg,
- const struct crypto_type *frontend);
-struct crypto_alg *crypto_find_alg(const char *alg_name,
- const struct crypto_type *frontend,
- u32 type, u32 mask);
-void *crypto_alloc_tfm(const char *alg_name,
- const struct crypto_type *frontend, u32 type, u32 mask);
-
-int crypto_register_notifier(struct notifier_block *nb);
-int crypto_unregister_notifier(struct notifier_block *nb);
-
-unsigned int crypto_alg_extsize(struct crypto_alg *alg);
-
-int crypto_type_has_alg(const char *name, const struct crypto_type *frontend,
- u32 type, u32 mask);
-
-static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
-{
- atomic_inc(&alg->cra_refcnt);
- return alg;
-}
-
-static inline void crypto_alg_put(struct crypto_alg *alg)
-{
- if (atomic_dec_and_test(&alg->cra_refcnt) && alg->cra_destroy)
- alg->cra_destroy(alg);
-}
+void *crypto_alloc_tfm(const char *, const struct crypto_type *, u32, u32);
+unsigned int crypto_alg_extsize(struct crypto_alg *);
#endif /* _CRYPTO_INTERNAL_H */
diff --git a/linux/crypto/poly1305_generic.c b/linux/crypto/poly1305_generic.c
new file mode 100644
index 00000000..5d385d54
--- /dev/null
+++ b/linux/crypto/poly1305_generic.c
@@ -0,0 +1,76 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/byteorder.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/poly1305.h>
+
+struct poly1305_desc_ctx {
+ bool key_done;
+ crypto_onetimeauth_poly1305_state s;
+};
+
+
+static int poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *state = shash_desc_ctx(desc);
+
+ state->key_done = false;
+ return 0;
+}
+
+static int poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned len)
+{
+ struct poly1305_desc_ctx *state = shash_desc_ctx(desc);
+
+ if (!state->key_done) {
+ BUG_ON(len != crypto_onetimeauth_poly1305_KEYBYTES);
+
+ state->key_done = true;
+ return crypto_onetimeauth_poly1305_init(&state->s, src);
+ }
+
+ return crypto_onetimeauth_poly1305_update(&state->s, src, len);
+}
+
+static int poly1305_final(struct shash_desc *desc, u8 *out)
+{
+ struct poly1305_desc_ctx *state = shash_desc_ctx(desc);
+
+ return crypto_onetimeauth_poly1305_final(&state->s, out);
+}
+
+static struct shash_alg poly1305_alg = {
+ .digestsize = crypto_onetimeauth_poly1305_BYTES,
+ .init = poly1305_init,
+ .update = poly1305_update,
+ .final = poly1305_final,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+ .base = {
+ .cra_name = "poly1305",
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ },
+};
+
+__attribute__((constructor(110)))
+static int poly1305_mod_init(void)
+{
+ return crypto_register_shash(&poly1305_alg);
+}
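
poly1305_update() above enforces a calling convention: the first update after init must be exactly the 32-byte one-time key, and every later update is message data. A sketch of a caller honoring that, assuming the usual shash helpers (crypto_shash_init/update/final and SHASH_DESC_ON_STACK) remain available in this shim; the helper name is illustrative.

	static int example_poly1305_mac(struct crypto_shash *poly, /* "poly1305" tfm */
					const u8 key[crypto_onetimeauth_poly1305_KEYBYTES],
					const u8 *msg, unsigned len, u8 *mac)
	{
		SHASH_DESC_ON_STACK(desc, poly);

		desc->tfm = poly;

		return  crypto_shash_init(desc) ?:
			crypto_shash_update(desc, key,
					    crypto_onetimeauth_poly1305_KEYBYTES) ?:
			crypto_shash_update(desc, msg, len) ?:
			crypto_shash_final(desc, mac);	/* poly1305_BYTES of output */
	}
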
diff --git a/linux/crypto/sha1_generic.c b/linux/crypto/sha1_generic.c
deleted file mode 100644
index 31b5d12e..00000000
--- a/linux/crypto/sha1_generic.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Cryptographic API.
- *
- * SHA1 Secure Hash Algorithm.
- *
- * Derived from cryptoapi implementation, adapted for in-place
- * scatterlist interface.
- *
- * Copyright (c) Alan Smithee.
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-#include <crypto/internal/hash.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <crypto/sha.h>
-#include <crypto/sha1_base.h>
-#include <asm/byteorder.h>
-
-const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE] = {
- 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
- 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
- 0xaf, 0xd8, 0x07, 0x09
-};
-
-static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src,
- int blocks)
-{
- u32 temp[SHA_WORKSPACE_WORDS];
-
- while (blocks--) {
- sha_transform(sst->state, src, temp);
- src += SHA1_BLOCK_SIZE;
- }
- memzero_explicit(temp, sizeof(temp));
-}
-
-int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- return sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
-}
-
-static int sha1_final(struct shash_desc *desc, u8 *out)
-{
- sha1_base_do_finalize(desc, sha1_generic_block_fn);
- return sha1_base_finish(desc, out);
-}
-
-int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
- return sha1_final(desc, out);
-}
-
-static struct shash_alg alg = {
- .digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_base_init,
- .update = crypto_sha1_update,
- .final = sha1_final,
- .finup = crypto_sha1_finup,
- .descsize = sizeof(struct sha1_state),
- .base = {
- .cra_name = "sha1",
- .cra_driver_name= "sha1-generic",
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-__attribute__((constructor(110)))
-static int __init sha1_generic_mod_init(void)
-{
- return crypto_register_shash(&alg);
-}
diff --git a/linux/crypto/sha256_generic.c b/linux/crypto/sha256_generic.c
new file mode 100644
index 00000000..0bd272f0
--- /dev/null
+++ b/linux/crypto/sha256_generic.c
@@ -0,0 +1,69 @@
+/*
+ * Cryptographic API.
+ *
+ * SHA-256, as specified in
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ *
+ * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/byteorder.h>
+#include <linux/types.h>
+#include <asm/unaligned.h>
+
+#include <linux/crypto.h>
+#include <crypto/internal/hash.h>
+
+#include <sodium/crypto_hash_sha256.h>
+
+static int sha256_init(struct shash_desc *desc)
+{
+ crypto_hash_sha256_state *state = shash_desc_ctx(desc);
+
+ return crypto_hash_sha256_init(state);
+}
+
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ crypto_hash_sha256_state *state = shash_desc_ctx(desc);
+
+ return crypto_hash_sha256_update(state, data, len);
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ crypto_hash_sha256_state *state = shash_desc_ctx(desc);
+
+ return crypto_hash_sha256_final(state, out);
+}
+
+static struct shash_alg sha256_alg = {
+ .digestsize = crypto_hash_sha256_BYTES,
+ .init = sha256_init,
+ .update = sha256_update,
+ .final = sha256_final,
+ .descsize = sizeof(crypto_hash_sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ }
+};
+
+__attribute__((constructor(110)))
+static int __init sha256_generic_mod_init(void)
+{
+ return crypto_register_shash(&sha256_alg);
+}
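
The new driver stores a crypto_hash_sha256_state in the shash descriptor context and simply forwards init/update/final to libsodium. For reference, this is roughly what the wrapper delegates to when libsodium's streaming API is used directly, outside the shash layer (a sketch, not part of the patch):

#include <sodium/crypto_hash_sha256.h>

static void sodium_sha256_example(const unsigned char *msg, size_t len,
				  unsigned char out[crypto_hash_sha256_BYTES])
{
	crypto_hash_sha256_state state;

	crypto_hash_sha256_init(&state);
	/* update may be called any number of times before final */
	crypto_hash_sha256_update(&state, msg, len);
	crypto_hash_sha256_final(&state, out);
}
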
diff --git a/linux/crypto/shash.c b/linux/crypto/shash.c
index 406ddfe8..4f07a8b8 100644
--- a/linux/crypto/shash.c
+++ b/linux/crypto/shash.c
@@ -13,181 +13,25 @@
#include <crypto/internal/hash.h>
#include <linux/err.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include "internal.h"
-static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
-{
- return -ENOSYS;
-}
-
-static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct shash_alg *shash = crypto_shash_alg(tfm);
- unsigned long alignmask = crypto_shash_alignmask(tfm);
- unsigned long absize;
- u8 *buffer, *alignbuffer;
- int err;
-
- absize = keylen + (alignmask & ~(crypto_tfm_ctx_alignment() - 1));
- buffer = kmalloc(absize, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
- memcpy(alignbuffer, key, keylen);
- err = shash->setkey(tfm, alignbuffer, keylen);
- kzfree(buffer);
- return err;
-}
-
-int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct shash_alg *shash = crypto_shash_alg(tfm);
- unsigned long alignmask = crypto_shash_alignmask(tfm);
-
- if ((unsigned long)key & alignmask)
- return shash_setkey_unaligned(tfm, key, keylen);
-
- return shash->setkey(tfm, key, keylen);
-}
-
-static inline unsigned int shash_align_buffer_size(unsigned len,
- unsigned long mask)
-{
- typedef u8 __attribute__ ((aligned)) u8_aligned;
- return len + (mask & ~(__alignof__(u8_aligned) - 1));
-}
-
-static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct crypto_shash *tfm = desc->tfm;
- struct shash_alg *shash = crypto_shash_alg(tfm);
- unsigned long alignmask = crypto_shash_alignmask(tfm);
- unsigned int unaligned_len = alignmask + 1 -
- ((unsigned long)data & alignmask);
- u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)]
- __attribute__ ((aligned));
- u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
- int err;
-
- if (unaligned_len > len)
- unaligned_len = len;
-
- memcpy(buf, data, unaligned_len);
- err = shash->update(desc, buf, unaligned_len);
- memset(buf, 0, unaligned_len);
-
- return err ?:
- shash->update(desc, data + unaligned_len, len - unaligned_len);
-}
-
-int crypto_shash_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct crypto_shash *tfm = desc->tfm;
- struct shash_alg *shash = crypto_shash_alg(tfm);
- unsigned long alignmask = crypto_shash_alignmask(tfm);
-
- if ((unsigned long)data & alignmask)
- return shash_update_unaligned(desc, data, len);
-
- return shash->update(desc, data, len);
-}
-
-static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
-{
- struct crypto_shash *tfm = desc->tfm;
- unsigned long alignmask = crypto_shash_alignmask(tfm);
- struct shash_alg *shash = crypto_shash_alg(tfm);
- unsigned int ds = crypto_shash_digestsize(tfm);
- u8 ubuf[shash_align_buffer_size(ds, alignmask)]
- __attribute__ ((aligned));
- u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
- int err;
-
- err = shash->final(desc, buf);
- if (err)
- goto out;
-
- memcpy(out, buf, ds);
-
-out:
- memset(buf, 0, ds);
- return err;
-}
-
-int crypto_shash_final(struct shash_desc *desc, u8 *out)
-{
- struct crypto_shash *tfm = desc->tfm;
- struct shash_alg *shash = crypto_shash_alg(tfm);
- unsigned long alignmask = crypto_shash_alignmask(tfm);
-
- if ((unsigned long)out & alignmask)
- return shash_final_unaligned(desc, out);
-
- return shash->final(desc, out);
-}
-
-static int shash_finup_unaligned(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
+static int shash_finup(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out)
{
return crypto_shash_update(desc, data, len) ?:
crypto_shash_final(desc, out);
}
-int crypto_shash_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct crypto_shash *tfm = desc->tfm;
- struct shash_alg *shash = crypto_shash_alg(tfm);
- unsigned long alignmask = crypto_shash_alignmask(tfm);
-
- if (((unsigned long)data | (unsigned long)out) & alignmask)
- return shash_finup_unaligned(desc, data, len, out);
-
- return shash->finup(desc, data, len, out);
-}
-
-static int shash_digest_unaligned(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
+static int shash_digest(struct shash_desc *desc, const u8 *data,
+ unsigned len, u8 *out)
{
return crypto_shash_init(desc) ?:
crypto_shash_finup(desc, data, len, out);
}
-int crypto_shash_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct crypto_shash *tfm = desc->tfm;
- struct shash_alg *shash = crypto_shash_alg(tfm);
- unsigned long alignmask = crypto_shash_alignmask(tfm);
-
- if (((unsigned long)data | (unsigned long)out) & alignmask)
- return shash_digest_unaligned(desc, data, len, out);
-
- return shash->digest(desc, data, len, out);
-}
-
-static int shash_default_export(struct shash_desc *desc, void *out)
-{
- memcpy(out, shash_desc_ctx(desc), crypto_shash_descsize(desc->tfm));
- return 0;
-}
-
-static int shash_default_import(struct shash_desc *desc, const void *in)
-{
- memcpy(shash_desc_ctx(desc), in, crypto_shash_descsize(desc->tfm));
- return 0;
-}
-
static int crypto_shash_init_tfm(struct crypto_tfm *tfm)
{
struct crypto_shash *hash = __crypto_shash_cast(tfm);
@@ -197,98 +41,32 @@ static int crypto_shash_init_tfm(struct crypto_tfm *tfm)
}
static const struct crypto_type crypto_shash_type = {
- .extsize = crypto_alg_extsize,
- .init_tfm = crypto_shash_init_tfm,
- .maskclear = ~CRYPTO_ALG_TYPE_MASK,
- .maskset = CRYPTO_ALG_TYPE_MASK,
- .type = CRYPTO_ALG_TYPE_SHASH,
- .tfmsize = offsetof(struct crypto_shash, base),
+ .extsize = crypto_alg_extsize,
+ .init_tfm = crypto_shash_init_tfm,
+ .maskclear = ~CRYPTO_ALG_TYPE_MASK,
+ .maskset = CRYPTO_ALG_TYPE_MASK,
+ .type = CRYPTO_ALG_TYPE_SHASH,
+ .tfmsize = offsetof(struct crypto_shash, base),
};
-struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type,
- u32 mask)
+struct crypto_shash *crypto_alloc_shash(const char *alg_name,
+ u32 type, u32 mask)
{
return crypto_alloc_tfm(alg_name, &crypto_shash_type, type, mask);
}
-static int shash_prepare_alg(struct shash_alg *alg)
+int crypto_register_shash(struct shash_alg *alg)
{
struct crypto_alg *base = &alg->base;
- if (alg->digestsize > PAGE_SIZE / 8 ||
- alg->descsize > PAGE_SIZE / 8 ||
- alg->statesize > PAGE_SIZE / 8)
- return -EINVAL;
-
base->cra_type = &crypto_shash_type;
base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
base->cra_flags |= CRYPTO_ALG_TYPE_SHASH;
if (!alg->finup)
- alg->finup = shash_finup_unaligned;
+ alg->finup = shash_finup;
if (!alg->digest)
- alg->digest = shash_digest_unaligned;
- if (!alg->export) {
- alg->export = shash_default_export;
- alg->import = shash_default_import;
- alg->statesize = alg->descsize;
- }
- if (!alg->setkey)
- alg->setkey = shash_no_setkey;
-
- return 0;
-}
-
-int crypto_register_shash(struct shash_alg *alg)
-{
- struct crypto_alg *base = &alg->base;
- int err;
-
- err = shash_prepare_alg(alg);
- if (err)
- return err;
+ alg->digest = shash_digest;
return crypto_register_alg(base);
}
-
-int crypto_unregister_shash(struct shash_alg *alg)
-{
- return crypto_unregister_alg(&alg->base);
-}
-
-int crypto_register_shashes(struct shash_alg *algs, int count)
-{
- int i, ret;
-
- for (i = 0; i < count; i++) {
- ret = crypto_register_shash(&algs[i]);
- if (ret)
- goto err;
- }
-
- return 0;
-
-err:
- for (--i; i >= 0; --i)
- crypto_unregister_shash(&algs[i]);
-
- return ret;
-}
-
-int crypto_unregister_shashes(struct shash_alg *algs, int count)
-{
- int i, ret;
-
- for (i = count - 1; i >= 0; --i) {
- ret = crypto_unregister_shash(&algs[i]);
- if (ret)
- pr_err("Failed to unregister %s %s: %d\n",
- algs[i].base.cra_driver_name,
- algs[i].base.cra_name, ret);
- }
-
- return 0;
-}
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Synchronous cryptographic hash type");
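
With the alignment fixups, setkey handling and export/import defaults gone, crypto_register_shash() now only backfills .finup and .digest, composing them from update/final and init/finup as shown above. A minimal illustration of what a driver still has to provide under the simplified core (the "nop" algorithm below is purely hypothetical):

#include <crypto/internal/hash.h>

static int nop_init(struct shash_desc *desc)
{
	return 0;
}

static int nop_update(struct shash_desc *desc, const u8 *data, unsigned int len)
{
	return 0;
}

static int nop_final(struct shash_desc *desc, u8 *out)
{
	*out = 0;
	return 0;
}

static struct shash_alg nop_alg = {
	.digestsize	= 1,
	.init		= nop_init,
	.update		= nop_update,
	.final		= nop_final,
	.descsize	= 0,
	.base = {
		.cra_name	= "nop-hash-example",
		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
	}
};

__attribute__((constructor(110)))
static int __init nop_hash_example_init(void)
{
	/* .finup and .digest are left NULL; the core fills them in */
	return crypto_register_shash(&nop_alg);
}
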
diff --git a/linux/lz4hc_compress.c b/linux/lz4hc_compress.c
deleted file mode 100644
index b64ded0d..00000000
--- a/linux/lz4hc_compress.c
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
- *
- * Changed for kernel use by:
- * Chanho Min <chanho.min@lge.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/lz4.h>
-#include <asm/unaligned.h>
-#include "lz4defs.h"
-
-struct lz4hc_data {
- const u8 *base;
- HTYPE hashtable[HASHTABLESIZE];
- u16 chaintable[MAXD];
- const u8 *nexttoupdate;
-} __attribute__((__packed__));
-
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
-{
- memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
- memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
- hc4->nexttoupdate = base + 1;
-#else
- hc4->nexttoupdate = base;
-#endif
- hc4->base = base;
- return 1;
-}
-
-/* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
-{
- u16 *chaintable = hc4->chaintable;
- HTYPE *hashtable = hc4->hashtable;
-#if LZ4_ARCH64
- const u8 * const base = hc4->base;
-#else
- const int base = 0;
-#endif
-
- while (hc4->nexttoupdate < ip) {
- const u8 *p = hc4->nexttoupdate;
- size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
- if (delta > MAX_DISTANCE)
- delta = MAX_DISTANCE;
- chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
- hashtable[HASH_VALUE(p)] = (p) - base;
- hc4->nexttoupdate++;
- }
-}
-
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
- const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
-{
- u16 *const chaintable = hc4->chaintable;
- HTYPE *const hashtable = hc4->hashtable;
- const u8 *ref;
-#if LZ4_ARCH64
- const u8 * const base = hc4->base;
-#else
- const int base = 0;
-#endif
- int nbattempts = MAX_NB_ATTEMPTS;
- size_t repl = 0, ml = 0;
- u16 delta;
-
- /* HC4 match finder */
- lz4hc_insert(hc4, ip);
- ref = hashtable[HASH_VALUE(ip)] + base;
-
- /* potential repetition */
- if (ref >= ip-4) {
- /* confirmed */
- if (A32(ref) == A32(ip)) {
- delta = (u16)(ip-ref);
- repl = ml = common_length(ip + MINMATCH,
- ref + MINMATCH, matchlimit) + MINMATCH;
- *matchpos = ref;
- }
- ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
- }
-
- while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
- nbattempts--;
- if (*(ref + ml) == *(ip + ml)) {
- if (A32(ref) == A32(ip)) {
- size_t mlt =
- common_length(ip + MINMATCH,
- ref + MINMATCH, matchlimit) + MINMATCH;
- if (mlt > ml) {
- ml = mlt;
- *matchpos = ref;
- }
- }
- }
- ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
- }
-
- /* Complete table */
- if (repl) {
- const u8 *ptr = ip;
- const u8 *end;
- end = ip + repl - (MINMATCH-1);
- /* Pre-Load */
- while (ptr < end - delta) {
- chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
- ptr++;
- }
- do {
- chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
- /* Head of chain */
- hashtable[HASH_VALUE(ptr)] = (ptr) - base;
- ptr++;
- } while (ptr < end);
- hc4->nexttoupdate = end;
- }
-
- return (int)ml;
-}
-
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
- const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
- const u8 **matchpos, const u8 **startpos)
-{
- u16 *const chaintable = hc4->chaintable;
- HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
- const u8 * const base = hc4->base;
-#else
- const int base = 0;
-#endif
- const u8 *ref;
- int nbattempts = MAX_NB_ATTEMPTS;
- int delta = (int)(ip - startlimit);
-
- /* First Match */
- lz4hc_insert(hc4, ip);
- ref = hashtable[HASH_VALUE(ip)] + base;
-
- while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
- && (nbattempts)) {
- nbattempts--;
- if (*(startlimit + longest) == *(ref - delta + longest)) {
- if (A32(ref) == A32(ip)) {
- const u8 *reft = ref;
- const u8 *startt = ip;
- unsigned length =
- common_length(ip + MINMATCH,
- ref + MINMATCH,
- matchlimit);
-
- while ((startt > startlimit)
- && (reft > hc4->base)
- && (startt[-1] == reft[-1])) {
- startt--;
- reft--;
- length++;
- }
-
- if (length > longest) {
- longest = length;
- *matchpos = reft;
- *startpos = startt;
- }
- }
- }
- ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
- }
- return longest;
-}
-
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
- int ml, const u8 *ref)
-{
- unsigned length;
- u8 *token;
-
- /* Encode Literal length */
- length = *ip - *anchor;
- token = (*op)++;
- *token = encode_length(op, length) << ML_BITS;
-
- /* Copy Literals */
- MEMCPY_ADVANCE_CHUNKED(*op, *anchor, length);
-
- /* Encode Offset */
- PUT_LE16_ADVANCE(*op, (u16)(*ip - ref));
-
- *token += encode_length(op, ml - MINMATCH);
-
- /* Prepare next loop */
- *ip += ml;
- *anchor = *ip;
-
- return 0;
-}
-
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
- const char *source,
- char *dest,
- int isize)
-{
- const u8 *ip = (const u8 *)source;
- const u8 *anchor = ip;
- const u8 *const iend = ip + isize;
- const u8 *const mflimit = iend - MFLIMIT;
- const u8 *const matchlimit = (iend - LASTLITERALS);
-
- u8 *op = (u8 *)dest;
-
- int ml, ml2, ml3, ml0;
- const u8 *ref = NULL;
- const u8 *start2 = NULL;
- const u8 *ref2 = NULL;
- const u8 *start3 = NULL;
- const u8 *ref3 = NULL;
- const u8 *start0;
- const u8 *ref0;
- int lastrun;
-
- ip++;
-
- /* Main Loop */
- while (ip < mflimit) {
- ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
- if (!ml) {
- ip++;
- continue;
- }
-
- /* saved, in case we would skip too much */
- start0 = ip;
- ref0 = ref;
- ml0 = ml;
-_search2:
- if (ip+ml < mflimit)
- ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
- ip + 1, matchlimit, ml, &ref2, &start2);
- else
- ml2 = ml;
- /* No better match */
- if (ml2 == ml) {
- lz4_encodesequence(&ip, &op, &anchor, ml, ref);
- continue;
- }
-
- if (start0 < ip) {
- /* empirical */
- if (start2 < ip + ml0) {
- ip = start0;
- ref = ref0;
- ml = ml0;
- }
- }
- /*
- * Here, start0==ip
- * First Match too small : removed
- */
- if ((start2 - ip) < 3) {
- ml = ml2;
- ip = start2;
- ref = ref2;
- goto _search2;
- }
-
-_search3:
- /*
- * Currently we have :
- * ml2 > ml1, and
- * ip1+3 <= ip2 (usually < ip1+ml1)
- */
- if ((start2 - ip) < OPTIMAL_ML) {
- int correction;
- int new_ml = ml;
- if (new_ml > OPTIMAL_ML)
- new_ml = OPTIMAL_ML;
- if (ip + new_ml > start2 + ml2 - MINMATCH)
- new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
- correction = new_ml - (int)(start2 - ip);
- if (correction > 0) {
- start2 += correction;
- ref2 += correction;
- ml2 -= correction;
- }
- }
- /*
- * Now, we have start2 = ip+new_ml,
- * with new_ml=min(ml, OPTIMAL_ML=18)
- */
- if (start2 + ml2 < mflimit)
- ml3 = lz4hc_insertandgetwidermatch(ctx,
- start2 + ml2 - 3, start2, matchlimit,
- ml2, &ref3, &start3);
- else
- ml3 = ml2;
-
- /* No better match : 2 sequences to encode */
- if (ml3 == ml2) {
- /* ip & ref are known; Now for ml */
- if (start2 < ip+ml)
- ml = (int)(start2 - ip);
-
- /* Now, encode 2 sequences */
- lz4_encodesequence(&ip, &op, &anchor, ml, ref);
- ip = start2;
- lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
- continue;
- }
-
- /* Not enough space for match 2 : remove it */
- if (start3 < ip + ml + 3) {
- /*
- * can write Seq1 immediately ==> Seq2 is removed,
- * so Seq3 becomes Seq1
- */
- if (start3 >= (ip + ml)) {
- if (start2 < ip + ml) {
- int correction =
- (int)(ip + ml - start2);
- start2 += correction;
- ref2 += correction;
- ml2 -= correction;
- if (ml2 < MINMATCH) {
- start2 = start3;
- ref2 = ref3;
- ml2 = ml3;
- }
- }
-
- lz4_encodesequence(&ip, &op, &anchor, ml, ref);
- ip = start3;
- ref = ref3;
- ml = ml3;
-
- start0 = start2;
- ref0 = ref2;
- ml0 = ml2;
- goto _search2;
- }
-
- start2 = start3;
- ref2 = ref3;
- ml2 = ml3;
- goto _search3;
- }
-
- /*
- * OK, now we have 3 ascending matches; let's write at least
- * the first one. ip & ref are known; now for ml
- */
- if (start2 < ip + ml) {
- if ((start2 - ip) < (int)ML_MASK) {
- int correction;
- if (ml > OPTIMAL_ML)
- ml = OPTIMAL_ML;
- if (ip + ml > start2 + ml2 - MINMATCH)
- ml = (int)(start2 - ip) + ml2
- - MINMATCH;
- correction = ml - (int)(start2 - ip);
- if (correction > 0) {
- start2 += correction;
- ref2 += correction;
- ml2 -= correction;
- }
- } else
- ml = (int)(start2 - ip);
- }
- lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-
- ip = start2;
- ref = ref2;
- ml = ml2;
-
- start2 = start3;
- ref2 = ref3;
- ml2 = ml3;
-
- goto _search3;
- }
-
- /* Encode Last Literals */
- lastrun = (int)(iend - anchor);
- if (lastrun >= (int)RUN_MASK) {
- *op++ = (RUN_MASK << ML_BITS);
- lastrun -= RUN_MASK;
- for (; lastrun > 254 ; lastrun -= 255)
- *op++ = 255;
- *op++ = (u8) lastrun;
- } else
- *op++ = (lastrun << ML_BITS);
- memcpy(op, anchor, iend - anchor);
- op += iend - anchor;
- /* End */
- return (int) (((char *)op) - dest);
-}
-
-int lz4hc_compress(const unsigned char *src, size_t src_len,
- unsigned char *dst, size_t *dst_len, void *wrkmem)
-{
- int ret = -1;
- int out_len = 0;
-
- struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
- lz4hc_init(hc4, (const u8 *)src);
- out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
- (char *)dst, (int)src_len);
-
- if (out_len < 0)
- goto exit;
-
- *dst_len = out_len;
- return 0;
-
-exit:
- return ret;
-}
-EXPORT_SYMBOL(lz4hc_compress);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
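
The deleted LZ4 HC encoder exposed a single entry point, lz4hc_compress(), which required a caller-supplied workspace large enough for struct lz4hc_data. Roughly how it was driven before this removal (LZ4HC_MEM_COMPRESS and the lz4_compressbound() destination bound come from the old <linux/lz4.h> and are assumptions here, not part of this patch):

#include <linux/lz4.h>
#include <linux/vmalloc.h>

static int lz4hc_example(const unsigned char *src, size_t src_len,
			 unsigned char *dst, size_t *dst_len)
{
	/* LZ4HC_MEM_COMPRESS >= sizeof(struct lz4hc_data) -- assumed macro */
	void *wrkmem = vmalloc(LZ4HC_MEM_COMPRESS);
	int ret;

	if (!wrkmem)
		return -ENOMEM;

	/* dst should have room for at least lz4_compressbound(src_len) bytes */
	ret = lz4hc_compress(src, src_len, dst, dst_len, wrkmem);

	vfree(wrkmem);
	return ret;	/* 0 on success, -1 on failure */
}
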
diff --git a/linux/sha1.c b/linux/sha1.c
deleted file mode 100644
index 5a56dfd7..00000000
--- a/linux/sha1.c
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * SHA1 routine optimized to do word accesses rather than byte accesses,
- * and to avoid unnecessary copies into the context array.
- *
- * This was based on the git SHA1 implementation.
- */
-
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/bitops.h>
-#include <linux/cryptohash.h>
-#include <asm/unaligned.h>
-
-/*
- * If you have 32 registers or more, the compiler can (and should)
- * try to change the array[] accesses into registers. However, on
- * machines with less than ~25 registers, that won't really work,
- * and at least gcc will make an unholy mess of it.
- *
- * So to avoid that mess which just slows things down, we force
- * the stores to memory to actually happen (we might be better off
- * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
- * suggested by Artur Skawina - that will also make gcc unable to
- * try to do the silly "optimize away loads" part because it won't
- * see what the value will be).
- *
- * Ben Herrenschmidt reports that on PPC, the C version comes close
- * to the optimized asm with this (ie on PPC you don't want that
- * 'volatile', since there are lots of registers).
- *
- * On ARM we get the best code generation by forcing a full memory barrier
- * between each SHA_ROUND, otherwise gcc happily gets wild with spilling and
- * the stack frame size simply explodes and performance goes down the drain.
- */
-
-#ifdef CONFIG_X86
- #define setW(x, val) (*(volatile __u32 *)&W(x) = (val))
-#elif defined(CONFIG_ARM)
- #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
-#else
- #define setW(x, val) (W(x) = (val))
-#endif
-
-/* This "rolls" over the 512-bit array */
-#define W(x) (array[(x)&15])
-
-/*
- * Where do we get the source from? The first 16 iterations get it from
- * the input data, the remaining iterations mix it from the 512-bit array.
- */
-#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t)
-#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)
-
-#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
- __u32 TEMP = input(t); setW(t, TEMP); \
- E += TEMP + rol32(A,5) + (fn) + (constant); \
- B = ror32(B, 2); } while (0)
-
-#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
-#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
-#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
-#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
-#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )
-
-/**
- * sha_transform - single block SHA1 transform
- *
- * @digest: 160 bit digest to update
- * @data: 512 bits of data to hash
- * @array: 16 words of workspace (see note)
- *
- * This function generates a SHA1 digest for a single 512-bit block.
- * Be warned: it does not handle padding or the final digest; do not
- * confuse it with the full FIPS 180-1 digest algorithm for variable
- * length messages.
- *
- * Note: If the hash is security sensitive, the caller should be sure
- * to clear the workspace. This is left to the caller to avoid
- * unnecessary clears between chained hashing operations.
- */
-void sha_transform(__u32 *digest, const char *data, __u32 *array)
-{
- __u32 A, B, C, D, E;
-
- A = digest[0];
- B = digest[1];
- C = digest[2];
- D = digest[3];
- E = digest[4];
-
- /* Round 1 - iterations 0-15 take their input from 'data' */
- T_0_15( 0, A, B, C, D, E);
- T_0_15( 1, E, A, B, C, D);
- T_0_15( 2, D, E, A, B, C);
- T_0_15( 3, C, D, E, A, B);
- T_0_15( 4, B, C, D, E, A);
- T_0_15( 5, A, B, C, D, E);
- T_0_15( 6, E, A, B, C, D);
- T_0_15( 7, D, E, A, B, C);
- T_0_15( 8, C, D, E, A, B);
- T_0_15( 9, B, C, D, E, A);
- T_0_15(10, A, B, C, D, E);
- T_0_15(11, E, A, B, C, D);
- T_0_15(12, D, E, A, B, C);
- T_0_15(13, C, D, E, A, B);
- T_0_15(14, B, C, D, E, A);
- T_0_15(15, A, B, C, D, E);
-
- /* Round 1 - tail. Input from 512-bit mixing array */
- T_16_19(16, E, A, B, C, D);
- T_16_19(17, D, E, A, B, C);
- T_16_19(18, C, D, E, A, B);
- T_16_19(19, B, C, D, E, A);
-
- /* Round 2 */
- T_20_39(20, A, B, C, D, E);
- T_20_39(21, E, A, B, C, D);
- T_20_39(22, D, E, A, B, C);
- T_20_39(23, C, D, E, A, B);
- T_20_39(24, B, C, D, E, A);
- T_20_39(25, A, B, C, D, E);
- T_20_39(26, E, A, B, C, D);
- T_20_39(27, D, E, A, B, C);
- T_20_39(28, C, D, E, A, B);
- T_20_39(29, B, C, D, E, A);
- T_20_39(30, A, B, C, D, E);
- T_20_39(31, E, A, B, C, D);
- T_20_39(32, D, E, A, B, C);
- T_20_39(33, C, D, E, A, B);
- T_20_39(34, B, C, D, E, A);
- T_20_39(35, A, B, C, D, E);
- T_20_39(36, E, A, B, C, D);
- T_20_39(37, D, E, A, B, C);
- T_20_39(38, C, D, E, A, B);
- T_20_39(39, B, C, D, E, A);
-
- /* Round 3 */
- T_40_59(40, A, B, C, D, E);
- T_40_59(41, E, A, B, C, D);
- T_40_59(42, D, E, A, B, C);
- T_40_59(43, C, D, E, A, B);
- T_40_59(44, B, C, D, E, A);
- T_40_59(45, A, B, C, D, E);
- T_40_59(46, E, A, B, C, D);
- T_40_59(47, D, E, A, B, C);
- T_40_59(48, C, D, E, A, B);
- T_40_59(49, B, C, D, E, A);
- T_40_59(50, A, B, C, D, E);
- T_40_59(51, E, A, B, C, D);
- T_40_59(52, D, E, A, B, C);
- T_40_59(53, C, D, E, A, B);
- T_40_59(54, B, C, D, E, A);
- T_40_59(55, A, B, C, D, E);
- T_40_59(56, E, A, B, C, D);
- T_40_59(57, D, E, A, B, C);
- T_40_59(58, C, D, E, A, B);
- T_40_59(59, B, C, D, E, A);
-
- /* Round 4 */
- T_60_79(60, A, B, C, D, E);
- T_60_79(61, E, A, B, C, D);
- T_60_79(62, D, E, A, B, C);
- T_60_79(63, C, D, E, A, B);
- T_60_79(64, B, C, D, E, A);
- T_60_79(65, A, B, C, D, E);
- T_60_79(66, E, A, B, C, D);
- T_60_79(67, D, E, A, B, C);
- T_60_79(68, C, D, E, A, B);
- T_60_79(69, B, C, D, E, A);
- T_60_79(70, A, B, C, D, E);
- T_60_79(71, E, A, B, C, D);
- T_60_79(72, D, E, A, B, C);
- T_60_79(73, C, D, E, A, B);
- T_60_79(74, B, C, D, E, A);
- T_60_79(75, A, B, C, D, E);
- T_60_79(76, E, A, B, C, D);
- T_60_79(77, D, E, A, B, C);
- T_60_79(78, C, D, E, A, B);
- T_60_79(79, B, C, D, E, A);
-
- digest[0] += A;
- digest[1] += B;
- digest[2] += C;
- digest[3] += D;
- digest[4] += E;
-}
-EXPORT_SYMBOL(sha_transform);
-
-/**
- * sha_init - initialize the vectors for a SHA1 digest
- * @buf: vector to initialize
- */
-void sha_init(__u32 *buf)
-{
- buf[0] = 0x67452301;
- buf[1] = 0xefcdab89;
- buf[2] = 0x98badcfe;
- buf[3] = 0x10325476;
- buf[4] = 0xc3d2e1f0;
-}
-EXPORT_SYMBOL(sha_init);
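
The raw SHA-1 transform removed here operated on exactly one 64-byte block and left all buffering, padding and length encoding to the caller. Driving the two removed primitives directly looked roughly like this (a sketch; arbitrary-length messages would additionally need the FIPS 180-1 padding the docstring above warns about):

#include <linux/cryptohash.h>

static void sha1_one_block_example(const char block[64], __u32 digest[5])
{
	__u32 workspace[SHA_WORKSPACE_WORDS];	/* 16 words of scratch */

	sha_init(digest);			 /* seed the five-word state    */
	sha_transform(digest, block, workspace); /* consume one 512-bit block  */

	/*
	 * Per the original comment on sha_transform(), security-sensitive
	 * callers should clear the workspace themselves afterwards.
	 */
}
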