author     Kent Overstreet <kmo@daterainc.com>    2015-02-11 17:53:45 -0800
committer  Kent Overstreet <kmo@daterainc.com>    2015-02-12 23:44:04 -0800
commit     b4ba673ff0908f348c7f9586ddd9922967702bf6
tree       83a1d23e3efe726bad457b8da8d81b1defa61465
parent     419f208394c58fe693a5dbaae162f41154096e43
bcache: Packed bkeys
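
Each bset now carries a struct bkey_format recording how many bits every bkey
field (inode, offset, snapshot, size, version) actually needs in that node, so
keys can be stored packed in fewer 64-bit words than struct bkey and unpacked
on demand. A minimal sketch of the format bookkeeping - illustration only, with
hypothetical names, and ignoring the KEY_PACKED_BITS_START header bits that the
real bch_bkey_format_add()/bch_bkey_format_done() in bkey.c below account for:

#include <stdint.h>

struct sketch_format {
	uint8_t	key_u64s;		/* packed key size, in u64s */
	uint8_t	bits_per_field[5];	/* inode, offset, snapshot, size, version */
};

/* Widen a field so that @v fits (cf. __bkey_format_add()) */
static void sketch_format_add(struct sketch_format *f, unsigned field, uint64_t v)
{
	unsigned bits = v ? 64 - __builtin_clzll(v) : 0;	/* fls64(v) */

	if (bits > f->bits_per_field[field])
		f->bits_per_field[field] = bits;
}

/* Sum the field widths, round up to whole u64s (cf. bch_bkey_format_done()) */
static void sketch_format_done(struct sketch_format *f)
{
	unsigned i, bits = 0;

	for (i = 0; i < sizeof(f->bits_per_field); i++)
		bits += f->bits_per_field[i];

	f->key_u64s = (bits + 63) / 64;
}

Building a format for a whole node is then just folding every key in with the
add helper, which is what btree_keys_calc_format() in bkey.c does.
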
Change-Id: I23b3d03524fa57d70e016030c55f9acd64affd89
-rw-r--r--  drivers/md/bcache/alloc.c          |  50
-rw-r--r--  drivers/md/bcache/alloc.h          |   9
-rw-r--r--  drivers/md/bcache/bcache.h         |   1
-rw-r--r--  drivers/md/bcache/bkey.c           | 348
-rw-r--r--  drivers/md/bcache/bkey.h           | 415
-rw-r--r--  drivers/md/bcache/bkey_methods.c   |  30
-rw-r--r--  drivers/md/bcache/bkey_methods.h   |  15
-rw-r--r--  drivers/md/bcache/blockdev.c       |  31
-rw-r--r--  drivers/md/bcache/bset.c           | 402
-rw-r--r--  drivers/md/bcache/bset.h           | 101
-rw-r--r--  drivers/md/bcache/btree.c          | 246
-rw-r--r--  drivers/md/bcache/btree.h          |  37
-rw-r--r--  drivers/md/bcache/buckets.h        |   4
-rw-r--r--  drivers/md/bcache/debug.c          |   7
-rw-r--r--  drivers/md/bcache/dirent.c         | 105
-rw-r--r--  drivers/md/bcache/extents.c        | 747
-rw-r--r--  drivers/md/bcache/extents.h        |  99
-rw-r--r--  drivers/md/bcache/fs.c             |  38
-rw-r--r--  drivers/md/bcache/gc.c             |  87
-rw-r--r--  drivers/md/bcache/gc.h             |   9
-rw-r--r--  drivers/md/bcache/inode.c          |  63
-rw-r--r--  drivers/md/bcache/inode.h          |   4
-rw-r--r--  drivers/md/bcache/io.c             | 166
-rw-r--r--  drivers/md/bcache/io.h             |  10
-rw-r--r--  drivers/md/bcache/ioctl.c          |  74
-rw-r--r--  drivers/md/bcache/journal.c        |  61
-rw-r--r--  drivers/md/bcache/journal.h        |   6
-rw-r--r--  drivers/md/bcache/keybuf.c         |  27
-rw-r--r--  drivers/md/bcache/keybuf.h         |   2
-rw-r--r--  drivers/md/bcache/keylist.c        |  45
-rw-r--r--  drivers/md/bcache/keylist.h        |  14
-rw-r--r--  drivers/md/bcache/keylist_types.h  |   2
-rw-r--r--  drivers/md/bcache/move.c           |  90
-rw-r--r--  drivers/md/bcache/move.h           |   2
-rw-r--r--  drivers/md/bcache/movinggc.c       |  29
-rw-r--r--  drivers/md/bcache/request.c        |  44
-rw-r--r--  drivers/md/bcache/super.c          |  10
-rw-r--r--  drivers/md/bcache/super.h          |   8
-rw-r--r--  drivers/md/bcache/sysfs.c          |   8
-rw-r--r--  drivers/md/bcache/tier.c           |  34
-rw-r--r--  drivers/md/bcache/util.h           |   6
-rw-r--r--  drivers/md/bcache/writeback.c      |  55
-rw-r--r--  drivers/md/bcache/xattr.c          |  42
-rw-r--r--  include/linux/bcache-kernel.h      |  53
-rw-r--r--  include/trace/events/bcache.h      |  50
-rw-r--r--  include/uapi/linux/bcache.h        | 102
46 files changed, 2515 insertions(+), 1273 deletions(-)
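
The diff below consistently replaces bare struct bkey pointers with a small
family of key types: struct bkey_i (a key with its value inline), struct
bkey_s / struct bkey_s_c (mutable/const "split" views holding separate key and
value pointers), and struct bkey_packed (a key stored in a node's local
bkey_format). A hedged sketch of how the conversion helpers added in
drivers/md/bcache/bkey.h compose - illustration only, assuming a populated
struct bkey_i_extent and the owning node's format:

static void sketch_key_views(struct bkey_i_extent *e,
			     const struct bkey_format *f,
			     struct bkey_packed *out)
{
	/* inline key+value -> split mutable view; always safe, no type check */
	struct bkey_s_extent s = extent_i_to_s(e);

	/* split mutable -> split const */
	struct bkey_s_c_extent c = extent_s_to_s_c(s);

	/* type-erased const view, e.g. what bkey_invalid() now takes */
	struct bkey_s_c k = to_bkey_s_c(c);

	/*
	 * A position alone can be packed into the node's format for search
	 * comparisons; if some field doesn't fit, callers fall back to
	 * comparing against the unpacked bpos (cf. bkey_cmp_p_or_unp()).
	 */
	bool pos_fits = bkey_pack_pos(out, k.k->p, f);

	(void) pos_fits;
}
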
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index ece8f5a1aadd..75186c8b58c7 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -857,21 +857,21 @@ void __bch_bucket_free(struct cache *ca, struct bucket *g)
g->write_prio = ca->set->prio_clock[WRITE].hand;
}
-void bch_bucket_free(struct cache_set *c, struct bkey *k)
+void bch_bucket_free(struct cache_set *c, struct bkey_i *k)
{
- struct bkey_i_extent *e = bkey_i_to_extent(k);
+ struct bkey_s_extent e = bkey_i_to_s_extent(k);
struct bch_extent_ptr *ptr;
struct cache *ca;
rcu_read_lock();
extent_for_each_online_device(c, e, ptr, ca)
- __bch_bucket_free(ca, PTR_BUCKET(ca, ptr));
+ __bch_bucket_free(ca, PTR_BUCKET(ca, ptr));
rcu_read_unlock();
}
-static void bch_bucket_free_never_used(struct cache_set *c, struct bkey *k)
+static void bch_bucket_free_never_used(struct cache_set *c, struct bkey_i *k)
{
- struct bkey_i_extent *e = bkey_i_to_extent(k);
+ struct bkey_s_extent e = bkey_i_to_s_extent(k);
struct bch_extent_ptr *ptr;
struct cache *ca;
struct bucket *g;
@@ -960,7 +960,7 @@ static struct cache *bch_next_cache(struct cache_set *c,
static enum bucket_alloc_ret __bch_bucket_alloc_set(struct cache_set *c,
enum alloc_reserve reserve,
- struct bkey *k, int n,
+ struct bkey_i *k, int n,
struct cache_group *devs)
{
struct bkey_i_extent *e;
@@ -1024,7 +1024,7 @@ static enum bucket_alloc_ret __bch_bucket_alloc_set(struct cache_set *c,
e->v.ptr[i] = PTR(ca->bucket_gens[r],
bucket_to_sector(ca, r),
ca->sb.nr_this_dev);
- bch_set_extent_ptrs(e, i + 1);
+ bch_set_extent_ptrs(extent_i_to_s(e), i + 1);
}
rcu_read_unlock();
@@ -1036,7 +1036,7 @@ err:
}
int bch_bucket_alloc_set(struct cache_set *c, enum alloc_reserve reserve,
- struct bkey *k, int n, struct cache_group *devs,
+ struct bkey_i *k, int n, struct cache_group *devs,
struct closure *cl)
{
struct closure_waitlist *waitlist = NULL;
@@ -1083,7 +1083,7 @@ int bch_bucket_alloc_set(struct cache_set *c, enum alloc_reserve reserve,
static void __bch_open_bucket_put(struct cache_set *c, struct open_bucket *b)
{
- const struct bkey_i_extent *e = bkey_i_to_extent_c(&b->key);
+ struct bkey_s_extent e = bkey_i_to_s_extent(&b->key);
const struct bch_extent_ptr *ptr;
struct cache *ca;
@@ -1145,7 +1145,7 @@ static struct open_bucket *bch_open_bucket_alloc(struct cache_set *c,
{
int ret;
struct open_bucket *b;
- struct bkey_i_extent *e;
+ struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
struct cache *ca;
unsigned n_replicas;
@@ -1174,7 +1174,7 @@ static struct open_bucket *bch_open_bucket_alloc(struct cache_set *c,
b->sectors_free = UINT_MAX;
rcu_read_lock();
- e = bkey_i_to_extent(&b->key);
+ e = bkey_i_to_s_extent(&b->key);
/* This is still wrong - we waste space with different sized buckets */
extent_for_each_online_device(c, e, ptr, ca)
@@ -1237,10 +1237,10 @@ static struct open_bucket *lock_and_refill_writepoint(struct cache_set *c,
}
}
-static void verify_not_stale(struct cache_set *c, const struct bkey *k)
+static void verify_not_stale(struct cache_set *c, const struct bkey_i *k)
{
#ifdef CONFIG_BCACHE_DEBUG
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
const struct bch_extent_ptr *ptr;
struct cache *ca;
@@ -1270,12 +1270,12 @@ static void verify_not_stale(struct cache_set *c, const struct bkey *k)
*/
struct open_bucket *bch_alloc_sectors(struct cache_set *c,
struct write_point *wp,
- struct bkey *k,
+ struct bkey_i *k,
struct closure *cl,
bool contiguous)
{
bool first_time = true;
- struct bkey_i_extent *src, *dst;
+ struct bkey_s_extent src, dst;
struct bch_extent_ptr *ptr;
struct open_bucket *b;
struct cache *ca;
@@ -1288,7 +1288,7 @@ retry:
BUG_ON(!b->sectors_free);
- if (contiguous && b->sectors_free < k->size) {
+ if (contiguous && b->sectors_free < k->k.size) {
if (!first_time)
return NULL;
@@ -1301,22 +1301,22 @@ retry:
verify_not_stale(c, &b->key);
- src = bkey_i_to_extent(&b->key);
- dst = bkey_i_to_extent(k);
+ src = bkey_i_to_s_extent(&b->key);
+ dst = bkey_i_to_s_extent(k);
nptrs = (bch_extent_ptrs(dst) + bch_extent_ptrs(src));
BUG_ON(nptrs > BKEY_EXTENT_PTRS_MAX);
/* Set up the pointer to the space we're allocating: */
- memcpy(&dst->v.ptr[bch_extent_ptrs(dst)],
- src->v.ptr,
+ memcpy(&dst.v->ptr[bch_extent_ptrs(dst)],
+ src.v->ptr,
bch_extent_ptrs(src) * sizeof(u64));
bch_set_extent_ptrs(dst, nptrs);
- sectors = min_t(unsigned, dst->k.size, b->sectors_free);
+ sectors = min_t(unsigned, dst.k->size, b->sectors_free);
- bch_key_resize(&dst->k, sectors);
+ bch_key_resize(dst.k, sectors);
/* update open bucket for next time: */
@@ -1479,7 +1479,7 @@ static bool bch_stop_write_point(struct cache *ca,
return false;
}
- if (!bch_extent_has_device(bkey_i_to_extent(&b->key),
+ if (!bch_extent_has_device(bkey_i_to_s_c_extent(&b->key),
ca->sb.nr_this_dev)) {
spin_unlock(&b->lock);
return false;
@@ -1545,7 +1545,7 @@ void bch_stop_new_data_writes(struct cache *ca)
static bool bucket_still_writeable(struct open_bucket *b, struct cache_set *c)
{
- const struct bkey_i_extent *e = bkey_i_to_extent_c(&b->key);
+ struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
const struct bch_extent_ptr *ptr;
struct cache *ca;
@@ -1578,7 +1578,7 @@ retry:
list_for_each_entry(b, &c->open_buckets_open, list) {
spin_lock(&b->lock);
- if (bch_extent_has_device(bkey_i_to_extent(&b->key),
+ if (bch_extent_has_device(bkey_i_to_s_c_extent(&b->key),
ca->sb.nr_this_dev))
found = true;
spin_unlock(&b->lock);
diff --git a/drivers/md/bcache/alloc.h b/drivers/md/bcache/alloc.h
index 1d18878f6f2b..56495d646deb 100644
--- a/drivers/md/bcache/alloc.h
+++ b/drivers/md/bcache/alloc.h
@@ -38,15 +38,16 @@ static inline void bch_increment_clock(struct cache_set *c,
}
void __bch_bucket_free(struct cache *, struct bucket *);
-void bch_bucket_free(struct cache_set *, struct bkey *);
+void bch_bucket_free(struct cache_set *, struct bkey_i *);
-int bch_bucket_alloc_set(struct cache_set *, enum alloc_reserve, struct bkey *,
- int, struct cache_group *, struct closure *);
+int bch_bucket_alloc_set(struct cache_set *, enum alloc_reserve,
+ struct bkey_i *, int,
+ struct cache_group *, struct closure *);
void bch_open_bucket_put(struct cache_set *, struct open_bucket *);
struct open_bucket *bch_alloc_sectors(struct cache_set *, struct write_point *,
- struct bkey *, struct closure *, bool);
+ struct bkey_i *, struct closure *, bool);
static inline void bch_wake_allocator(struct cache *ca)
{
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index e67523dd5073..e0439f45f65a 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -177,6 +177,7 @@
#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
+#include <linux/bug.h>
#include <linux/bcache.h>
#include <linux/bcache-kernel.h>
#include <linux/bio.h>
diff --git a/drivers/md/bcache/bkey.c b/drivers/md/bcache/bkey.c
index 7a2b36c6337e..ead977cd458f 100644
--- a/drivers/md/bcache/bkey.c
+++ b/drivers/md/bcache/bkey.c
@@ -1,7 +1,13 @@
+#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
+
#include <linux/kernel.h>
#include "bkey.h"
+#include "bset.h"
+#include "util.h"
+
+const struct bkey_format bch_bkey_format_current = BKEY_FORMAT_CURRENT;
int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
{
@@ -31,3 +37,345 @@ int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
return out - buf;
}
+
+struct pack_state {
+ const struct bkey_format *format;
+ unsigned field;
+ unsigned shift;
+ u64 *p;
+};
+
+__always_inline
+static struct pack_state pack_state_init(const struct bkey_format *format,
+ const struct bkey_packed *k)
+{
+ return (struct pack_state) {
+ .format = format,
+ .field = 0,
+ .shift = 64 - high_bit_offset,
+ .p = (u64 *) high_word(format, k),
+ };
+}
+
+__always_inline
+static u64 get_inc_field(struct pack_state *state)
+{
+ unsigned bits = state->format->bits_per_field[state->field++];
+
+ /* bits might be 0 - and if bits is 0, v will be 0 when we use mask */
+ u64 v = 0, mask = ~((~0ULL << 1) << (bits - 1));
+
+ if (bits >= state->shift) {
+ bits -= state->shift;
+ v = *state->p << bits;
+
+ state->p = next_word(state->p);
+ state->shift = 64;
+ }
+
+ if (bits) {
+ state->shift -= bits;
+ v |= *state->p >> state->shift;
+ }
+
+ return v & mask;
+}
+
+__always_inline
+static bool set_inc_field(struct pack_state *state, u64 v)
+{
+ unsigned bits = state->format->bits_per_field[state->field++];
+
+ if (fls64(v) > bits)
+ return false;
+
+ if (bits >= state->shift) {
+ bits -= state->shift;
+ *state->p |= v >> bits;
+
+ state->p = next_word(state->p);
+ state->shift = 64;
+ }
+
+ if (bits) {
+ state->shift -= bits;
+ *state->p |= v << state->shift;
+ }
+
+ return true;
+}
+
+bool bch_bkey_format_transform(const struct bkey_format *out_f,
+ struct bkey_packed *out,
+ const struct bkey_format *in_f,
+ const struct bkey_packed *in)
+{
+ struct pack_state out_s = pack_state_init(out_f, out);
+ struct pack_state in_s = pack_state_init(in_f, in);
+ unsigned i;
+
+ out->u64s = out_f->key_u64s + in->u64s - in_f->key_u64s;
+ out->type = in->type;
+ memset(&out->key_start, 0,
+ out_f->key_u64s * sizeof(u64) -
+ offsetof(struct bkey_packed, key_start));
+
+ for (i = 0; i < out_s.format->nr_fields; i++)
+ if (!set_inc_field(&out_s, get_inc_field(&in_s)))
+ return false;
+
+ return true;
+}
+
+void bkey_unpack(struct bkey_i *dst,
+ const struct bkey_format *format,
+ const struct bkey_packed *src)
+{
+ dst->k = bkey_unpack_key(format, src);
+
+ memcpy(&dst->v,
+ bkeyp_val(format, src),
+ bkeyp_val_bytes(format, src));
+}
+
+bool bkey_pack(struct bkey_packed *out, const struct bkey_i *in,
+ const struct bkey_format *format)
+{
+ struct bkey_packed tmp;
+
+ if (!bkey_pack_key(&tmp, &in->k, format))
+ return false;
+
+ memmove((u64 *) out + format->key_u64s,
+ &in->v,
+ bkey_val_bytes(&in->k));
+ memcpy(out, &tmp,
+ format->key_u64s * sizeof(u64));
+
+ return true;
+}
+
+bool bkey_pack_pos(struct bkey_packed *out, struct bpos in,
+ const struct bkey_format *format)
+{
+ struct pack_state state = pack_state_init(format, out);
+
+ memset(out, 0, format->key_u64s * sizeof(u64));
+ out->u64s = format->key_u64s;
+ out->format = KEY_FORMAT_LOCAL_BTREE;
+ out->type = KEY_TYPE_DELETED;
+
+ return (set_inc_field(&state, in.inode) &&
+ set_inc_field(&state, in.offset) &&
+ set_inc_field(&state, in.snapshot));
+}
+
+__always_inline
+static void set_inc_field_lossy(struct pack_state *state, u64 v)
+{
+ unsigned bits = state->format->bits_per_field[state->field++];
+
+ v <<= 64 - bits;
+ v >>= 64 - bits;
+
+ if (bits >= state->shift) {
+ bits -= state->shift;
+ *state->p |= v >> bits;
+
+ state->p = next_word(state->p);
+ state->shift = 64;
+ }
+
+ if (bits) {
+ state->shift -= bits;
+ *state->p |= v << state->shift;
+ }
+}
+
+/*
+ * This is used in bset_search_tree(), where we need a packed pos in order to be
+ * able to compare against the keys in the auxiliary search tree - and it's
+ * legal to use a packed pos that isn't equivalent to the original pos,
+ * _provided_ it compares <= to the original pos.
+ */
+void bkey_pack_pos_lossy(struct bkey_packed *out, struct bpos in,
+ const struct bkey_format *format)
+{
+ struct pack_state state = pack_state_init(format, out);
+
+ memset(out, 0, format->key_u64s * sizeof(u64));
+ out->u64s = format->key_u64s;
+ out->format = KEY_FORMAT_LOCAL_BTREE;
+ out->type = KEY_TYPE_DELETED;
+
+ set_inc_field_lossy(&state, in.inode);
+ set_inc_field_lossy(&state, in.offset);
+ set_inc_field_lossy(&state, in.snapshot);
+}
+
+void bch_bkey_format_init(struct bkey_format *format)
+{
+ *format = (struct bkey_format) {
+ .nr_fields = BKEY_NR_FIELDS,
+ };
+}
+
+static void __bkey_format_add(struct bkey_format *format,
+ unsigned *field, u64 v)
+{
+ u8 *bits = &format->bits_per_field[(*field)++];
+
+ *bits = max_t(unsigned, *bits, fls64(v));
+}
+
+/*
+ * Changes @format so that @k can be successfully packed with @format
+ */
+void bch_bkey_format_add(struct bkey_format *format, struct bkey *k)
+{
+ unsigned field = 0;
+
+ __bkey_format_add(format, &field, k->p.inode);
+ __bkey_format_add(format, &field, k->p.offset);
+ __bkey_format_add(format, &field, k->p.snapshot);
+ __bkey_format_add(format, &field, k->size);
+ __bkey_format_add(format, &field, k->version);
+ EBUG_ON(field != BKEY_NR_FIELDS);
+}
+
+void bch_bkey_format_done(struct bkey_format *format)
+{
+ unsigned i, bits = KEY_PACKED_BITS_START;
+
+ for (i = 0; i < ARRAY_SIZE(format->bits_per_field); i++)
+ bits += format->bits_per_field[i];
+
+ format->key_u64s = DIV_ROUND_UP(bits, 64);
+}
+
+struct bkey_format btree_keys_calc_format(struct btree_keys *b)
+{
+ struct bkey_format ret;
+ struct btree_node_iter iter;
+ struct bkey_tup tup;
+
+ bch_bkey_format_init(&ret);
+
+ for_each_btree_node_key_unpack(b, &tup, &iter)
+ bch_bkey_format_add(&ret, &tup.k);
+
+ bch_bkey_format_done(&ret);
+
+ return ret;
+}
+
+/* Most significant differing bit */
+unsigned bkey_greatest_differing_bit(const struct bkey_format *format,
+ const struct bkey_packed *l_k,
+ const struct bkey_packed *r_k)
+{
+ const u64 *l = high_word(format, l_k);
+ const u64 *r = high_word(format, r_k);
+ unsigned nr_key_bits = bkey_format_key_bits(format);
+ u64 l_v, r_v;
+
+ /* for big endian, skip past header */
+ nr_key_bits += high_bit_offset;
+ l_v = *l & (~0ULL >> high_bit_offset);
+ r_v = *r & (~0ULL >> high_bit_offset);
+
+ while (1) {
+ if (nr_key_bits < 64) {
+ l_v >>= 64 - nr_key_bits;
+ r_v >>= 64 - nr_key_bits;
+ nr_key_bits = 0;
+ } else {
+ nr_key_bits -= 64;
+ }
+
+ if (l_v != r_v)
+ return fls64(l_v ^ r_v) + nr_key_bits;
+
+ if (!nr_key_bits)
+ return 0;
+
+ l = next_word(l);
+ r = next_word(r);
+
+ l_v = *l;
+ r_v = *r;
+ }
+}
+
+static int __bkey_cmp_bits(unsigned nr_key_bits, const u64 *l, const u64 *r)
+{
+ u64 l_v, r_v;
+
+ if (!nr_key_bits)
+ return 0;
+
+ /* for big endian, skip past header */
+ nr_key_bits += high_bit_offset;
+ l_v = *l & (~0ULL >> high_bit_offset);
+ r_v = *r & (~0ULL >> high_bit_offset);
+
+ while (1) {
+ if (nr_key_bits < 64) {
+ l_v >>= 64 - nr_key_bits;
+ r_v >>= 64 - nr_key_bits;
+ nr_key_bits = 0;
+ } else {
+ nr_key_bits -= 64;
+ }
+
+ if (l_v != r_v)
+ return l_v < r_v ? -1 : 1;
+
+ if (!nr_key_bits)
+ return 0;
+
+ l = next_word(l);
+ r = next_word(r);
+
+ l_v = *l;
+ r_v = *r;
+ }
+}
+
+/*
+ * Would like to use this if we can make __bkey_cmp_bits() fast enough, it'll be
+ * a decent reduction in code size
+ */
+#if 0
+int bkey_cmp(struct bpos l, struct bpos r)
+{
+ return __bkey_cmp_bits((sizeof(l->p.inode) +
+ sizeof(l->p.offset) +
+ sizeof(l->p.snapshot)) * 8,
+ __high_word(BKEY_U64s, l),
+ __high_word(BKEY_U64s, r));
+}
+#endif
+
+int __bkey_cmp_packed(const struct bkey_format *f,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r)
+{
+ int ret;
+
+ EBUG_ON(!bkey_packed(l) || !bkey_packed(r));
+
+ ret = __bkey_cmp_bits(bkey_format_key_bits(f),
+ high_word(f, l),
+ high_word(f, r));
+
+ EBUG_ON(ret != bkey_cmp(bkey_unpack_key(f, l).p,
+ bkey_unpack_key(f, r).p));
+ return ret;
+}
+
+int __bkey_cmp_left_packed(const struct bkey_format *format,
+ const struct bkey_packed *l, struct bpos r)
+{
+ return bkey_cmp(__bkey_unpack_key(format, l).p, r);
+}
diff --git a/drivers/md/bcache/bkey.h b/drivers/md/bcache/bkey.h
index 1262a29e7e41..1ce802feeaa0 100644
--- a/drivers/md/bcache/bkey.h
+++ b/drivers/md/bcache/bkey.h
@@ -3,6 +3,9 @@
#include <asm/bug.h>
#include <linux/bcache.h>
+#include <linux/bcache-kernel.h>
+
+#include "util.h"
int bch_bkey_to_text(char *, size_t, const struct bkey *);
@@ -43,7 +46,36 @@ static inline void set_bkey_deleted(struct bkey *k)
#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_DELETED)
-static __always_inline int64_t bkey_cmp(struct bpos l, struct bpos r)
+struct btree_keys;
+
+void bch_bkey_format_init(struct bkey_format *);
+void bch_bkey_format_add(struct bkey_format *, struct bkey *);
+void bch_bkey_format_done(struct bkey_format *);
+struct bkey_format btree_keys_calc_format(struct btree_keys *);
+
+unsigned bkey_greatest_differing_bit(const struct bkey_format *,
+ const struct bkey_packed *,
+ const struct bkey_packed *);
+
+int __bkey_cmp_left_packed(const struct bkey_format *,
+ const struct bkey_packed *,
+ struct bpos);
+
+#define bkey_cmp_left_packed(_format, _l, _r) \
+({ \
+ const struct bkey *_l_unpacked; \
+ \
+ unlikely(_l_unpacked = packed_to_bkey_c(_l)) \
+ ? bkey_cmp(_l_unpacked->p, _r) \
+ : __bkey_cmp_left_packed(_format, _l, _r); \
+})
+
+int __bkey_cmp_packed(const struct bkey_format *,
+ const struct bkey_packed *,
+ const struct bkey_packed *);
+
+#if 1
+static __always_inline int bkey_cmp(struct bpos l, struct bpos r)
{
if (l.inode != r.inode)
return l.inode < r.inode ? -1 : 1;
@@ -53,6 +85,89 @@ static __always_inline int64_t bkey_cmp(struct bpos l, struct bpos r)
return l.snapshot < r.snapshot ? -1 : 1;
return 0;
}
+#else
+int bkey_cmp(struct bpos l, struct bpos r);
+#endif
+
+static inline bool bkey_packed(const struct bkey_packed *k)
+{
+ EBUG_ON(k->format > KEY_FORMAT_CURRENT);
+ return k->format != KEY_FORMAT_CURRENT;
+}
+
+/*
+ * It's safe to treat an unpacked bkey as a packed one, but not the reverse
+ */
+static inline struct bkey_packed *bkey_to_packed(struct bkey_i *k)
+{
+ return (struct bkey_packed *) k;
+}
+
+static inline const struct bkey_packed *bkey_to_packed_c(const struct bkey_i *k)
+{
+ return (const struct bkey_packed *) k;
+}
+
+static inline struct bkey_i *packed_to_bkey(struct bkey_packed *k)
+{
+ return bkey_packed(k) ? NULL : (struct bkey_i *) k;
+}
+
+static inline const struct bkey *packed_to_bkey_c(const struct bkey_packed *k)
+{
+ return bkey_packed(k) ? NULL : (const struct bkey *) k;
+}
+
+static inline unsigned bkey_format_key_bits(const struct bkey_format *format)
+{
+ return format->bits_per_field[BKEY_FIELD_INODE] +
+ format->bits_per_field[BKEY_FIELD_OFFSET] +
+ format->bits_per_field[BKEY_FIELD_SNAPSHOT];
+}
+
+#define bkey_packed_typecheck(_k) \
+({ \
+ BUILD_BUG_ON(!type_is(_k, struct bkey *) && \
+ !type_is(_k, struct bkey_packed *)); \
+ type_is(_k, struct bkey_packed *) && bkey_packed((void *) (_k));\
+})
+
+/*
+ * If @_l and @_r are in the same format, does the comparison without unpacking.
+ * Otherwise, unpacks whichever one is packed.
+ */
+#define bkey_cmp_packed(_f, _l, _r) \
+ ((bkey_packed_typecheck(_l) && bkey_packed_typecheck(_r)) \
+ ? __bkey_cmp_packed(_f, (void *) _l, (void *) _r) \
+ : bkey_packed_typecheck(_l) \
+ ? __bkey_cmp_left_packed(_f, \
+ (struct bkey_packed *) _l, \
+ ((struct bkey *) _r)->p) \
+ : bkey_packed_typecheck(_r) \
+ ? -__bkey_cmp_left_packed(_f, \
+ (struct bkey_packed *) _r, \
+ ((struct bkey *) _l)->p) \
+ : bkey_cmp(((struct bkey *) _l)->p, \
+ ((struct bkey *) _r)->p))
+
+/* packed or unpacked */
+static inline int bkey_cmp_p_or_unp(const struct bkey_format *format,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r_packed,
+ struct bpos r)
+{
+ const struct bkey *l_unpacked;
+
+ EBUG_ON(r_packed && !bkey_packed(r_packed));
+
+ if (unlikely(l_unpacked = packed_to_bkey_c(l)))
+ return bkey_cmp(l_unpacked->p, r);
+
+ if (likely(r_packed))
+ return __bkey_cmp_packed(format, l, r_packed);
+
+ return __bkey_cmp_left_packed(format, l, r);
+}
static inline struct bpos bkey_successor(struct bpos p)
{
@@ -78,22 +193,271 @@ static inline struct bpos bkey_start_pos(const struct bkey *k)
};
}
+/* Packed helpers */
+
+static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
+ const struct bkey_packed *k)
+{
+ return bkey_packed(k) ? format->key_u64s : BKEY_U64s;
+}
+
+static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
+ const struct bkey_packed *k)
+{
+ return bkeyp_key_u64s(format, k) * sizeof(u64);
+}
+
+static inline unsigned bkeyp_val_u64s(const struct bkey_format *format,
+ const struct bkey_packed *k)
+{
+ return k->u64s - bkeyp_key_u64s(format, k);
+}
+
+static inline size_t bkeyp_val_bytes(const struct bkey_format *format,
+ const struct bkey_packed *k)
+{
+ return bkeyp_val_u64s(format, k) * sizeof(u64);
+}
+
+#define bkeyp_val(_format, _k) \
+ ((struct bch_val *) ((_k)->_data + bkeyp_key_u64s(_format, _k)))
+
+extern const struct bkey_format bch_bkey_format_current;
+
+bool bch_bkey_format_transform(const struct bkey_format *,
+ struct bkey_packed *,
+ const struct bkey_format *,
+ const struct bkey_packed *);
+
+static inline struct bkey __bkey_unpack_key(const struct bkey_format *format,
+ const struct bkey_packed *in)
+{
+ struct bkey out;
+ bool s;
+
+ EBUG_ON(in->format != KEY_FORMAT_LOCAL_BTREE);
+
+ out.format = KEY_FORMAT_CURRENT;
+
+ s = bch_bkey_format_transform(&bch_bkey_format_current, (void *) &out,
+ format, in);
+ EBUG_ON(!s);
+
+ return out;
+}
+
+static inline struct bkey bkey_unpack_key(const struct bkey_format *format,
+ const struct bkey_packed *src)
+{
+ const struct bkey *src_unpacked;
+
+ return (src_unpacked = packed_to_bkey_c(src))
+ ? *src_unpacked
+ : __bkey_unpack_key(format, src);
+}
+
+static inline bool bkey_pack_key(struct bkey_packed *out, const struct bkey *in,
+ const struct bkey_format *format)
+{
+ EBUG_ON(in->format != KEY_FORMAT_CURRENT);
+
+ out->format = KEY_FORMAT_LOCAL_BTREE;
+
+ return bch_bkey_format_transform(format, out,
+ &bch_bkey_format_current, (void *) in);
+}
+
+bool bkey_pack_pos(struct bkey_packed *, struct bpos,
+ const struct bkey_format *);
+void bkey_pack_pos_lossy(struct bkey_packed *, struct bpos,
+ const struct bkey_format *);
+void bkey_unpack(struct bkey_i *, const struct bkey_format *,
+ const struct bkey_packed *);
+bool bkey_pack(struct bkey_packed *, const struct bkey_i *,
+ const struct bkey_format *);
+
+/* Disassembled bkeys */
+
+struct bkey_tup {
+ struct bkey k;
+ struct bch_val *v;
+};
+
+static inline void bkey_disassemble(struct bkey_tup *tup,
+ const struct bkey_format *f,
+ const struct bkey_packed *k)
+{
+ tup->k = bkey_unpack_key(f, k);
+ tup->v = bkeyp_val(f, k);
+}
+
+static inline void bkey_reassemble(struct bkey_i *dst,
+ struct bkey_s_c src)
+{
+ dst->k = *src.k;
+ memcpy(&dst->v, src.v, bkey_val_bytes(src.k));
+}
+
+#define bkey_s_null ((struct bkey_s) { .k = NULL, .v = NULL })
+#define bkey_s_c_null ((struct bkey_s_c) { .k = NULL, .v = NULL })
+
+static inline struct bkey_s bkey_to_s(struct bkey *k)
+{
+ return (struct bkey_s) { .k = k, .v = NULL };
+}
+
+static inline struct bkey_s_c bkey_to_s_c(const struct bkey *k)
+{
+ return (struct bkey_s_c) { .k = k, .v = NULL };
+}
+
+static inline struct bkey_s bkey_tup_to_s(struct bkey_tup *tup)
+{
+ return (struct bkey_s) { .k = &tup->k, .v = tup->v };
+}
+
+static inline struct bkey_s_c bkey_tup_to_s_c(const struct bkey_tup *tup)
+{
+ return (struct bkey_s_c) { .k = &tup->k, .v = tup->v };
+}
+
+static inline struct bkey_s bkey_i_to_s(struct bkey_i *k)
+{
+ return (struct bkey_s) { .k = &k->k, .v = &k->v };
+}
+
+static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k)
+{
+ return (struct bkey_s_c) { .k = &k->k, .v = &k->v };
+}
+
+static inline struct bkey_s_c bkey_s_to_s_c(const struct bkey_s k)
+{
+ return (struct bkey_s_c) { .k = k.k, .v = k.v };
+}
+
+#define to_bkey_s_c(_k) \
+ ((struct bkey_s_c) { \
+ .k = (_k).k, \
+ .v = &(_k).v->v, \
+ })
+
+/*
+ * For a given type of value (e.g. struct bch_extent), generates the types for
+ * bkey + bch_extent - inline, split, split const - and also all the conversion
+ * functions, which also check that the value is of the correct type.
+ *
+ * We use anonymous unions for upcasting - e.g. converting from a
+ * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion
+ * functions.
+ */
#define BKEY_VAL_ACCESSORS(name, nr) \
-static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey *k) \
+struct bkey_s_##name { \
+ union { \
+ struct { \
+ struct bkey *k; \
+ struct bch_##name *v; \
+ }; \
+ struct bkey_s s; \
+ }; \
+}; \
+ \
+struct bkey_s_c_##name { \
+ union { \
+ struct { \
+ const struct bkey *k; \
+ const struct bch_##name *v; \
+ }; \
+ struct bkey_s_c s_c; \
+ }; \
+}; \
+ \
+static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \
+{ \
+ BUG_ON(k->k.type != nr); \
+ return container_of(&k->k, struct bkey_i_##name, k); \
+} \
+ \
+static inline const struct bkey_i_##name *bkey_i_to_##name##_c(const struct bkey_i *k)\
{ \
- BUG_ON(k->type != nr); \
- return container_of(k, struct bkey_i_##name, k); \
+ BUG_ON(k->k.type != nr); \
+ return container_of(&k->k, struct bkey_i_##name, k); \
} \
\
-static inline const struct bkey_i_##name *bkey_i_to_##name##_c(const struct bkey *k)\
+static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \
{ \
- BUG_ON(k->type != nr); \
- return container_of(k, struct bkey_i_##name, k); \
+ BUG_ON(k.k->type != nr); \
+ return (struct bkey_s_##name) { \
+ .k = k.k, \
+ .v = container_of(k.v, struct bch_##name, v), \
+ }; \
} \
\
-static inline struct bkey_i_##name *bkey_##name##_init(struct bkey *_k) \
+static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\
{ \
- struct bkey_i_##name *k = container_of(_k, struct bkey_i_##name, k);\
+ BUG_ON(k.k->type != nr); \
+ return (struct bkey_s_c_##name) { \
+ .k = k.k, \
+ .v = container_of(k.v, struct bch_##name, v), \
+ }; \
+} \
+ \
+static inline struct bkey_s_##name name##_i_to_s(struct bkey_i_##name *k)\
+{ \
+ return (struct bkey_s_##name) { \
+ .k = &k->k, \
+ .v = &k->v, \
+ }; \
+} \
+ \
+static inline struct bkey_s_c_##name name##_i_to_s_c(const struct bkey_i_##name *k)\
+{ \
+ return (struct bkey_s_c_##name) { \
+ .k = &k->k, \
+ .v = &k->v, \
+ }; \
+} \
+ \
+static inline struct bkey_s_c_##name name##_s_to_s_c(struct bkey_s_##name k)\
+{ \
+ return (struct bkey_s_c_##name) { \
+ .k = k.k, \
+ .v = k.v, \
+ }; \
+} \
+ \
+static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \
+{ \
+ BUG_ON(k->k.type != nr); \
+ return (struct bkey_s_##name) { \
+ .k = &k->k, \
+ .v = container_of(&k->v, struct bch_##name, v), \
+ }; \
+} \
+ \
+static inline struct bkey_s_c_##name bkey_i_to_s_c_##name(const struct bkey_i *k)\
+{ \
+ BUG_ON(k->k.type != nr); \
+ return (struct bkey_s_c_##name) { \
+ .k = &k->k, \
+ .v = container_of(&k->v, struct bch_##name, v), \
+ }; \
+} \
+ \
+static inline struct bch_##name *bkey_p_##name##_val(const struct bkey_format *f,\
+ struct bkey_packed *k)\
+{ return container_of(bkeyp_val(f, k), struct bch_##name, v); \
+} \
+ \
+static inline const struct bch_##name *bkey_p_c_##name##_val(const struct bkey_format *f,\
+ const struct bkey_packed *k)\
+{ \
+ return container_of(bkeyp_val(f, k), struct bch_##name, v); \
+} \
+ \
+static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\
+{ \
+ struct bkey_i_##name *k = container_of(&_k->k, struct bkey_i_##name, k);\
\
bkey_init(&k->k); \
memset(&k->v, 0, sizeof(k->v)); \
@@ -114,4 +478,37 @@ BKEY_VAL_ACCESSORS(dirent, BCH_DIRENT);
BKEY_VAL_ACCESSORS(xattr, BCH_XATTR);
+/* byte order helpers */
+
+#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
+#error edit for your odd byteorder.
+#endif
+
+#ifdef __LITTLE_ENDIAN
+
+#define high_bit_offset 0
+#define low_bit_offset KEY_PACKED_BITS_START
+
+#define __high_word(u64s, k) ((k)->_data + (u64s) - 1)
+#define __low_word(u64s, k) ((k)->_data)
+
+#define nth_word(p, n) ((p) - (n))
+
+#else
+
+#define high_bit_offset KEY_PACKED_BITS_START
+#define low_bit_offset 0
+
+#define __high_word(u64s, k) ((k)->_data)
+#define __low_word(u64s, k) ((k)->_data + (u64s) - 1)
+
+#define nth_word(p, n) ((p) + (n))
+
+#endif
+
+#define high_word(format, k) __high_word((format)->key_u64s, k)
+#define low_word(format, k) __low_word((format)->key_u64s, k)
+
+#define next_word(p) nth_word(p, 1)
+
#endif /* _BCACHE_BKEY_H */
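
One invariant worth keeping in mind while reading the callers: a format grown
via bch_bkey_format_add() for a given key must be able to pack that key, and
packing followed by unpacking must recover it. A sketch of that round trip -
illustration only, not part of the patch, and assuming the key fits in
BKEY_PADDED() scratch space as it does at the existing call sites:

static void sketch_pack_roundtrip(struct bkey_i *in)
{
	struct bkey_format f;
	BKEY_PADDED(k) packed, unpacked;

	bch_bkey_format_init(&f);
	bch_bkey_format_add(&f, &in->k);	/* size every field from this key */
	bch_bkey_format_done(&f);

	/* cannot fail: @f was built so that @in fits */
	BUG_ON(!bkey_pack(bkey_to_packed(&packed.k), in, &f));

	/* unpacking recovers the original key */
	bkey_unpack(&unpacked.k, &f, bkey_to_packed(&packed.k));
	BUG_ON(bkey_cmp(unpacked.k.k.p, in->k.p));
}
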
diff --git a/drivers/md/bcache/bkey_methods.c b/drivers/md/bcache/bkey_methods.c
index c3774310c1b1..2b37fe623dd7 100644
--- a/drivers/md/bcache/bkey_methods.c
+++ b/drivers/md/bcache/bkey_methods.c
@@ -17,44 +17,44 @@ static const struct bkey_ops *bch_bkey_ops[] = {
bool bkey_invalid(struct cache_set *c,
enum bkey_type type,
- const struct bkey *k)
+ struct bkey_s_c k)
{
const struct bkey_ops *ops = bch_bkey_ops[type];
- if (k->u64s < BKEY_U64s)
+ if (k.k->u64s < BKEY_U64s)
return true;
- if (k->size &&
- (bkey_deleted(k) || !ops->is_extents))
+ if (k.k->size &&
+ (bkey_deleted(k.k) || !ops->is_extents))
return true;
- switch (k->type) {
+ switch (k.k->type) {
case KEY_TYPE_DELETED:
return false;
case KEY_TYPE_DISCARD:
case KEY_TYPE_ERROR:
- return bkey_val_bytes(k) != 0;
+ return bkey_val_bytes(k.k) != 0;
case KEY_TYPE_COOKIE:
- return (bkey_val_bytes(k) != sizeof(struct bch_cookie));
+ return (bkey_val_bytes(k.k) != sizeof(struct bch_cookie));
default:
- if (k->type < KEY_TYPE_GENERIC_NR)
+ if (k.k->type < KEY_TYPE_GENERIC_NR)
return true;
return ops->key_invalid(c, k);
}
}
-void bkey_debugcheck(struct btree *b, struct bkey *k)
+void bkey_debugcheck(struct btree *b, struct bkey_s_c k)
{
enum bkey_type type = b->level ? BKEY_TYPE_BTREE : b->btree_id;
const struct bkey_ops *ops = bch_bkey_ops[type];
- BUG_ON(!k->u64s);
+ BUG_ON(!k.k->u64s);
- cache_set_bug_on(bkey_cmp(k->p, b->key.p) > 0,
+ cache_set_bug_on(bkey_cmp(k.k->p, b->key.k.p) > 0,
b->c, "key past end of btree node");
if (bkey_invalid(b->c, type, k)) {
@@ -65,21 +65,21 @@ void bkey_debugcheck(struct btree *b, struct bkey *k)
return;
}
- if (k->type >= KEY_TYPE_GENERIC_NR &&
+ if (k.k->type >= KEY_TYPE_GENERIC_NR &&
ops->key_debugcheck)
ops->key_debugcheck(b, k);
}
void bch_bkey_val_to_text(struct btree *b, char *buf,
- size_t size, const struct bkey *k)
+ size_t size, struct bkey_s_c k)
{
enum bkey_type type = b->level ? BKEY_TYPE_BTREE : b->btree_id;
const struct bkey_ops *ops = bch_bkey_ops[type];
char *out = buf, *end = buf + size;
- out += bch_bkey_to_text(out, end - out, k);
+ out += bch_bkey_to_text(out, end - out, k.k);
- if (k->type >= KEY_TYPE_GENERIC_NR &&
+ if (k.k->type >= KEY_TYPE_GENERIC_NR &&
ops->val_to_text) {
out += scnprintf(out, end - out, " -> ");
ops->val_to_text(b, out, end - out, k);
diff --git a/drivers/md/bcache/bkey_methods.h b/drivers/md/bcache/bkey_methods.h
index 2425deac938e..2f9b05a5c4d0 100644
--- a/drivers/md/bcache/bkey_methods.h
+++ b/drivers/md/bcache/bkey_methods.h
@@ -14,18 +14,17 @@ struct bkey;
struct bkey_ops {
bool (*key_invalid)(const struct cache_set *,
- const struct bkey *);
- void (*key_debugcheck)(struct btree *,
- const struct bkey *);
- void (*val_to_text)(const struct btree *, char *,
- size_t, const struct bkey *);
+ struct bkey_s_c);
+ void (*key_debugcheck)(struct btree *, struct bkey_s_c);
+ void (*val_to_text)(const struct btree *, char *, size_t,
+ struct bkey_s_c);
bool is_extents;
};
-bool bkey_invalid(struct cache_set *, enum bkey_type, const struct bkey *);
-void bkey_debugcheck(struct btree *, struct bkey *);
-void bch_bkey_val_to_text(struct btree *, char *, size_t, const struct bkey *);
+bool bkey_invalid(struct cache_set *, enum bkey_type, struct bkey_s_c);
+void bkey_debugcheck(struct btree *, struct bkey_s_c);
+void bch_bkey_val_to_text(struct btree *, char *, size_t, struct bkey_s_c);
#undef DEF_BTREE_ID
diff --git a/drivers/md/bcache/blockdev.c b/drivers/md/bcache/blockdev.c
index 49ff06572c7e..34257754910c 100644
--- a/drivers/md/bcache/blockdev.c
+++ b/drivers/md/bcache/blockdev.c
@@ -402,13 +402,13 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
struct closure cl;
closure_init_stack(&cl);
- bkey_inode_blockdev_init(&dc->disk.inode.k);
+ bkey_inode_blockdev_init(&dc->disk.inode.k_i);
dc->disk.inode.v.i_uuid = dc->sb.disk_uuid;
memcpy(dc->disk.inode.v.i_label, dc->sb.label, SB_LABEL_SIZE);
dc->disk.inode.v.i_inode.i_ctime = rtime;
dc->disk.inode.v.i_inode.i_mtime = rtime;
- ret = bch_inode_create(c, &dc->disk.inode.k,
+ ret = bch_inode_create(c, &dc->disk.inode.k_i,
0, BLOCKDEV_INODE_MAX,
&c->unused_inode_hint);
if (ret) {
@@ -425,7 +425,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
closure_sync(&cl);
} else {
dc->disk.inode.v.i_inode.i_mtime = rtime;
- bch_inode_update(c, &dc->disk.inode.k);
+ bch_inode_update(c, &dc->disk.inode.k_i);
}
/* Count dirty sectors before attaching */
@@ -644,7 +644,7 @@ static void flash_dev_flush(struct closure *cl)
}
static int flash_dev_run(struct cache_set *c,
- const struct bkey_i_inode_blockdev *inode)
+ struct bkey_s_c_inode_blockdev inode)
{
struct bcache_device *d = kzalloc(sizeof(struct bcache_device),
GFP_KERNEL);
@@ -653,14 +653,15 @@ static int flash_dev_run(struct cache_set *c,
if (!d)
return ret;
- d->inode = *inode;
+ bkey_reassemble(&d->inode.k_i, to_bkey_s_c(inode));
closure_init(&d->cl, NULL);
set_closure_fn(&d->cl, flash_dev_flush, system_wq);
kobject_init(&d->kobj, &bch_flash_dev_ktype);
- ret = bcache_device_init(d, block_bytes(c), inode->v.i_inode.i_size >> 9);
+ ret = bcache_device_init(d, block_bytes(c),
+ inode.v->i_inode.i_size >> 9);
if (ret)
goto err;
@@ -685,23 +686,23 @@ err:
int flash_devs_run(struct cache_set *c)
{
struct btree_iter iter;
- const struct bkey *k;
- const struct bkey_i_inode_blockdev *inode;
+ struct bkey_s_c k;
+ struct bkey_s_c_inode_blockdev inode;
int ret = 0;
if (test_bit(CACHE_SET_STOPPING, &c->flags))
return -EINVAL;
for_each_btree_key(&iter, c, BTREE_ID_INODES, k, POS_MIN) {
- if (k->p.inode >= BLOCKDEV_INODE_MAX)
+ if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
break;
- if (k->type != BCH_INODE_BLOCKDEV)
+ if (k.k->type != BCH_INODE_BLOCKDEV)
continue;
- inode = bkey_i_to_inode_blockdev_c(k);
+ inode = bkey_s_c_to_inode_blockdev(k);
- if (INODE_FLASH_ONLY(&inode->v)) {
+ if (INODE_FLASH_ONLY(inode.v)) {
ret = flash_dev_run(c, inode);
if (ret) {
bch_cache_set_error(c, "can't bring up flash volumes: %i", ret);
@@ -720,21 +721,21 @@ int bch_flash_dev_create(struct cache_set *c, u64 size)
struct bkey_i_inode_blockdev inode;
int ret;
- bkey_inode_blockdev_init(&inode.k);
+ bkey_inode_blockdev_init(&inode.k_i);
get_random_bytes(&inode.v.i_uuid, sizeof(inode.v.i_uuid));
inode.v.i_inode.i_ctime = rtime;
inode.v.i_inode.i_mtime = rtime;
inode.v.i_inode.i_size = size;
SET_INODE_FLASH_ONLY(&inode.v, 1);
- ret = bch_inode_create(c, &inode.k, 0, BLOCKDEV_INODE_MAX,
+ ret = bch_inode_create(c, &inode.k_i, 0, BLOCKDEV_INODE_MAX,
&c->unused_inode_hint);
if (ret) {
pr_err("Can't create volume: %d", ret);
return ret;
}
- return flash_dev_run(c, &inode);
+ return flash_dev_run(c, inode_blockdev_i_to_s_c(&inode));
}
void bch_blockdev_exit(void)
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 709de4ccf148..135d1df2a55c 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -16,38 +16,48 @@
#include <linux/random.h>
#include <linux/prefetch.h>
-static bool keys_out_of_order(struct bkey *prev, struct bkey *next,
+static bool keys_out_of_order(const struct bkey_format *f,
+ const struct bkey_packed *prev,
+ const struct bkey_packed *next,
bool is_extents)
{
- return bkey_cmp(prev->p, bkey_start_pos(next)) > 0 ||
+ struct bkey nextu = bkey_unpack_key(f, next);
+
+ return bkey_cmp_left_packed(f, prev, bkey_start_pos(&nextu)) > 0 ||
((is_extents
? !bkey_deleted(next)
: !bkey_deleted(prev)) &&
- !bkey_cmp(prev->p, next->p));
+ !bkey_cmp_packed(f, prev, next));
}
#ifdef CONFIG_BCACHE_DEBUG
void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
{
- struct bkey *k, *next;
+ struct bkey_format *f = &b->set->data->format;
+ struct bkey_packed *_k, *_n;
+ struct bkey k, n;
char buf[80];
- for (k = i->start; k < bset_bkey_last(i); k = next) {
- next = bkey_next(k);
+ for (_k = i->start, k = bkey_unpack_key(f, _k);
+ _k < bset_bkey_last(i);
+ _k = _n, k = n) {
+ _n = bkey_next(_k);
- bch_bkey_to_text(buf, sizeof(buf), k);
- printk(KERN_ERR "block %u key %u/%u: %s\n", set,
- (unsigned) ((u64 *) k - i->_data), i->u64s, buf);
+ bch_bkey_to_text(buf, sizeof(buf), &k);
+ printk(KERN_ERR "block %u key %zi/%u: %s\n", set,
+ _k->_data - i->_data, i->u64s, buf);
- if (next == bset_bkey_last(i))
+ if (_n == bset_bkey_last(i))
continue;
- if (bkey_cmp(bkey_start_pos(next), k->p) < 0)
+ n = bkey_unpack_key(f, _n);
+
+ if (bkey_cmp(bkey_start_pos(&n), k.p) < 0)
printk(KERN_ERR "Key skipped backwards\n");
else if (!b->ops->is_extents &&
- !bkey_deleted(k) &&
- !bkey_cmp(next->p, k->p))
+ !bkey_deleted(&k) &&
+ !bkey_cmp(n.p, k.p))
printk(KERN_ERR "Duplicate keys\n");
}
}
@@ -66,22 +76,23 @@ void bch_dump_bucket(struct btree_keys *b)
s64 bch_count_data(struct btree_keys *b)
{
struct btree_node_iter iter;
- struct bkey *k;
+ struct bkey_tup k;
u64 ret = 0;
if (!btree_keys_expensive_checks(b))
return -1;
if (b->ops->is_extents)
- for_each_btree_node_key_all(b, k, &iter)
- ret += k->size;
+ for_each_btree_node_key_unpack(b, &k, &iter)
+ ret += k.k.size;
+
return ret;
}
void bch_verify_nr_live_u64s(struct btree_keys *b)
{
- struct bkey *k;
struct btree_node_iter iter;
+ struct bkey_packed *k;
size_t u64s = 0;
for_each_btree_node_key(b, k, &iter)
@@ -95,13 +106,15 @@ void bch_verify_nr_live_u64s(struct btree_keys *b)
/* Auxiliary search trees */
/* 32 bits total: */
-#define BKEY_MID_BITS 5
-#define BKEY_EXPONENT_BITS 8
+#define BKEY_MID_BITS 5U
+#define BKEY_EXPONENT_BITS 8U
#define BKEY_MANTISSA_BITS (32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
#define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1)
#define BFLOAT_FAILED ((1 << BKEY_EXPONENT_BITS) - 1)
+#define KEY_WORDS BITS_TO_LONGS(1 << BKEY_EXPONENT_BITS)
+
struct bkey_float {
unsigned exponent:BKEY_EXPONENT_BITS;
unsigned m:BKEY_MID_BITS;
@@ -368,20 +381,21 @@ void inorder_test(void)
* of the previous key so we can walk backwards to it from t->tree[j]'s key.
*/
-static struct bkey *cacheline_to_bkey(struct bset_tree *t, unsigned cacheline,
- unsigned offset)
+static struct bkey_packed *cacheline_to_bkey(struct bset_tree *t,
+ unsigned cacheline,
+ unsigned offset)
{
return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8;
}
-static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
+static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey_packed *k)
{
return ((void *) k - (void *) t->data) / BSET_CACHELINE;
}
static unsigned bkey_to_cacheline_offset(struct bset_tree *t,
unsigned cacheline,
- struct bkey *k)
+ struct bkey_packed *k)
{
size_t m = (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
@@ -389,12 +403,12 @@ static unsigned bkey_to_cacheline_offset(struct bset_tree *t,
return m;
}
-static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j)
+static struct bkey_packed *tree_to_bkey(struct bset_tree *t, unsigned j)
{
return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m);
}
-static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
+static struct bkey_packed *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
{
return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]);
}
@@ -403,57 +417,68 @@ static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
* For the write set - the one we're currently inserting keys into - we don't
* maintain a full search tree, we just keep a simple lookup table in t->prev.
*/
-static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline)
+static struct bkey_packed *table_to_bkey(struct bset_tree *t,
+ unsigned cacheline)
{
return cacheline_to_bkey(t, cacheline, t->prev[cacheline]);
}
-static inline unsigned bfloat_mantissa(struct bpos pos,
- struct bkey_float *f)
+static inline unsigned bfloat_mantissa(const struct bkey_packed *k,
+ const struct bkey_float *f)
{
-#ifdef __LITTLE_ENDIAN
- u64 *ptr = (u64 *) (pos.kw + (f->exponent >> 5));
-#else
- u64 *ptr = (u64 *) (pos.kw - (f->exponent >> 5));
-#endif
+ u64 *ptr;
+
+ EBUG_ON(!bkey_packed(k));
+
+ ptr = (u64 *) (((u32 *) k->_data) + (f->exponent >> 5));
+
return (get_unaligned(ptr) >> (f->exponent & 31)) &
BKEY_MANTISSA_MASK;
}
-static void make_bfloat(struct bset_tree *t, unsigned j)
+static void make_bfloat(struct bkey_format *format,
+ struct bset_tree *t, unsigned j)
{
struct bkey_float *f = &t->tree[j];
- struct bkey *m = tree_to_bkey(t, j);
- struct bkey *p = tree_to_prev_bkey(t, j);
+ struct bkey_packed *m = tree_to_bkey(t, j);
+ struct bkey_packed *p = tree_to_prev_bkey(t, j);
- struct bkey *l = is_power_of_2(j)
+ struct bkey_packed *l = is_power_of_2(j)
? t->data->start
: tree_to_prev_bkey(t, j >> ffs(j));
- struct bkey *r = is_power_of_2(j + 1)
+ struct bkey_packed *r = is_power_of_2(j + 1)
? bset_bkey_idx(t->data, t->data->u64s - t->end.u64s)
: tree_to_bkey(t, j >> (ffz(j) + 1));
+ unsigned exponent, shift;
BUG_ON(m < l || m > r);
BUG_ON(bkey_next(p) != m);
- if (l->p.inode ^ r->p.inode)
- f->exponent = fls64(l->p.inode ^ r->p.inode) + 96;
- else if (l->p.offset ^ r->p.offset)
- f->exponent = fls64(l->p.offset ^ r->p.offset) + 32;
- else
- f->exponent = fls64(l->p.snapshot ^ r->p.snapshot);
-
- f->exponent = max_t(int, f->exponent - BKEY_MANTISSA_BITS, 0);
-
/*
- * Setting f->exponent = 127 flags this node as failed, and causes the
- * lookup code to fall back to comparing against the original key.
+ * for failed bfloats, the lookup code falls back to comparing against
+ * the original key.
*/
+ f->exponent = BFLOAT_FAILED;
- if (bfloat_mantissa(m->p, f) != bfloat_mantissa(p->p, f))
- f->mantissa = bfloat_mantissa(m->p, f) - 1;
- else
+ if (!bkey_packed(l) || !bkey_packed(r) ||
+ !bkey_packed(p) || !bkey_packed(m))
+ return;
+
+ exponent = max_t(int, bkey_greatest_differing_bit(format, l, r) -
+ BKEY_MANTISSA_BITS + 1, 0);
+
+#ifdef __LITTLE_ENDIAN
+ shift = min(format->key_u64s * 64 - bkey_format_key_bits(format) + exponent,
+ format->key_u64s * 64 - BKEY_MANTISSA_BITS);
+#endif
+ BUG_ON(shift >= BFLOAT_FAILED);
+
+ f->exponent = shift;
+ f->mantissa = bfloat_mantissa(m, f) - 1;
+
+ if (bfloat_mantissa(m, f) == bfloat_mantissa(p, f) &&
+ shift > format->key_u64s * 64 - bkey_format_key_bits(format))
f->exponent = BFLOAT_FAILED;
}
@@ -493,8 +518,12 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i)
if (i != b->set->data) {
b->set[++b->nsets].data = i;
i->seq = b->set->data->seq;
- } else
+ i->format = b->set->data->format;
+ } else {
+ bch_bkey_format_init(&b->set->data->format);
+ bch_bkey_format_done(&b->set->data->format);
get_random_bytes(&i->seq, sizeof(uint64_t));
+ }
bch_bset_build_unwritten_tree(b);
}
@@ -503,7 +532,7 @@ EXPORT_SYMBOL(bch_bset_init_next);
void bch_bset_build_written_tree(struct btree_keys *b)
{
struct bset_tree *t = bset_tree_last(b);
- struct bkey *prev = NULL, *k = t->data->start;
+ struct bkey_packed *prev = NULL, *k = t->data->start;
unsigned j, cacheline = 1;
b->last_set_unwritten = 0;
@@ -549,14 +578,15 @@ retry:
for (j = inorder_next(0, t->size);
j;
j = inorder_next(j, t->size))
- make_bfloat(t, j);
+ make_bfloat(&b->set->data->format, t, j);
}
EXPORT_SYMBOL(bch_bset_build_written_tree);
-struct bkey *bkey_prev(struct btree_keys *b, struct bset_tree *t,
- struct bkey *k)
+struct bkey_packed *bkey_prev(struct btree_keys *b,
+ struct bset_tree *t,
+ struct bkey_packed *k)
{
- struct bkey *p;
+ struct bkey_packed *p;
int j;
if (k == t->data->start)
@@ -585,18 +615,21 @@ struct bkey *bkey_prev(struct btree_keys *b, struct bset_tree *t,
/* Insert */
static void verify_insert_pos(struct btree_keys *b,
- struct bkey *prev,
- struct bkey *where,
- struct bkey *insert)
+ const struct bkey_packed *prev,
+ const struct bkey_packed *where,
+ const struct bkey_i *insert)
{
#ifdef CONFIG_BCACHE_DEBUG
+ const struct bkey_format *f = &b->set->data->format;
struct bset_tree *t = bset_tree_last(b);
BUG_ON(prev &&
- keys_out_of_order(prev, insert, b->ops->is_extents));
+ keys_out_of_order(f, prev, bkey_to_packed_c(insert),
+ b->ops->is_extents));
BUG_ON(where != bset_bkey_last(t->data) &&
- keys_out_of_order(insert, where, b->ops->is_extents));
+ keys_out_of_order(f, bkey_to_packed_c(insert), where,
+ b->ops->is_extents));
#endif
}
@@ -607,7 +640,7 @@ static void verify_insert_pos(struct btree_keys *b,
* @top must be in the last bset.
*/
static void bch_btree_node_iter_fix(struct btree_node_iter *iter,
- const struct bkey *where)
+ const struct bkey_packed *where)
{
struct btree_node_iter_set *set;
u64 n = where->u64s;
@@ -618,10 +651,10 @@ static void bch_btree_node_iter_fix(struct btree_node_iter *iter,
set < iter->data + iter->used;
set++)
if (set->end >= where) {
- set->end = (struct bkey *) ((u64 *) set->end + n);
+ set->end = (void *) ((u64 *) set->end + n);
if (set->k >= where)
- set->k = (struct bkey *) ((u64 *) set->k + n);
+ set->k = (void *) ((u64 *) set->k + n);
break;
}
}
@@ -631,7 +664,7 @@ static void bch_btree_node_iter_fix(struct btree_node_iter *iter,
* modified, fix any auxiliary search tree by remaking all the nodes in the
* auxiliary search tree that @k corresponds to
*/
-void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k)
+void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey_packed *k)
{
struct bset_tree *t;
unsigned inorder, j = 1;
@@ -649,13 +682,13 @@ found_set:
if (k == t->data->start)
for (j = 1; j < t->size; j = j * 2)
- make_bfloat(t, j);
+ make_bfloat(&b->set->data->format, t, j);
if (bkey_next(k) == bset_bkey_last(t->data)) {
t->end = *k;
for (j = 1; j < t->size; j = j * 2 + 1)
- make_bfloat(t, j);
+ make_bfloat(&b->set->data->format, t, j);
}
j = inorder_to_tree(inorder, t);
@@ -664,11 +697,11 @@ found_set:
j < t->size &&
k == tree_to_bkey(t, j)) {
/* Fix the auxiliary search tree node this key corresponds to */
- make_bfloat(t, j);
+ make_bfloat(&b->set->data->format, t, j);
/* Children for which this key is the right side boundary */
for (j = j * 2; j < t->size; j = j * 2 + 1)
- make_bfloat(t, j);
+ make_bfloat(&b->set->data->format, t, j);
}
j = inorder_to_tree(inorder + 1, t);
@@ -676,18 +709,18 @@ found_set:
if (j &&
j < t->size &&
k == tree_to_prev_bkey(t, j)) {
- make_bfloat(t, j);
+ make_bfloat(&b->set->data->format, t, j);
/* Children for which this key is the left side boundary */
for (j = j * 2 + 1; j < t->size; j = j * 2)
- make_bfloat(t, j);
+ make_bfloat(&b->set->data->format, t, j);
}
}
EXPORT_SYMBOL(bch_bset_fix_invalidated_key);
static void bch_bset_fix_lookup_table(struct btree_keys *b,
struct bset_tree *t,
- struct bkey *k)
+ struct bkey_packed *k)
{
unsigned shift = k->u64s;
unsigned j = bkey_to_cacheline(t, k);
@@ -736,24 +769,27 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
void bch_bset_insert(struct btree_keys *b,
struct btree_node_iter *iter,
- struct bkey *insert)
+ struct bkey_i *insert)
{
+ struct bkey_format *f = &b->set->data->format;
struct bset_tree *t = bset_tree_last(b);
struct bset *i = t->data;
- struct bkey *prev = NULL;
- struct bkey *where = bch_btree_node_iter_bset_pos(iter, i) ?:
+ struct bkey_packed *prev = NULL;
+ struct bkey_packed *where = bch_btree_node_iter_bset_pos(iter, i) ?:
bset_bkey_last(i);
+ struct bkey_packed packed, *src;
BKEY_PADDED(k) tmp;
- BUG_ON(insert->u64s > bch_btree_keys_u64s_remaining(b));
+ BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(b));
BUG_ON(b->ops->is_extents &&
- (!insert->size || bkey_deleted(insert)));
+ (!insert->k.size || bkey_deleted(&insert->k)));
BUG_ON(!b->last_set_unwritten);
BUG_ON(where < i->start);
BUG_ON(where > bset_bkey_last(i));
while (where != bset_bkey_last(i) &&
- keys_out_of_order(insert, where, b->ops->is_extents))
+ keys_out_of_order(f, bkey_to_packed(insert),
+ where, b->ops->is_extents))
prev = where, where = bkey_next(where);
if (!prev)
@@ -763,16 +799,17 @@ void bch_bset_insert(struct btree_keys *b,
/* prev is in the tree, if we merge we're done */
if (prev &&
- bch_bkey_try_merge_inline(b, iter, prev, insert))
+ bch_bkey_try_merge_inline(b, iter, prev, bkey_to_packed(insert)))
return;
- if (where != bset_bkey_last(i) &&
- b->ops->is_extents &&
- where->u64s == insert->u64s && !where->size) {
- if (!bkey_deleted(insert))
- b->nr_live_u64s += insert->u64s;
+ if (b->ops->is_extents &&
+ where != bset_bkey_last(i) &&
+ where->u64s == insert->k.u64s &&
+ bkey_deleted(where)) {
+ if (!bkey_deleted(&insert->k))
+ b->nr_live_u64s += insert->k.u64s;
- bkey_copy(where, insert);
+ bkey_copy((void *) where, insert);
/*
* We're modifying a key that might be the btree node iter's
@@ -786,7 +823,7 @@ void bch_bset_insert(struct btree_keys *b,
}
if (where != bset_bkey_last(i) &&
- bkey_bytes(insert) <= sizeof(tmp)) {
+ bkey_bytes(&insert->k) <= sizeof(tmp)) {
bkey_copy(&tmp.k, insert);
insert = &tmp.k;
@@ -797,21 +834,28 @@ void bch_bset_insert(struct btree_keys *b,
* a copy of insert, since ->insert_fixup() might trim insert if
* this is a replace operation)
*/
- if (bch_bkey_try_merge_inline(b, iter, insert, where)) {
- bkey_copy(where, insert);
+ if (bch_bkey_try_merge_inline(b, iter,
+ bkey_to_packed(insert),
+ where))
return;
- }
}
- memmove((uint64_t *) where + insert->u64s,
+ src = bkey_pack_key(&packed, &insert->k, f)
+ ? &packed
+ : bkey_to_packed(insert);
+
+ memmove((u64 *) where + src->u64s,
where,
(void *) bset_bkey_last(i) - (void *) where);
- bkey_copy(where, insert);
- i->u64s += insert->u64s;
+ memcpy(where, src,
+ bkeyp_key_bytes(f, src));
+ memcpy(bkeyp_val(f, where), &insert->v,
+ bkeyp_val_bytes(f, src));
+ i->u64s += src->u64s;
- if (!bkey_deleted(insert))
- b->nr_live_u64s += insert->u64s;
+ if (!bkey_deleted(&insert->k))
+ b->nr_live_u64s += src->u64s;
bch_bset_fix_lookup_table(b, t, where);
bch_btree_node_iter_fix(iter, where);
@@ -823,15 +867,18 @@ EXPORT_SYMBOL(bch_bset_insert);
/* Lookup */
__attribute__((flatten))
-static struct bkey *bset_search_write_set(struct bset_tree *t,
- struct bpos search)
+static struct bkey_packed *bset_search_write_set(const struct bkey_format *f,
+ struct bset_tree *t,
+ const struct bkey_packed *packed_search,
+ struct bpos search)
{
unsigned li = 0, ri = t->size;
while (li + 1 != ri) {
unsigned m = (li + ri) >> 1;
- if (bkey_cmp(table_to_bkey(t, m)->p, search) >= 0)
+ if (bkey_cmp_p_or_unp(f, table_to_bkey(t, m),
+ packed_search, search) >= 0)
ri = m;
else
li = m;
@@ -841,11 +888,13 @@ static struct bkey *bset_search_write_set(struct bset_tree *t,
}
__attribute__((flatten))
-static struct bkey *bset_search_tree(struct bset_tree *t,
- struct bpos search)
+static struct bkey_packed *bset_search_tree(const struct bkey_format *format,
+ struct bset_tree *t,
+ struct bpos search)
{
struct bkey_float *f = &t->tree[1];
unsigned inorder, n = 1;
+ struct bkey_packed packed_search;
/* don't ask. */
if (!search.snapshot-- &&
@@ -853,6 +902,15 @@ static struct bkey *bset_search_tree(struct bset_tree *t,
!search.inode--)
BUG();
+ /*
+ * If there are bits in search that don't fit in the packed format,
+ * packed_search will always compare less than search - it'll
+ * effectively have 0s where search did not - so we can still use
+ * packed_search and we'll just do more linear searching than we would
+ * have.
+ */
+ bkey_pack_pos_lossy(&packed_search, search, format);
+
while (1) {
if (likely(n << 4 < t->size)) {
prefetch(&t->tree[n << 4]);
@@ -878,12 +936,14 @@ static struct bkey *bset_search_tree(struct bset_tree *t,
if (likely(f->exponent != BFLOAT_FAILED))
n = n * 2 + (((unsigned)
(f->mantissa -
- bfloat_mantissa(search, f))) >> 31);
+ bfloat_mantissa(&packed_search,
+ f))) >> 31);
else
- n = (bkey_cmp(tree_to_bkey(t, n)->p, search) > 0)
+ n = bkey_cmp_p_or_unp(format, tree_to_bkey(t, n),
+ &packed_search, search) > 0
? n * 2
: n * 2 + 1;
- }
+ } while (n < t->size);
inorder = to_inorder(n >> 1, t);
@@ -906,10 +966,13 @@ static struct bkey *bset_search_tree(struct bset_tree *t,
* Returns the first key greater than or equal to @search
*/
__always_inline
-static struct bkey *bch_bset_search(struct btree_keys *b, struct bset_tree *t,
- struct bpos search)
+static struct bkey_packed *bch_bset_search(struct btree_keys *b,
+ struct bset_tree *t,
+ struct bpos search,
+ struct bkey_packed *packed_search)
{
- struct bkey *m;
+ const struct bkey_format *f = &b->set->data->format;
+ struct bkey_packed *m;
/*
* First, we search for a cacheline, then lastly we do a linear search
@@ -936,28 +999,32 @@ static struct bkey *bch_bset_search(struct btree_keys *b, struct bset_tree *t,
* start and end - handle that here:
*/
- if (unlikely(bkey_cmp(search, t->end.p) > 0))
+ if (unlikely(bkey_cmp_p_or_unp(f, &t->end,
+ packed_search, search) < 0))
return bset_bkey_last(t->data);
- if (unlikely(bkey_cmp(search, t->data->start->p) <= 0))
+ if (unlikely(bkey_cmp_p_or_unp(f, t->data->start,
+ packed_search, search) >= 0))
return t->data->start;
- m = bset_search_tree(t, search);
+ m = bset_search_tree(f, t, search);
} else {
EBUG_ON(!b->nsets &&
t->size < bkey_to_cacheline(t, bset_bkey_last(t->data)));
- m = bset_search_write_set(t, search);
+ m = bset_search_write_set(f, t, packed_search, search);
}
while (m != bset_bkey_last(t->data) &&
- bkey_cmp(m->p, search) < 0)
+ bkey_cmp_p_or_unp(f, m,
+ packed_search, search) < 0)
m = bkey_next(m);
if (btree_keys_expensive_checks(b)) {
- struct bkey *p = bkey_prev(b, t, m);
+ struct bkey_packed *p = bkey_prev(b, t, m);
- BUG_ON(p && bkey_cmp(p->p, search) >= 0);
+ BUG_ON(p &&
+ bkey_cmp_p_or_unp(f, p, packed_search, search) >= 0);
}
return m;
@@ -969,7 +1036,7 @@ static inline bool btree_node_iter_cmp(struct btree_node_iter *iter,
struct btree_node_iter_set l,
struct btree_node_iter_set r)
{
- s64 c = bkey_cmp(l.k->p, r.k->p);
+ s64 c = bkey_cmp_packed(&iter->b->set->data->format, l.k, r.k);
/*
* For non extents, when keys compare equal the deleted keys have to
@@ -986,7 +1053,7 @@ static inline bool btree_node_iter_cmp(struct btree_node_iter *iter,
}
void bch_btree_node_iter_push(struct btree_node_iter *iter,
- struct bkey *k, struct bkey *end)
+ struct bkey_packed *k, struct bkey_packed *end)
{
if (k != end) {
struct btree_node_iter_set n =
@@ -1015,10 +1082,7 @@ static void __bch_btree_node_iter_init(struct btree_keys *b,
iter->size = ARRAY_SIZE(iter->data);
iter->used = 0;
iter->is_extents = b->ops->is_extents;
-
-#ifdef CONFIG_BCACHE_DEBUG
- iter->b = b;
-#endif
+ iter->b = b;
}
void bch_btree_node_iter_init(struct btree_keys *b,
@@ -1026,12 +1090,15 @@ void bch_btree_node_iter_init(struct btree_keys *b,
struct bpos search)
{
struct bset_tree *t;
+ struct bkey_packed p, *packed_search =
+ bkey_pack_pos(&p, search, &b->set->data->format) ? &p : NULL;
__bch_btree_node_iter_init(b, iter, b->set);
for (t = b->set; t <= b->set + b->nsets; t++)
bch_btree_node_iter_push(iter,
- bch_bset_search(b, t, search),
+ bch_bset_search(b, t, search,
+ packed_search),
bset_bkey_last(t->data));
}
EXPORT_SYMBOL(bch_btree_node_iter_init);
@@ -1050,8 +1117,8 @@ void bch_btree_node_iter_init_from_start(struct btree_keys *b,
}
EXPORT_SYMBOL(bch_btree_node_iter_init_from_start);
-struct bkey *bch_btree_node_iter_bset_pos(struct btree_node_iter *iter,
- struct bset *i)
+struct bkey_packed *bch_btree_node_iter_bset_pos(struct btree_node_iter *iter,
+ struct bset *i)
{
struct btree_node_iter_set *set;
@@ -1138,22 +1205,28 @@ next:
}
static void bch_btree_node_iter_next_check(struct btree_node_iter *iter,
- struct bkey *k)
+ struct bkey_packed *k)
{
+ const struct bkey_format *f = &iter->b->set->data->format;
+
+ bkey_unpack_key(f, k);
+
if (!bch_btree_node_iter_end(iter) &&
- keys_out_of_order(k, iter->data->k, iter->is_extents)) {
+ keys_out_of_order(f, k, iter->data->k, iter->is_extents)) {
+ struct bkey ku = bkey_unpack_key(f, k);
+ struct bkey nu = bkey_unpack_key(f, iter->data->k);
char buf1[80], buf2[80];
bch_dump_bucket(iter->b);
- bch_bkey_to_text(buf1, sizeof(buf1), k);
- bch_bkey_to_text(buf2, sizeof(buf2), iter->data->k);
+ bch_bkey_to_text(buf1, sizeof(buf1), &ku);
+ bch_bkey_to_text(buf2, sizeof(buf2), &nu);
panic("out of order/overlapping:\n%s\n%s\n", buf1, buf2);
}
}
-struct bkey *bch_btree_node_iter_next_all(struct btree_node_iter *iter)
+struct bkey_packed *bch_btree_node_iter_next_all(struct btree_node_iter *iter)
{
- struct bkey *ret = bch_btree_node_iter_peek_all(iter);
+ struct bkey_packed *ret = bch_btree_node_iter_peek_all(iter);
if (ret) {
bch_btree_node_iter_advance(iter);
@@ -1165,6 +1238,21 @@ struct bkey *bch_btree_node_iter_next_all(struct btree_node_iter *iter)
EXPORT_SYMBOL(bch_btree_node_iter_next_all);
#endif
+bool bch_btree_node_iter_next_unpack(struct btree_keys *b,
+ struct btree_node_iter *iter,
+ struct bkey_tup *tup)
+{
+ struct bkey_format *f = &b->set->data->format;
+ struct bkey_packed *k = bch_btree_node_iter_next(iter);
+
+ if (!k)
+ return false;
+
+ bkey_disassemble(tup, f, k);
+ return true;
+}
+EXPORT_SYMBOL(bch_btree_node_iter_next_unpack);
+
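
A hypothetical walk over a node's keys with the new helper, assuming a struct btree_keys *b (the matching for_each_btree_node_key_unpack() wrapper is added to bset.h later in this patch):

	struct btree_node_iter iter;
	struct bkey_tup tup;

	bch_btree_node_iter_init_from_start(b, &iter);
	while (bch_btree_node_iter_next_unpack(b, &iter, &tup))
		/* tup.k is the unpacked key; tup also carries the value */
		pr_debug("key %llu:%llu size %u",
			 tup.k.p.inode, tup.k.p.offset, tup.k.size);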
/* Mergesort */
void bch_bset_sort_state_free(struct bset_sort_state *state)
@@ -1193,26 +1281,58 @@ static void btree_mergesort(struct btree_keys *b, struct bset *bset,
struct btree_node_iter *iter,
ptr_filter_fn filter, bool merge)
{
- struct bkey *k, *prev = NULL, *out = bset->start;
+ struct bkey_format *in_f = &b->set->data->format;
+ struct bkey_format *out_f = &bset->format;
+ struct bkey_packed *k, *prev = NULL, *out = bset->start;
+ struct bkey_tup tup;
while (!bch_btree_node_iter_end(iter)) {
k = bch_btree_node_iter_next_all(iter);
- bkey_copy(out, k);
+ BUG_ON((void *) __bkey_idx(out, k->u64s) >
+ (void *) bset + (PAGE_SIZE << b->page_order));
- if (filter && filter(b, out))
- continue;
+ bkey_disassemble(&tup, in_f, k);
- if (bkey_deleted(out))
+ if (filter && filter(b, bkey_tup_to_s(&tup)))
continue;
- if (merge && prev && bch_bkey_try_merge(b, prev, out))
+ if (bkey_deleted(&tup.k))
continue;
+ if (merge && prev && b->ops->key_merge) {
+ BKEY_PADDED(k) tmp;
+
+ BUG_ON(bkey_bytes(&tup.k) > sizeof(tmp));
+
+ bkey_reassemble(&tmp.k, bkey_tup_to_s_c(&tup));
+
+ if (bch_bkey_try_merge(b, (void *) prev, &tmp.k))
+ continue;
+
+ bkey_disassemble(&tup, in_f, bkey_to_packed(&tmp.k));
+ }
+
+ if (prev) {
+ bkey_pack(prev, (void *) prev, out_f);
+ out = bkey_next(prev);
+ } else {
+ out = bset->start;
+ }
+
+ bkey_reassemble((void *) out, bkey_tup_to_s_c(&tup));
+
prev = out;
out = bkey_next(out);
}
+ if (prev) {
+ bkey_pack(prev, (void *) prev, out_f);
+ out = bkey_next(prev);
+ } else {
+ out = bset->start;
+ }
+
bset->u64s = (u64 *) out - bset->_data;
pr_debug("sorted %i keys", bset->u64s);
@@ -1240,6 +1360,8 @@ static void __btree_sort(struct btree_keys *b, struct btree_node_iter *iter,
start_time = local_clock();
+ out->format = b->set->data->format;
+
/*
* If we're only doing a partial sort (start != 0), then we can't merge
* extents because that might produce extents that overlap with 0 size
@@ -1250,18 +1372,20 @@ static void __btree_sort(struct btree_keys *b, struct btree_node_iter *iter,
else
sort(b, out, iter);
+ BUG_ON(set_bytes(out) > (PAGE_SIZE << b->page_order));
+
b->nsets = start;
if (!start && order == b->page_order) {
+ unsigned u64s = out->u64s;
/*
* Our temporary buffer is the same size as the btree node's
* buffer, we can just swap buffers instead of doing a big
* memcpy()
*/
- out->magic = b->set->data->magic;
- out->seq = b->set->data->seq;
- out->version = b->set->data->version;
+ *out = *b->set->data;
+ out->u64s = u64s;
swap(out, b->set->data);
} else {
b->set[start].data->u64s = out->u64s;
@@ -1342,6 +1466,8 @@ void bch_btree_sort_into(struct btree_keys *dst,
btree_mergesort(src, dst->set->data, &iter, filter, true);
+ BUG_ON(set_bytes(dst->set->data) > (PAGE_SIZE << dst->page_order));
+
bch_time_stats_update(&state->time, start_time);
dst->nr_live_u64s = dst->set->data->u64s;
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 9bf7fa4a311c..e035b98f1495 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -166,7 +166,7 @@ struct bset_tree {
unsigned extra;
/* copy of the last key in the set */
- struct bkey end;
+ struct bkey_packed end;
struct bkey_float *tree;
/*
@@ -183,13 +183,30 @@ struct bset_tree {
struct bset *data;
};
+typedef bool (*ptr_filter_fn)(struct btree_keys *, struct bkey_s);
+
+typedef bool (*iter_cmp_fn)(struct btree_node_iter_set,
+ struct btree_node_iter_set);
+
+enum merge_result {
+ BCH_MERGE_NOMERGE,
+
+ /*
+ * The keys were mergeable, but merging would have overflowed the maximum
+ * size - so instead l was grown to the maximum size, and both keys were modified:
+ */
+ BCH_MERGE_PARTIAL,
+ BCH_MERGE_MERGE,
+};
+
struct btree_keys_ops {
- bool (*key_normalize)(struct btree_keys *, struct bkey *);
- bool (*key_merge)(struct btree_keys *,
- struct bkey *, struct bkey *);
+ ptr_filter_fn key_normalize;
+ enum merge_result (*key_merge)(struct btree_keys *,
+ struct bkey_i *, struct bkey_i *);
bool (*key_merge_inline)(struct btree_keys *,
struct btree_node_iter *,
- struct bkey *, struct bkey *);
+ struct bkey_packed *,
+ struct bkey_packed *);
/*
* Only used for deciding whether to use bkey_start_pos(k) or just the
@@ -242,7 +259,7 @@ static inline bool bset_written(struct btree_keys *b, struct bset_tree *t)
return t <= b->set + b->nsets - b->last_set_unwritten;
}
-static inline bool bkey_written(struct btree_keys *b, struct bkey *k)
+static inline bool bkey_written(struct btree_keys *b, struct bkey_packed *k)
{
return !b->last_set_unwritten || k < b->set[b->nsets].data->start;
}
@@ -296,16 +313,17 @@ void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *,
void bch_bset_init_next(struct btree_keys *, struct bset *);
void bch_bset_build_written_tree(struct btree_keys *);
-void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey *);
+void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey_packed *);
void bch_bset_insert(struct btree_keys *, struct btree_node_iter *,
- struct bkey *);
+ struct bkey_i *);
/* Bkey utility code */
-struct bkey *bkey_prev(struct btree_keys *, struct bset_tree *, struct bkey *);
+struct bkey_packed *bkey_prev(struct btree_keys *, struct bset_tree *,
+ struct bkey_packed *);
-static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
+static inline struct bkey_packed *bset_bkey_idx(struct bset *i, unsigned idx)
{
return bkey_idx(i, idx);
}
@@ -316,16 +334,18 @@ static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
* key, r will be untouched.
*/
static inline bool bch_bkey_try_merge(struct btree_keys *b,
- struct bkey *l, struct bkey *r)
+ struct bkey_i *l,
+ struct bkey_i *r)
{
return b->ops->key_merge
- ? b->ops->key_merge(b, l, r)
+ ? b->ops->key_merge(b, l, r) == BCH_MERGE_MERGE
: false;
}
static inline bool bch_bkey_try_merge_inline(struct btree_keys *b,
struct btree_node_iter *iter,
- struct bkey *l, struct bkey *r)
+ struct bkey_packed *l,
+ struct bkey_packed *r)
{
return b->ops->key_merge_inline
? b->ops->key_merge_inline(b, iter, l, r)
@@ -366,22 +386,20 @@ struct btree_node_iter {
unsigned size:24;
unsigned used;
-#ifdef CONFIG_BCACHE_DEBUG
struct btree_keys *b;
-#endif
struct btree_node_iter_set {
- struct bkey *k, *end;
+ struct bkey_packed *k, *end;
} data[MAX_BSETS];
};
void bch_btree_node_iter_push(struct btree_node_iter *,
- struct bkey *, struct bkey *);
+ struct bkey_packed *, struct bkey_packed *);
void bch_btree_node_iter_init(struct btree_keys *, struct btree_node_iter *,
struct bpos);
void bch_btree_node_iter_init_from_start(struct btree_keys *,
struct btree_node_iter *);
-struct bkey *bch_btree_node_iter_bset_pos(struct btree_node_iter *,
- struct bset *);
+struct bkey_packed *bch_btree_node_iter_bset_pos(struct btree_node_iter *,
+ struct bset *);
void bch_btree_node_iter_sort(struct btree_node_iter *);
void bch_btree_node_iter_advance(struct btree_node_iter *);
@@ -391,7 +409,8 @@ static inline bool bch_btree_node_iter_end(struct btree_node_iter *iter)
return !iter->used;
}
-static inline struct bkey *bch_btree_node_iter_peek_all(struct btree_node_iter *iter)
+static inline struct bkey_packed *
+bch_btree_node_iter_peek_all(struct btree_node_iter *iter)
{
return bch_btree_node_iter_end(iter)
? NULL
@@ -401,11 +420,12 @@ static inline struct bkey *bch_btree_node_iter_peek_all(struct btree_node_iter *
/* In debug mode, bch_btree_node_iter_next_all() does debug checks */
#ifdef CONFIG_BCACHE_DEBUG
-struct bkey *bch_btree_node_iter_next_all(struct btree_node_iter *);
+struct bkey_packed *bch_btree_node_iter_next_all(struct btree_node_iter *);
#else
-static inline struct bkey *bch_btree_node_iter_next_all(struct btree_node_iter *iter)
+static inline struct bkey_packed *
+bch_btree_node_iter_next_all(struct btree_node_iter *iter)
{
- struct bkey *ret = bch_btree_node_iter_peek_all(iter);
+ struct bkey_packed *ret = bch_btree_node_iter_peek_all(iter);
if (ret)
bch_btree_node_iter_advance(iter);
@@ -414,9 +434,10 @@ static inline struct bkey *bch_btree_node_iter_next_all(struct btree_node_iter *
}
#endif
-static inline struct bkey *bch_btree_node_iter_next(struct btree_node_iter *iter)
+static inline struct bkey_packed *
+bch_btree_node_iter_next(struct btree_node_iter *iter)
{
- struct bkey *ret;
+ struct bkey_packed *ret;
do {
ret = bch_btree_node_iter_next_all(iter);
@@ -425,9 +446,10 @@ static inline struct bkey *bch_btree_node_iter_next(struct btree_node_iter *iter
return ret;
}
-static inline struct bkey *bch_btree_node_iter_peek(struct btree_node_iter *iter)
+static inline struct bkey_packed *
+bch_btree_node_iter_peek(struct btree_node_iter *iter)
{
- struct bkey *ret;
+ struct bkey_packed *ret;
while ((ret = bch_btree_node_iter_peek_all(iter)) &&
bkey_deleted(ret))
@@ -436,17 +458,24 @@ static inline struct bkey *bch_btree_node_iter_peek(struct btree_node_iter *iter
return ret;
}
-static inline struct bkey *
+static inline struct bkey_packed *
bch_btree_node_iter_peek_overlapping(struct btree_node_iter *iter,
struct bkey *end)
{
- struct bkey *ret;
+ const struct bkey_format *f = &iter->b->set->data->format;
+ struct bkey_packed *ret;
+ struct bkey u;
while ((ret = bch_btree_node_iter_peek_all(iter)) &&
- (bkey_cmp(ret->p, bkey_start_pos(end)) <= 0))
+ (bkey_cmp_left_packed(f, ret, bkey_start_pos(end)) <= 0))
bch_btree_node_iter_next_all(iter);
- return ret && bkey_cmp(bkey_start_pos(ret), end->p) < 0 ? ret : NULL;
+ if (!ret)
+ return NULL;
+
+ u = bkey_unpack_key(f, ret);
+
+ return bkey_cmp(bkey_start_pos(&u), end->p) < 0 ? ret : NULL;
}
/*
@@ -461,6 +490,14 @@ bch_btree_node_iter_peek_overlapping(struct btree_node_iter *iter,
for (bch_btree_node_iter_init_from_start((b), (iter)); \
((k) = bch_btree_node_iter_next_all(iter));)
+bool bch_btree_node_iter_next_unpack(struct btree_keys *,
+ struct btree_node_iter *,
+ struct bkey_tup *);
+
+#define for_each_btree_node_key_unpack(b, tup, iter) \
+ for (bch_btree_node_iter_init_from_start((b), (iter)); \
+ bch_btree_node_iter_next_unpack(b, iter, tup);)
+
/* Sorting */
struct bset_sort_state {
@@ -472,8 +509,6 @@ struct bset_sort_state {
struct time_stats time;
};
-typedef bool (*ptr_filter_fn)(struct btree_keys *, struct bkey *);
-
typedef void (*btree_keys_sort_fn)(struct btree_keys *, struct bset *,
struct btree_node_iter *iter);
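
To make the new enum merge_result concrete, here is a hypothetical key_merge hook for extents - not code from this patch - showing when each value would be returned. extents_mergeable() is an assumed helper standing in for whatever adjacency/compatibility checks a real implementation needs, it assumes l immediately precedes r, and a real hook would also have to combine the values, not just the key headers:

	static enum merge_result example_extent_merge(struct btree_keys *b,
						      struct bkey_i *l,
						      struct bkey_i *r)
	{
		/* assumed helper: l and r are adjacent, compatible extents */
		if (!extents_mergeable(l, r))
			return BCH_MERGE_NOMERGE;

		if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) {
			/* grow l to the maximum size, then trim the overlap off r */
			bch_key_resize(&l->k, KEY_SIZE_MAX);
			bch_cut_front(l->k.p, r);
			return BCH_MERGE_PARTIAL;
		}

		/* l absorbs r completely; the caller drops r on BCH_MERGE_MERGE */
		bch_key_resize(&l->k, l->k.size + r->k.size);
		return BCH_MERGE_MERGE;
	}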
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 9f5eff16ec63..a23f4e2576a5 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -246,19 +246,17 @@ void bch_btree_node_read_done(struct btree *b, struct cache *ca,
const struct bch_extent_ptr *ptr)
{
struct cache_set *c = b->c;
+ const struct bkey_format *f = &b->keys.set->data->format;
const char *err;
struct bset *i = btree_bset_first(b);
struct btree_node_iter *iter;
- struct bkey *k;
+ struct bkey_packed *k;
iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
iter->size = btree_blocks(c);
iter->used = 0;
iter->is_extents = b->keys.ops->is_extents;
-
-#ifdef CONFIG_BCACHE_DEBUG
iter->b = &b->keys;
-#endif
err = "dynamic fault";
if (bch_meta_read_fault("btree"))
@@ -298,6 +296,8 @@ void bch_btree_node_read_done(struct btree *b, struct cache *ca,
for (k = i->start;
k != bset_bkey_last(i);) {
+ struct bkey_tup tup;
+
if (!k->u64s) {
btree_node_error(b, ca, ptr,
"KEY_U64s 0: %zu bytes of metadata lost",
@@ -315,12 +315,17 @@ void bch_btree_node_read_done(struct btree *b, struct cache *ca,
break;
}
+ bkey_disassemble(&tup, f, k);
+
if (bkey_invalid(c, b->level
? BKEY_TYPE_BTREE
- : b->btree_id, k)) {
- char buf[80];
+ : b->btree_id,
+ bkey_tup_to_s_c(&tup))) {
+ char buf[160];
- bch_bkey_val_to_text(b, buf, sizeof(buf), k);
+ bkey_disassemble(&tup, f, k);
+ bch_bkey_val_to_text(b, buf, sizeof(buf),
+ bkey_tup_to_s_c(&tup));
btree_node_error(b, ca, ptr,
"invalid bkey %s", buf);
@@ -352,7 +357,8 @@ void bch_btree_node_read_done(struct btree *b, struct cache *ca,
i = b->keys.set[0].data;
err = "short btree key";
if (b->keys.set[0].size &&
- bkey_cmp(b->key.p, b->keys.set[0].end.p) < 0)
+ bkey_cmp_packed(&b->keys.set->data->format,
+ &b->key.k, &b->keys.set[0].end) < 0)
goto err;
out:
@@ -484,7 +490,7 @@ static void do_btree_node_write(struct closure *cl)
struct btree *b = container_of(cl, struct btree, io);
struct bset *i = btree_bset_last(b);
BKEY_PADDED(key) k;
- struct bkey_i_extent *e;
+ struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
struct cache *ca;
size_t blocks_to_write = set_blocks(i, block_bytes(b->c));
@@ -536,7 +542,7 @@ static void do_btree_node_write(struct closure *cl)
*/
bkey_copy(&k.key, &b->key);
- e = bkey_i_to_extent(&k.key);
+ e = bkey_i_to_s_extent(&k.key);
extent_for_each_ptr(e, ptr)
SET_PTR_OFFSET(ptr, PTR_OFFSET(ptr) +
@@ -990,17 +996,18 @@ int bch_btree_cache_alloc(struct cache_set *c)
/* Btree in memory cache - hash table */
-static inline u64 PTR_HASH(const struct bkey *k)
+static inline u64 PTR_HASH(const struct bkey_i *k)
{
return bkey_i_to_extent_c(k)->v.ptr[0]._val;
}
-static struct hlist_head *mca_hash(struct cache_set *c, const struct bkey *k)
+static struct hlist_head *mca_hash(struct cache_set *c, const struct bkey_i *k)
{
return &c->bucket_hash[hash_32(PTR_HASH(k), BUCKET_HASH_BITS)];
}
-static inline struct btree *mca_find(struct cache_set *c, const struct bkey *k)
+static inline struct btree *mca_find(struct cache_set *c,
+ const struct bkey_i *k)
{
struct btree *b;
@@ -1089,7 +1096,7 @@ static void bch_cannibalize_unlock(struct cache_set *c)
}
}
-static struct btree *mca_alloc(struct cache_set *c, const struct bkey *k,
+static struct btree *mca_alloc(struct cache_set *c, const struct bkey_i *k,
int level, enum btree_id id, struct closure *cl)
{
struct btree *b = NULL;
@@ -1175,16 +1182,11 @@ err:
* the @write parameter.
*/
static struct btree *bch_btree_node_get(struct btree_iter *iter,
- const struct bkey *k,
+ const struct bkey_i *k,
int level)
{
int i = 0;
struct btree *b;
- BKEY_PADDED(k) tmp;
-
- /* k points into the parent which we'll unlock, save us a copy */
- bkey_copy(&tmp.k, k);
- k = &tmp.k;
BUG_ON(level < 0);
retry:
@@ -1379,7 +1381,7 @@ static struct btree *bch_btree_node_alloc(struct cache_set *c, int level,
&c->cache_all, NULL))
BUG();
- BUG_ON(k.key.size);
+ BUG_ON(k.key.k.size);
b = mca_alloc(c, &k.key, level, id, NULL);
BUG_ON(IS_ERR_OR_NULL(b));
@@ -1395,22 +1397,32 @@ static struct btree *bch_btree_node_alloc(struct cache_set *c, int level,
return b;
}
-struct btree *btree_node_alloc_replacement(struct btree *b,
- enum alloc_reserve reserve)
+struct btree *__btree_node_alloc_replacement(struct btree *b,
+ enum alloc_reserve reserve,
+ struct bkey_format format)
{
struct btree *n;
n = bch_btree_node_alloc(b->c, b->level, b->btree_id, reserve);
+ n->keys.set->data->format = format;
+
bch_btree_sort_into(&n->keys, &b->keys,
b->keys.ops->key_normalize,
&b->c->sort);
- n->key.p = b->key.p;
+ n->key.k.p = b->key.k.p;
trace_bcache_btree_node_alloc_replacement(b, n);
return n;
}
+struct btree *btree_node_alloc_replacement(struct btree *b,
+ enum alloc_reserve reserve)
+{
+ return __btree_node_alloc_replacement(b, reserve,
+ btree_keys_calc_format(&b->keys));
+}
+
static int __btree_check_reserve(struct cache_set *c,
enum alloc_reserve reserve,
unsigned required,
@@ -1477,7 +1489,7 @@ int bch_btree_root_alloc(struct cache_set *c, enum btree_id id,
b = bch_btree_node_alloc(c, 0, id, id);
- b->key.p = POS_MAX;
+ b->key.k.p = POS_MAX;
six_unlock_write(&b->lock);
bch_btree_node_write(b, writes, NULL);
@@ -1489,7 +1501,7 @@ int bch_btree_root_alloc(struct cache_set *c, enum btree_id id,
}
int bch_btree_root_read(struct cache_set *c, enum btree_id id,
- const struct bkey *k, unsigned level)
+ const struct bkey_i *k, unsigned level)
{
struct closure cl;
struct btree *b;
@@ -1573,7 +1585,7 @@ int bch_btree_node_rewrite(struct btree *b, struct btree_iter *iter, bool wait)
*/
void bch_btree_insert_and_journal(struct btree *b,
struct btree_node_iter *node_iter,
- struct bkey *insert,
+ struct bkey_i *insert,
struct journal_res *res)
{
struct cache_set *c = b->c;
@@ -1614,32 +1626,31 @@ static bool btree_insert_key(struct btree_iter *iter, struct btree *b,
{
bool dequeue = false;
struct btree_node_iter *node_iter = &iter->node_iters[b->level];
- struct bkey *insert = bch_keylist_front(insert_keys);
+ struct bkey_i *insert = bch_keylist_front(insert_keys), *orig = insert;
BKEY_PADDED(key) temp;
struct bpos done;
s64 newsize, oldsize = bch_count_data(&b->keys);
bool do_insert;
- struct bkey *orig = insert;
- BUG_ON(bkey_deleted(insert) && bkey_val_u64s(insert));
+ BUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
BUG_ON(write_block(b) != btree_bset_last(b));
BUG_ON(!b->level &&
- bkey_cmp(bkey_start_pos(insert), iter->pos) < 0);
+ bkey_cmp(bkey_start_pos(&insert->k), iter->pos) < 0);
bch_btree_node_iter_verify(&b->keys, node_iter);
if (b->keys.ops->is_extents) {
bkey_copy(&temp.key, insert);
insert = &temp.key;
- if (bkey_cmp(insert->p, b->key.p) > 0)
- bch_cut_back(b->key.p, insert);
+ if (bkey_cmp(insert->k.p, b->key.k.p) > 0)
+ bch_cut_back(b->key.k.p, &insert->k);
do_insert = bch_insert_fixup_extent(b, insert, node_iter,
replace, &done, res);
bch_cut_front(done, orig);
- dequeue = (orig->size == 0);
+ dequeue = (orig->k.size == 0);
} else {
- BUG_ON(bkey_cmp(insert->p, b->key.p) > 0);
+ BUG_ON(bkey_cmp(insert->k.p, b->key.k.p) > 0);
do_insert = bch_insert_fixup_key(b, insert, node_iter,
replace, &done, res);
@@ -1677,7 +1688,7 @@ static bool have_enough_space(struct btree *b, struct keylist *insert_keys)
? bch_keylist_nkeys(insert_keys)
: b->keys.ops->is_extents
? BKEY_EXTENT_MAX_U64s * 3
- : bch_keylist_front(insert_keys)->u64s;
+ : bch_keylist_front(insert_keys)->k.u64s;
return u64s <= bch_btree_keys_u64s_remaining(&b->keys);
}
@@ -1685,12 +1696,12 @@ static bool have_enough_space(struct btree *b, struct keylist *insert_keys)
static void verify_keys_sorted(struct keylist *l)
{
#ifdef CONFIG_BCACHE_DEBUG
- struct bkey *k;
+ struct bkey_i *k;
for (k = l->bot;
k < l->top && bkey_next(k) < l->top;
k = bkey_next(k))
- BUG_ON(bkey_cmp(k->p, bkey_next(k)->p) > 0);
+ BUG_ON(bkey_cmp(k->k.p, bkey_next(k)->k.p) > 0);
#endif
}
@@ -1714,7 +1725,7 @@ bch_btree_insert_keys(struct btree *b,
{
bool done = false, inserted = false, need_split = false;
struct journal_res res = { 0, 0 };
- struct bkey *k = bch_keylist_front(insert_keys);
+ struct bkey_i *k = bch_keylist_front(insert_keys);
verify_keys_sorted(insert_keys);
BUG_ON(!btree_node_intent_locked(iter, b->level));
@@ -1727,7 +1738,7 @@ bch_btree_insert_keys(struct btree *b,
* of it, in the bkey_cmpxchg() or handle_existing_key_newer()
* cases
*/
- unsigned n_min = bch_keylist_front(insert_keys)->u64s;
+ unsigned n_min = bch_keylist_front(insert_keys)->k.u64s;
unsigned n_max = bch_keylist_nkeys(insert_keys);
unsigned actual_min = jset_u64s(n_min) * 2;
@@ -1751,8 +1762,8 @@ bch_btree_insert_keys(struct btree *b,
/* finished for this node */
if (b->keys.ops->is_extents
- ? bkey_cmp(bkey_start_pos(k), b->key.p) >= 0
- : bkey_cmp(k->p, b->key.p) > 0) {
+ ? bkey_cmp(bkey_start_pos(&k->k), b->key.k.p) >= 0
+ : bkey_cmp(k->k.p, b->key.k.p) > 0) {
done = true;
break;
}
@@ -1763,7 +1774,7 @@ bch_btree_insert_keys(struct btree *b,
break;
}
- if (!b->level && journal_res_full(&res, k))
+ if (!b->level && journal_res_full(&res, &k->k))
break;
if (btree_insert_key(iter, b, insert_keys,
@@ -1818,7 +1829,7 @@ static int btree_split(struct btree *b,
struct btree *n1, *n2 = NULL, *n3 = NULL;
struct bset *set1, *set2;
uint64_t start_time = local_clock();
- struct bkey *k;
+ struct bkey_packed *k;
enum btree_insert_status status;
int ret;
@@ -1903,12 +1914,13 @@ static int btree_split(struct btree *b,
n2 = bch_btree_node_alloc(iter->c, b->level,
iter->btree_id, reserve);
set2 = btree_bset_first(n2);
+ set2->format = set1->format;
if (!parent) {
n3 = bch_btree_node_alloc(iter->c, b->level + 1,
iter->btree_id, reserve);
- n3->key.p = POS_MAX;
+ n3->key.k.p = POS_MAX;
six_unlock_write(&n3->lock);
}
@@ -1921,7 +1933,7 @@ static int btree_split(struct btree *b,
k = bkey_next(k))
;
- n1->key.p = k->p;
+ n1->key.k.p = bkey_unpack_key(&n1->keys.set->data->format, k).p;
k = bkey_next(k);
@@ -1938,7 +1950,7 @@ static int btree_split(struct btree *b,
bset_bkey_last(set1),
set2->u64s * sizeof(u64));
- n2->key.p = b->key.p;
+ n2->key.k.p = b->key.k.p;
six_unlock_write(&n1->lock);
six_unlock_write(&n2->lock);
@@ -2011,7 +2023,7 @@ static int btree_split(struct btree *b,
replace, persistent);
if (n2 &&
- bkey_cmp(iter->pos, n1->key.p) > 0) {
+ bkey_cmp(iter->pos, n1->key.k.p) > 0) {
six_unlock_intent(&n1->lock);
btree_iter_node_set(iter, n2);
@@ -2155,7 +2167,7 @@ traverse:
break;
bch_btree_iter_set_pos(iter,
- bkey_start_pos(bch_keylist_front(insert_keys)));
+ bkey_start_pos(&bch_keylist_front(insert_keys)->k));
ret = bch_btree_iter_traverse(iter);
if (ret)
@@ -2178,13 +2190,14 @@ traverse:
* -EAGAIN: @iter->cl was put on a waitlist waiting for btree node allocation
* -EINTR: btree node was changed while upgrading to write lock
*/
-int bch_btree_insert_check_key(struct btree_iter *iter, struct bkey *check_key)
+int bch_btree_insert_check_key(struct btree_iter *iter,
+ struct bkey_i *check_key)
{
struct bkey_i_cookie *cookie;
BKEY_PADDED(key) tmp;
- check_key->type = KEY_TYPE_COOKIE;
- set_bkey_val_bytes(check_key, sizeof(struct bch_cookie));
+ check_key->k.type = KEY_TYPE_COOKIE;
+ set_bkey_val_bytes(&check_key->k, sizeof(struct bch_cookie));
cookie = bkey_i_to_cookie(check_key);
get_random_bytes(&cookie->v, sizeof(cookie->v));
@@ -2193,7 +2206,7 @@ int bch_btree_insert_check_key(struct btree_iter *iter, struct bkey *check_key)
bch_btree_node_iter_init(&iter->nodes[0]->keys,
&iter->node_iters[0],
- bkey_start_pos(check_key));
+ bkey_start_pos(&check_key->k));
return bch_btree_insert_at(iter, &keylist_single(&tmp.key), NULL,
NULL, iter->btree_id, BTREE_INSERT_ATOMIC);
@@ -2215,7 +2228,7 @@ int bch_btree_insert(struct cache_set *c, enum btree_id id,
int ret, ret2;
bch_btree_iter_init(&iter, c, id,
- bkey_start_pos(bch_keylist_front(keys)));
+ bkey_start_pos(&bch_keylist_front(keys)->k));
ret = bch_btree_iter_traverse(&iter);
if (unlikely(ret))
@@ -2243,28 +2256,42 @@ int bch_btree_iter_unlock(struct btree_iter *iter)
}
/* peek_all() doesn't skip deleted keys */
-static const struct bkey *__btree_iter_peek_all(struct btree_iter *iter)
+static inline struct bkey_s_c __btree_iter_peek_all(struct btree_iter *iter)
{
- struct bkey *k =
+ const struct bkey_format *f =
+ &iter->nodes[iter->level]->keys.set->data->format;
+ struct bkey_packed *k =
bch_btree_node_iter_peek_all(&iter->node_iters[iter->level]);
+ struct bkey_s_c ret;
+
+ if (!k)
+ return bkey_s_c_null;
- if (k && expensive_debug_checks(iter->c))
- bkey_debugcheck(iter->nodes[iter->level], k);
+ bkey_disassemble(&iter->tup, f, k);
+ ret = bkey_tup_to_s_c(&iter->tup);
- return k;
+ if (expensive_debug_checks(iter->c))
+ bkey_debugcheck(iter->nodes[iter->level], ret);
+
+ return ret;
}
-static const struct bkey *__btree_iter_peek(struct btree_iter *iter)
+static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter)
{
- const struct bkey *ret;
+ const struct bkey_format *f =
+ &iter->nodes[iter->level]->keys.set->data->format;
+ struct bkey_packed *k =
+ bch_btree_node_iter_peek(&iter->node_iters[iter->level]);
+ struct bkey_s_c ret;
- while (1) {
- ret = __btree_iter_peek_all(iter);
- if (!ret || !bkey_deleted(ret))
- break;
+ if (!k)
+ return bkey_s_c_null;
- bch_btree_node_iter_next_all(&iter->node_iters[iter->level]);
- }
+ bkey_disassemble(&iter->tup, f, k);
+ ret = bkey_tup_to_s_c(&iter->tup);
+
+ if (expensive_debug_checks(iter->c))
+ bkey_debugcheck(iter->nodes[iter->level], ret);
return ret;
}
@@ -2303,9 +2330,13 @@ static void btree_iter_lock_root(struct btree_iter *iter, struct bpos pos)
static int btree_iter_down(struct btree_iter *iter, struct bpos pos)
{
- const struct bkey *k = __btree_iter_peek(iter);
- struct btree *b = bch_btree_node_get(iter, k, iter->level - 1);
+ struct btree *b;
+ struct bkey_s_c k = __btree_iter_peek(iter);
+ BKEY_PADDED(k) tmp;
+ bkey_reassemble(&tmp.k, k);
+
+ b = bch_btree_node_get(iter, &tmp.k, iter->level - 1);
if (unlikely(IS_ERR(b)))
return PTR_ERR(b);
@@ -2338,14 +2369,18 @@ retry:
while (iter->nodes[iter->level] &&
!(is_btree_node(iter, iter->level) &&
btree_node_relock(iter, iter->level) &&
- btree_iter_cmp(iter, pos, iter->nodes[iter->level]->key.p)))
+ btree_iter_cmp(iter, pos, iter->nodes[iter->level]->key.k.p)))
btree_iter_up(iter);
+ /*
+ * If we've got a btree node locked (i.e. we aren't about to relock the
+ * root) - advance its node iterator if necessary:
+ */
if (iter->nodes[iter->level]) {
- const struct bkey *k;
+ struct bkey_s_c k;
- while ((k = __btree_iter_peek_all(iter)) &&
- !btree_iter_cmp(iter, pos, k->p))
+ while ((k = __btree_iter_peek_all(iter)).k &&
+ !btree_iter_cmp(iter, pos, k.k->p))
bch_btree_node_iter_next_all(&iter->node_iters[iter->level]);
}
@@ -2397,8 +2432,8 @@ struct btree *bch_btree_iter_peek_node(struct btree_iter *iter)
bch_btree_iter_traverse(iter);
if ((b = iter->nodes[iter->level])) {
- BUG_ON(bkey_cmp(b->key.p, iter->pos) < 0);
- iter->pos = b->key.p;
+ BUG_ON(bkey_cmp(b->key.k.p, iter->pos) < 0);
+ iter->pos = b->key.k.p;
}
return b;
@@ -2423,14 +2458,14 @@ struct btree *bch_btree_iter_next_node(struct btree_iter *iter)
b = iter->nodes[iter->level];
- if (bkey_cmp(iter->pos, b->key.p) < 0) {
+ if (bkey_cmp(iter->pos, b->key.k.p) < 0) {
struct bpos pos = bkey_successor(iter->pos);
__bch_btree_iter_traverse(iter, 0, pos);
b = iter->nodes[iter->level];
}
- iter->pos = b->key.p;
+ iter->pos = b->key.k.p;
return b;
}
@@ -2459,73 +2494,74 @@ static struct bpos __bch_btree_iter_advance_pos(struct btree_iter *iter,
void bch_btree_iter_advance_pos(struct btree_iter *iter)
{
bch_btree_iter_set_pos(iter,
- __bch_btree_iter_advance_pos(iter, iter->k.p));
+ __bch_btree_iter_advance_pos(iter, iter->tup.k.p));
}
-const struct bkey *bch_btree_iter_peek(struct btree_iter *iter)
+struct bkey_s_c bch_btree_iter_peek(struct btree_iter *iter)
{
- const struct bkey *k;
+ struct bkey_s_c k;
struct bpos pos = iter->pos;
int ret;
while (1) {
ret = __bch_btree_iter_traverse(iter, 0, pos);
if (ret)
- return NULL;
+ return bkey_s_c_null;
- if (likely(k = __btree_iter_peek(iter))) {
- BUG_ON(bkey_cmp(k->p, pos) < 0);
- iter->k = *k;
+ if (likely((k = __btree_iter_peek(iter)).k)) {
+ BUG_ON(bkey_cmp(k.k->p, pos) < 0);
return k;
}
- pos = iter->nodes[0]->key.p;
+ pos = iter->nodes[0]->key.k.p;
if (!bkey_cmp(pos, POS_MAX))
- return NULL;
+ return bkey_s_c_null;
pos = __bch_btree_iter_advance_pos(iter, pos);
}
}
-const struct bkey *bch_btree_iter_peek_with_holes(struct btree_iter *iter)
+struct bkey_s_c bch_btree_iter_peek_with_holes(struct btree_iter *iter)
{
- const struct bkey *k;
+ struct bkey_s_c k;
+ struct bkey n;
int ret;
while (1) {
ret = __bch_btree_iter_traverse(iter, 0, iter->pos);
if (ret)
- return NULL;
+ return bkey_s_c_null;
- k = bch_btree_node_iter_peek_all(iter->node_iters);
+ k = __btree_iter_peek_all(iter);
recheck:
- if (!k || bkey_cmp(bkey_start_pos(k), iter->pos) > 0) {
+ if (!k.k || bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) {
/* hole */
- bkey_init(&iter->k);
- iter->k.p = iter->pos;
+ bkey_init(&n);
+ n.p = iter->pos;
- if (!k)
- k = &iter->nodes[0]->key;
+ if (!k.k)
+ k.k = &iter->nodes[0]->key.k;
if (iter->btree_id == BTREE_ID_EXTENTS) {
- if (iter->k.p.offset == KEY_OFFSET_MAX) {
+ if (n.p.offset == KEY_OFFSET_MAX) {
iter->pos = bkey_successor(iter->pos);
goto recheck;
}
- bch_key_resize(&iter->k,
+ bch_key_resize(&n,
min_t(u64, KEY_SIZE_MAX,
- (k->p.inode == iter->k.p.inode
- ? bkey_start_offset(k) : KEY_OFFSET_MAX) -
- iter->k.p.offset));
+ (k.k->p.inode == n.p.inode
+ ? bkey_start_offset(k.k)
+ : KEY_OFFSET_MAX) -
+ n.p.offset));
- BUG_ON(!iter->k.size);
+ BUG_ON(!n.size);
}
- return &iter->k;
- } else if (!bkey_deleted(k)) {
- iter->k = *k;
+ iter->tup.k = n;
+ return bkey_tup_to_s_c(&iter->tup);
+ } else if (!bkey_deleted(k.k)) {
return k;
} else {
bch_btree_node_iter_next_all(iter->node_iters);
@@ -2537,7 +2573,7 @@ recheck:
? bkey_cmp(iter->pos, POS_MAX)
: iter->pos.inode != KEY_INODE_MAX));
- return NULL;
+ return bkey_s_c_null;
}
void bch_btree_iter_init(struct btree_iter *iter, struct cache_set *c,
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 19eeafc55bc8..3225f42b94b3 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -213,16 +213,6 @@ struct btree_iter {
/* Current position of the iterator */
struct bpos pos;
- /*
- * Previous key returned - so that bch_btree_iter_next()/
- * bch_btree_iter_next_with_holes() can correctly advance pos.
- *
- * NOTE: KEY_DELETED(&iter->k) is used to remember whether or not the
- * previous key returned was a hole, so bch_btree_iter_advance_pos()
- * knows whether or not to advance the btree_node_iter.
- */
- struct bkey k;
-
u32 lock_seq[BTREE_MAX_DEPTH];
/*
@@ -241,6 +231,12 @@ struct btree_iter {
*/
struct btree *nodes[BTREE_MAX_DEPTH + 1];
struct btree_node_iter node_iters[BTREE_MAX_DEPTH];
+
+ /*
+ * Current unpacked key - so that bch_btree_iter_next()/
+ * bch_btree_iter_next_with_holes() can correctly advance pos.
+ */
+ struct bkey_tup tup;
};
int bch_btree_iter_unlock(struct btree_iter *);
@@ -264,8 +260,8 @@ void bch_btree_iter_init(struct btree_iter *, struct cache_set *,
struct btree *bch_btree_iter_peek_node(struct btree_iter *);
struct btree *bch_btree_iter_next_node(struct btree_iter *);
-const struct bkey *bch_btree_iter_peek(struct btree_iter *);
-const struct bkey *bch_btree_iter_peek_with_holes(struct btree_iter *);
+struct bkey_s_c bch_btree_iter_peek(struct btree_iter *);
+struct bkey_s_c bch_btree_iter_peek_with_holes(struct btree_iter *);
void bch_btree_iter_set_pos(struct btree_iter *, struct bpos);
void bch_btree_iter_advance_pos(struct btree_iter *);
bool bch_btree_iter_upgrade(struct btree_iter *);
@@ -297,14 +293,14 @@ static void inline btree_iter_node_set(struct btree_iter *iter, struct btree *b)
(b); \
(b) = bch_btree_iter_next_node(iter))
-#define for_each_btree_key(iter, c, btree_id, k, start) \
+#define for_each_btree_key(iter, c, btree_id, _k, start) \
for (bch_btree_iter_init((iter), (c), (btree_id), start); \
- ((k) = bch_btree_iter_peek(iter)); \
+ ((_k) = bch_btree_iter_peek(iter)).k; \
bch_btree_iter_advance_pos(iter))
-#define for_each_btree_key_with_holes(iter, c, btree_id, k, start) \
+#define for_each_btree_key_with_holes(iter, c, btree_id, _k, start) \
for (bch_btree_iter_init((iter), (c), (btree_id), start); \
- ((k) = bch_btree_iter_peek_with_holes(iter)); \
+ ((_k) = bch_btree_iter_peek_with_holes(iter)).k; \
bch_btree_iter_advance_pos(iter))
#define btree_node_root(b) ((b)->c->btree_roots[(b)->btree_id])
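
The iteration macros now hand back a struct bkey_s_c rather than a bare bkey pointer; a hypothetical caller (c and inum are placeholders) looks like the updated users later in this patch:

	struct btree_iter iter;
	struct bkey_s_c k;

	for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, k, POS(inum, 0)) {
		if (k.k->p.inode != inum)
			break;
		/* k.k is the unpacked key, k.v points at the (const) value */
	}
	bch_btree_iter_unlock(&iter);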
@@ -318,6 +314,9 @@ void bch_btree_node_read_done(struct btree *, struct cache *,
void bch_btree_flush(struct cache_set *);
void bch_btree_write_oldest(struct cache_set *, u64);
+struct btree *__btree_node_alloc_replacement(struct btree *,
+ enum alloc_reserve,
+ struct bkey_format);
struct btree *btree_node_alloc_replacement(struct btree *,
enum alloc_reserve);
int btree_check_reserve(struct btree *, struct btree_iter *,
@@ -325,11 +324,11 @@ int btree_check_reserve(struct btree *, struct btree_iter *,
int bch_btree_root_alloc(struct cache_set *, enum btree_id, struct closure *);
int bch_btree_root_read(struct cache_set *, enum btree_id,
- const struct bkey *, unsigned);
+ const struct bkey_i *, unsigned);
void bch_btree_insert_and_journal(struct btree *,
struct btree_node_iter *,
- struct bkey *,
+ struct bkey_i *,
struct journal_res *);
int bch_btree_insert_node(struct btree *, struct btree_iter *,
@@ -345,7 +344,7 @@ int bch_btree_insert_node(struct btree *, struct btree_iter *,
int bch_btree_insert_at(struct btree_iter *, struct keylist *,
struct bch_replace_info *, struct closure *,
enum alloc_reserve, unsigned);
-int bch_btree_insert_check_key(struct btree_iter *, struct bkey *);
+int bch_btree_insert_check_key(struct btree_iter *, struct bkey_i *);
int bch_btree_insert(struct cache_set *, enum btree_id, struct keylist *,
struct bch_replace_info *, struct closure *);
diff --git a/drivers/md/bcache/buckets.h b/drivers/md/bcache/buckets.h
index 25fd967515cb..ae41fc15dcb0 100644
--- a/drivers/md/bcache/buckets.h
+++ b/drivers/md/bcache/buckets.h
@@ -47,13 +47,13 @@ static inline size_t PTR_BUCKET_NR(const struct cache *ca,
* Returns 0 if no pointers or device offline - only for tracepoints!
*/
static inline size_t PTR_BUCKET_NR_TRACE(const struct cache_set *c,
- const struct bkey *k,
+ const struct bkey_i *k,
unsigned ptr)
{
const struct cache *ca;
size_t bucket = 0;
- if (k->type == BCH_EXTENT) {
+ if (k->k.type == BCH_EXTENT) {
const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
const struct bch_extent_ptr *p = &e->v.ptr[ptr];
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 3b854415ca34..34dc52846ee4 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -207,7 +207,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
{
struct dump_iter *i = file->private_data;
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
int err;
i->ubuf = buf;
@@ -222,8 +222,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
return i->ret;
for_each_btree_key(&iter, i->c, BTREE_ID_EXTENTS, k, i->from) {
- bch_bkey_val_to_text(iter.nodes[0], i->buf,
- sizeof(i->buf), k);
+ bch_bkey_val_to_text(iter.nodes[0], i->buf, sizeof(i->buf), k);
i->bytes = strlen(i->buf);
BUG_ON(i->bytes >= PAGE_SIZE);
i->buf[i->bytes] = '\n';
@@ -233,7 +232,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
if (err)
break;
- i->from = k->p;
+ i->from = k.k->p;
if (!i->size)
break;
diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c
index fad5eaca651a..13deefb9bbf4 100644
--- a/drivers/md/bcache/dirent.c
+++ b/drivers/md/bcache/dirent.c
@@ -34,43 +34,43 @@ static u64 bch_dirent_hash(const struct qstr *name)
}
#define __dirent_name_bytes(d) \
- (bkey_bytes(&(d)->k) - sizeof(struct bkey_i_dirent))
+ (bkey_val_bytes((d).k) - sizeof(struct bch_dirent))
-static unsigned dirent_name_bytes(const struct bkey_i_dirent *d)
+static unsigned dirent_name_bytes(struct bkey_s_c_dirent d)
{
unsigned len = __dirent_name_bytes(d);
- while (len && !d->v.d_name[len - 1])
+ while (len && !d.v->d_name[len - 1])
--len;
return len;
}
-static int dirent_cmp(const struct bkey_i_dirent *d, const struct qstr *q)
+static int dirent_cmp(struct bkey_s_c_dirent d,
+ const struct qstr *q)
{
int len = dirent_name_bytes(d);
- return len - q->len ?: memcmp(d->v.d_name, q->name, len);
+ return len - q->len ?: memcmp(d.v->d_name, q->name, len);
}
-static bool bch_dirent_invalid(const struct cache_set *c,
- const struct bkey *k)
+static bool bch_dirent_invalid(const struct cache_set *c, struct bkey_s_c k)
{
- if (k->type != BCH_DIRENT)
+ if (k.k->type != BCH_DIRENT)
return true;
- if (bkey_bytes(k) < sizeof(struct bkey_i_dirent))
+ if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent))
return true;
return false;
}
static void bch_dirent_to_text(const struct btree *b, char *buf,
- size_t size, const struct bkey *k)
+ size_t size, struct bkey_s_c k)
{
- const struct bkey_i_dirent *d = bkey_i_to_dirent_c(k);
+ struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
- scnprintf(buf, size, "%s -> %llu", d->v.d_name, d->v.d_inum);
+ scnprintf(buf, size, "%s -> %llu", d.v->d_name, d.v->d_inum);
}
const struct btree_keys_ops bch_dirent_ops = {
@@ -86,50 +86,51 @@ static int __bch_dirent_create(struct cache_set *c, u64 dir_inum,
bool update)
{
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
struct keylist keys;
struct bkey_i_dirent *dirent;
int ret = -ENOENT;
bch_keylist_init(&keys);
- bkey_init(keys.top);
- keys.top->type = BCH_DIRENT;
- set_bkey_val_bytes(keys.top, sizeof(struct bch_dirent) + name->len);
+ bkey_init(&keys.top->k);
+ keys.top->k.type = BCH_DIRENT;
+ set_bkey_val_bytes(&keys.top->k, sizeof(struct bch_dirent) + name->len);
- if (bch_keylist_realloc(&keys, keys.top->u64s))
+ if (bch_keylist_realloc(&keys, keys.top->k.u64s))
return -ENOMEM;
dirent = bkey_i_to_dirent(keys.top);
dirent->v.d_inum = dst_inum;
+
memcpy(dirent->v.d_name, name->name, name->len);
memset(dirent->v.d_name + name->len, 0,
round_up(name->len, sizeof(u64)) - name->len);
- BUG_ON(dirent_name_bytes(dirent) != name->len);
- BUG_ON(dirent_cmp(dirent, name));
+ BUG_ON(dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
+ BUG_ON(dirent_cmp(dirent_i_to_s_c(dirent), name));
bch_keylist_enqueue(&keys);
bch_btree_iter_init(&iter, c, BTREE_ID_DIRENTS,
POS(dir_inum, bch_dirent_hash(name)));
- while ((k = bch_btree_iter_peek_with_holes(&iter))) {
+ while ((k = bch_btree_iter_peek_with_holes(&iter)).k) {
/* hole? */
- if (k->type != BCH_DIRENT) {
+ if (k.k->type != BCH_DIRENT) {
if (!update)
goto insert;
break;
}
- if (!dirent_cmp(bkey_i_to_dirent_c(k), name)) {
+ if (!dirent_cmp(bkey_s_c_to_dirent(k), name)) {
/* found: */
if (!update) {
ret = -EEXIST;
break;
}
insert:
- dirent->k.p = k->p;
+ dirent->k.p = k.k->p;
ret = bch_btree_insert_at(&iter, &keys, NULL, NULL,
0, BTREE_INSERT_ATOMIC);
@@ -162,7 +163,7 @@ int bch_dirent_delete(struct cache_set *c, u64 dir_inum,
const struct qstr *name)
{
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
u64 hash = bch_dirent_hash(name);
int ret = -ENOENT;
@@ -172,13 +173,13 @@ int bch_dirent_delete(struct cache_set *c, u64 dir_inum,
bch_btree_iter_init(&iter, c, BTREE_ID_DIRENTS,
POS(dir_inum, bch_dirent_hash(name)));
- while ((k = bch_btree_iter_peek_with_holes(&iter))) {
+ while ((k = bch_btree_iter_peek_with_holes(&iter)).k) {
/* hole, not found */
- if (k->type != BCH_DIRENT)
+ if (k.k->type != BCH_DIRENT)
break;
- if (!dirent_cmp(bkey_i_to_dirent_c(k), name)) {
- struct bkey delete;
+ if (!dirent_cmp(bkey_s_c_to_dirent(k), name)) {
+ struct bkey_i delete;
/*
* XXX
@@ -189,9 +190,9 @@ int bch_dirent_delete(struct cache_set *c, u64 dir_inum,
* probing)
*/
- bkey_init(&delete);
- delete.p = k->p;
- set_bkey_deleted(&delete);
+ bkey_init(&delete.k);
+ delete.k.p = k.k->p;
+ set_bkey_deleted(&delete.k);
ret = bch_btree_insert_at(&iter,
&keylist_single(&delete),
@@ -213,8 +214,8 @@ u64 bch_dirent_lookup(struct cache_set *c, u64 dir_inum,
const struct qstr *name)
{
struct btree_iter iter;
- const struct bkey *k;
- const struct bkey_i_dirent *dirent;
+ struct bkey_s_c k;
+ struct bkey_s_c_dirent dirent;
u64 hash = bch_dirent_hash(name);
pr_debug("searching for %llu:%llu (%s)",
@@ -223,14 +224,14 @@ u64 bch_dirent_lookup(struct cache_set *c, u64 dir_inum,
for_each_btree_key_with_holes(&iter, c, BTREE_ID_DIRENTS, k,
POS(dir_inum, bch_dirent_hash(name))) {
/* hole, not found */
- if (k->type != BCH_DIRENT)
+ if (k.k->type != BCH_DIRENT)
break;
- dirent = bkey_i_to_dirent_c(k);
+ dirent = bkey_s_c_to_dirent(k);
/* collision? */
if (!dirent_cmp(dirent, name)) {
- u64 inum = dirent->v.d_inum;
+ u64 inum = dirent.v->d_inum;
bch_btree_iter_unlock(&iter);
pr_debug("found %s: %llu", name->name, inum);
@@ -246,14 +247,14 @@ u64 bch_dirent_lookup(struct cache_set *c, u64 dir_inum,
int bch_empty_dir(struct cache_set *c, u64 dir_inum)
{
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, k, POS(dir_inum, 0)) {
- if (k->p.inode > dir_inum)
+ if (k.k->p.inode > dir_inum)
break;
- if (k->type == BCH_DIRENT &&
- k->p.inode == dir_inum) {
+ if (k.k->type == BCH_DIRENT &&
+ k.k->p.inode == dir_inum) {
bch_btree_iter_unlock(&iter);
return -ENOTEMPTY;
}
@@ -270,8 +271,8 @@ int bch_readdir(struct file *file, struct dir_context *ctx)
struct super_block *sb = inode->i_sb;
struct cache_set *c = sb->s_fs_info;
struct btree_iter iter;
- const struct bkey *k;
- const struct bkey_i_dirent *dirent;
+ struct bkey_s_c k;
+ struct bkey_s_c_dirent dirent;
unsigned len;
if (!dir_emit_dots(file, ctx))
@@ -281,33 +282,33 @@ int bch_readdir(struct file *file, struct dir_context *ctx)
for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, k,
POS(inode->i_ino, ctx->pos)) {
- if (k->type != BCH_DIRENT)
+ if (k.k->type != BCH_DIRENT)
continue;
- dirent = bkey_i_to_dirent_c(k);
+ dirent = bkey_s_c_to_dirent(k);
pr_debug("saw %llu:%llu (%s) -> %llu",
- k->p.inode, k->p.offset,
- dirent->v.d_name, dirent->v.d_inum);
+ k.k->p.inode, k.k->p.offset,
+ dirent.v->d_name, dirent.v->d_inum);
- if (bkey_cmp(k->p, POS(inode->i_ino, ctx->pos)) < 0)
+ if (bkey_cmp(k.k->p, POS(inode->i_ino, ctx->pos)) < 0)
continue;
- if (k->p.inode > inode->i_ino)
+ if (k.k->p.inode > inode->i_ino)
break;
len = dirent_name_bytes(dirent);
- pr_debug("emitting %s", dirent->v.d_name);
+ pr_debug("emitting %s", dirent.v->d_name);
/*
* XXX: dir_emit() can fault and block, while we're holding locks
*/
- if (!dir_emit(ctx, dirent->v.d_name, len,
- dirent->v.d_inum, DT_UNKNOWN))
+ if (!dir_emit(ctx, dirent.v->d_name, len,
+ dirent.v->d_inum, DT_UNKNOWN))
break;
- ctx->pos = k->p.offset + 1;
+ ctx->pos = k.k->p.offset + 1;
}
bch_btree_iter_unlock(&iter);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 8d841c90b73a..bfc62cf3d604 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -19,6 +19,14 @@
#include <trace/events/bcache.h>
+#define bkey_extent_p(_f, _k) val_to_extent(bkeyp_val(_f, _k))
+
+static inline unsigned bkeyp_extent_ptrs(const struct bkey_format *f,
+ const struct bkey_packed *k)
+{
+ return bkeyp_val_u64s(f, k);
+}
+
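
A small sketch of what the helper above buys us, assuming (as bkeyp_extent_ptrs() itself implies) that a BCH_EXTENT value is just an array of struct bch_extent_ptr, one u64 each - f and k are placeholders:

	static void example_walk_packed_extent(const struct bkey_format *f,
					       const struct bkey_packed *k)
	{
		const struct bch_extent *e = bkey_p_c_extent_val(f, k);
		unsigned i;

		for (i = 0; i < bkeyp_extent_ptrs(f, k); i++)
			pr_debug("ptr %u: dev %llu offset %llu gen %llu",
				 i, PTR_DEV(&e->ptr[i]),
				 PTR_OFFSET(&e->ptr[i]), PTR_GEN(&e->ptr[i]));
	}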
static void sort_key_next(struct btree_node_iter *iter,
struct btree_node_iter_set *i)
{
@@ -35,16 +43,17 @@ static void sort_key_next(struct btree_node_iter *iter,
* Necessary for btree_sort_fixup() - if there are multiple keys that compare
* equal in different sets, we have to process them newest to oldest.
*/
-static inline bool key_sort_cmp(struct btree_node_iter_set l,
- struct btree_node_iter_set r)
-{
- s64 c = bkey_cmp(l.k->p, r.k->p);
-
- return c ? c > 0 : l.k > r.k;
-}
+#define key_sort_cmp(l, r) \
+({ \
+ int _c = bkey_cmp_packed(&iter->b->set->data->format, \
+ (l).k, (r).k); \
+ \
+ _c ? _c > 0 : (l).k > (r).k; \
+})
static inline bool should_drop_next_key(struct btree_node_iter *iter)
{
+ const struct bkey_format *f = &iter->b->set->data->format;
struct btree_node_iter_set *l = iter->data, *r = iter->data + 1;
if (bkey_deleted(l->k))
@@ -62,20 +71,23 @@ static inline bool should_drop_next_key(struct btree_node_iter *iter)
* comes first; so if l->k compares equal to r->k then l->k is older and
* should be dropped.
*/
- return !bkey_cmp(l->k->p, r->k->p);
+ return !bkey_cmp_packed(f, l->k, r->k);
}
void bch_key_sort_fix_overlapping(struct btree_keys *b,
struct bset *bset,
struct btree_node_iter *iter)
{
- struct bkey *out = bset->start;
+ struct bkey_packed *out = bset->start;
heap_resort(iter, key_sort_cmp);
while (!bch_btree_node_iter_end(iter)) {
if (!should_drop_next_key(iter)) {
- bkey_copy(out, iter->data->k);
+ /* XXX: need better bkey_copy */
+ //bkey_copy(out, iter->data->k);
+ memcpy(out, iter->data->k,
+ bkey_bytes(iter->data->k));
out = bkey_next(out);
}
@@ -90,21 +102,22 @@ void bch_key_sort_fix_overlapping(struct btree_keys *b,
/* This returns true if insert should be inserted, false otherwise */
-bool bch_insert_fixup_key(struct btree *b, struct bkey *insert,
+bool bch_insert_fixup_key(struct btree *b, struct bkey_i *insert,
struct btree_node_iter *iter,
struct bch_replace_info *replace,
struct bpos *done,
struct journal_res *res)
{
- BUG_ON(replace);
+ const struct bkey_format *f = &b->keys.set->data->format;
+ struct bkey_packed *k;
+ int c;
- while (1) {
- struct bkey *k = bch_btree_node_iter_peek_all(iter);
- if (!k || bkey_cmp(k->p, insert->p) > 0)
- break;
+ BUG_ON(replace);
- if (!bkey_cmp(k->p, insert->p) && !bkey_deleted(k)) {
- __set_bkey_deleted(k);
+ while ((k = bch_btree_node_iter_peek_all(iter)) &&
+ (c = bkey_cmp_packed(f, k, &insert->k)) <= 0) {
+ if (!c && !bkey_deleted(k)) {
+ k->type = KEY_TYPE_DELETED;
b->keys.nr_live_u64s -= k->u64s;
}
@@ -117,7 +130,7 @@ bool bch_insert_fixup_key(struct btree *b, struct bkey *insert,
/* Common among btree and extent ptrs */
-bool bch_extent_has_device(const struct bkey_i_extent *e, unsigned dev)
+bool bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
{
const struct bch_extent_ptr *ptr;
@@ -129,8 +142,9 @@ bool bch_extent_has_device(const struct bkey_i_extent *e, unsigned dev)
}
static bool should_drop_ptr(const struct cache_set *c,
- const struct bkey_i_extent *e,
- const struct bch_extent_ptr *ptr)
+ const struct bch_extent *e,
+ const struct bch_extent_ptr *ptr,
+ unsigned nr_ptrs)
{
unsigned dev;
struct cache *ca;
@@ -148,18 +162,18 @@ static bool should_drop_ptr(const struct cache_set *c,
if (bch_is_zero(mi[dev].uuid.b, sizeof(uuid_le)))
return true;
- if (bch_extent_ptr_is_dirty(c, e, ptr))
+ if (__bch_extent_ptr_is_dirty(c, e, ptr, nr_ptrs))
return false;
return (ca = PTR_CACHE(c, ptr)) && ptr_stale(ca, ptr);
}
-unsigned bch_extent_nr_ptrs_after_normalize(const struct cache_set *c,
- const struct bkey *k)
+unsigned bch_extent_nr_ptrs_after_normalize(const struct btree *b,
+ const struct bkey_packed *k)
{
- const struct bkey_i_extent *e;
- const struct bch_extent_ptr *ptr;
- unsigned ret = 0;
+ const struct bkey_format *f = &b->keys.set->data->format;
+ const struct bch_extent *e;
+ unsigned ret = 0, ptr;
switch (k->type) {
case KEY_TYPE_DELETED:
@@ -167,22 +181,23 @@ unsigned bch_extent_nr_ptrs_after_normalize(const struct cache_set *c,
return 0;
case KEY_TYPE_DISCARD:
- return k->version ? BKEY_U64s : 0;
+ return bkey_unpack_key(f, k).version ? BKEY_U64s : 0;
case KEY_TYPE_ERROR:
- return BKEY_U64s;
+ return bkeyp_key_u64s(f, k);
case BCH_EXTENT:
- e = bkey_i_to_extent_c(k);
+ e = bkey_p_c_extent_val(f, k);
rcu_read_lock();
- extent_for_each_ptr(e, ptr)
- if (!should_drop_ptr(c, e, ptr))
+ for (ptr = 0; ptr < bkeyp_extent_ptrs(f, k); ptr++)
+ if (!should_drop_ptr(b->c, e, &e->ptr[ptr],
+ bkeyp_extent_ptrs(f, k)))
ret++;
rcu_read_unlock();
if (ret)
- ret += BKEY_U64s;
+ ret += bkeyp_key_u64s(f, k);
return ret;
default:
@@ -190,22 +205,22 @@ unsigned bch_extent_nr_ptrs_after_normalize(const struct cache_set *c,
}
}
-void bch_extent_drop_stale(struct cache_set *c, struct bkey *k)
+void bch_extent_drop_stale(struct cache_set *c, struct bkey_s k)
{
- struct bkey_i_extent *e = bkey_i_to_extent(k);
+ struct bkey_s_extent e = bkey_s_to_extent(k);
struct bch_extent_ptr *ptr;
rcu_read_lock();
extent_for_each_ptr_backwards(e, ptr)
- if (should_drop_ptr(c, e, ptr))
- bch_extent_drop_ptr(&e->k, ptr - e->v.ptr);
+ if (should_drop_ptr(c, extent_s_to_s_c(e).v,
+ ptr, bch_extent_ptrs(e)))
+ bch_extent_drop_ptr(e, ptr - e.v->ptr);
rcu_read_unlock();
}
-static bool bch_ptr_normalize(struct btree_keys *bk,
- struct bkey *k)
+static bool bch_ptr_normalize(struct btree_keys *bk, struct bkey_s k)
{
struct btree *b = container_of(bk, struct btree, keys);
@@ -215,19 +230,19 @@ static bool bch_ptr_normalize(struct btree_keys *bk,
/*
* Common among btree pointers and normal data extents
*/
-static bool __ptr_invalid(const struct cache_set *c, const struct bkey *k)
+static bool __ptr_invalid(const struct cache_set *c, struct bkey_s_c k)
{
- const struct bkey_i_extent *e;
+ struct bkey_s_c_extent e;
const struct bch_extent_ptr *ptr;
struct cache_member *mi;
bool ret = true;
- if (k->u64s < BKEY_U64s)
+ if (k.k->u64s < BKEY_U64s)
return true;
- switch (k->type) {
+ switch (k.k->type) {
case BCH_EXTENT:
- e = bkey_i_to_extent_c(k);
+ e = bkey_s_c_to_extent(k);
if (bch_extent_ptrs(e) > BKEY_EXTENT_PTRS_MAX)
return true;
@@ -246,11 +261,11 @@ static bool __ptr_invalid(const struct cache_set *c, const struct bkey *k)
continue;
}
- if ((offset + e->k.size >
+ if ((offset + e.k->size >
m->bucket_size * m->nbuckets) ||
(offset <
m->bucket_size * m->first_bucket) ||
- ((offset & (m->bucket_size - 1)) + e->k.size >
+ ((offset & (m->bucket_size - 1)) + e.k->size >
m->bucket_size))
goto invalid;
}
@@ -271,7 +286,7 @@ invalid:
*/
static const char *bch_ptr_status(const struct cache_set *c,
struct cache_member *mi,
- const struct bkey_i_extent *e)
+ struct bkey_s_c_extent e)
{
const struct bch_extent_ptr *ptr;
@@ -294,13 +309,14 @@ static const char *bch_ptr_status(const struct cache_set *c,
continue;
}
- if (offset + e->k.size > m->bucket_size * m->nbuckets)
+ if (offset + e.k->size > m->bucket_size * m->nbuckets)
return "invalid: offset past end of device";
if (offset < m->bucket_size * m->first_bucket)
return "invalid: offset before first bucket";
- if ((offset & (m->bucket_size - 1)) + e->k.size > m->bucket_size)
+ if ((offset & (m->bucket_size - 1)) +
+ e.k->size > m->bucket_size)
return "invalid: spans multiple buckets";
if ((ca = PTR_CACHE(c, ptr)) &&
@@ -308,34 +324,34 @@ static const char *bch_ptr_status(const struct cache_set *c,
return "stale";
}
- if (!e->k.size)
+ if (!e.k->size)
return "zeroed key";
return "";
}
static void bch_extent_to_text(const struct btree *b, char *buf,
- size_t size, const struct bkey *k)
+ size_t size, struct bkey_s_c k)
{
struct cache_set *c = b->c;
- const struct bkey_i_extent *e;
+ struct bkey_s_c_extent e;
char *out = buf, *end = buf + size;
const struct bch_extent_ptr *ptr;
#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
- switch (k->type) {
+ switch (k.k->type) {
case BCH_EXTENT:
- e = bkey_i_to_extent_c(k);
+ e = bkey_s_c_to_extent(k);
extent_for_each_ptr(e, ptr) {
- if (ptr != e->v.ptr)
+ if (ptr != e.v->ptr)
p(", ");
p("%llu:%llu gen %llu", PTR_DEV(ptr),
PTR_OFFSET(ptr), PTR_GEN(ptr));
}
- if (EXTENT_CACHED(&e->v))
+ if (EXTENT_CACHED(e.v))
p(" cached");
#if 0
if (KEY_CSUM(k))
@@ -350,17 +366,16 @@ static void bch_extent_to_text(const struct btree *b, char *buf,
/* Btree ptrs */
-static bool bch_btree_ptr_invalid(const struct cache_set *c,
- const struct bkey *k)
+static bool bch_btree_ptr_invalid(const struct cache_set *c, struct bkey_s_c k)
{
return bkey_extent_cached(k) ||
- k->size ||
+ k.k->size ||
__ptr_invalid(c, k);
}
-static void btree_ptr_debugcheck(struct btree *b, const struct bkey *k)
+static void btree_ptr_debugcheck(struct btree *b, struct bkey_s_c k)
{
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
struct cache_set *c = b->c;
unsigned seq;
@@ -370,7 +385,7 @@ static void btree_ptr_debugcheck(struct btree *b, const struct bkey *k)
struct cache *ca;
bool bad;
- if (EXTENT_CACHED(&e->v)) {
+ if (EXTENT_CACHED(e.v)) {
btree_bug(b, "btree ptr marked as cached");
return;
}
@@ -412,7 +427,7 @@ struct cache *bch_btree_pick_ptr(struct cache_set *c,
const struct btree *b,
const struct bch_extent_ptr **ptr)
{
- const struct bkey_i_extent *e = bkey_i_to_extent_c(&b->key);
+ struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&b->key);
struct cache *ca;
rcu_read_lock();
@@ -448,65 +463,71 @@ const struct bkey_ops bch_bkey_btree_ops = {
/* Extents */
-void bch_bkey_copy_single_ptr(struct bkey *dst, const struct bkey *src,
+void bch_bkey_copy_single_ptr(struct bkey_i *dst,
+ struct bkey_s_c _src,
unsigned i)
{
- const struct bkey_i_extent *srce = bkey_i_to_extent_c(src);
+ struct bkey_s_c_extent srce = bkey_s_c_to_extent(_src);
struct bkey_i_extent *dste;
BUG_ON(i > bch_extent_ptrs(srce));
/* Only copy the header, key, and one pointer. */
- *dst = srce->k;
+ dst->k = *srce.k;
dste = bkey_i_to_extent(dst);
- dste->v.ptr[0] = srce->v.ptr[i];
+ dste->v.ptr[0] = srce.v->ptr[i];
- bch_set_extent_ptrs(dste, 1);
+ bch_set_extent_ptrs(extent_i_to_s(dste), 1);
#if 0
/* We didn't copy the checksum so clear that bit. */
SET_KEY_CSUM(dst, 0);
#endif
}
-bool bch_cut_front(struct bpos where, struct bkey *k)
+bool __bch_cut_front(struct bpos where, struct bkey_s k)
{
- struct bkey_i_extent *e;
+ struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
unsigned len = 0;
- BUG_ON(bkey_cmp(where, k->p) > 0);
+ BUG_ON(bkey_cmp(where, k.k->p) > 0);
- if (bkey_cmp(where, bkey_start_pos(k)) <= 0)
+ if (bkey_cmp(where, bkey_start_pos(k.k)) <= 0)
return false;
- if (bkey_cmp(where, k->p) < 0)
- len = k->p.offset - where.offset;
+ if (bkey_cmp(where, k.k->p) < 0)
+ len = k.k->p.offset - where.offset;
else
- k->p = where;
+ k.k->p = where;
if (len)
- switch (k->type) {
+ switch (k.k->type) {
case BCH_EXTENT:
- e = bkey_i_to_extent(k);
+ e = bkey_s_to_extent(k);
extent_for_each_ptr(e, ptr)
SET_PTR_OFFSET(ptr, PTR_OFFSET(ptr) +
- e->k.size - len);
+ e.k->size - len);
break;
default:
break;
}
- BUG_ON(len > k->size);
- k->size = len;
+ BUG_ON(len > k.k->size);
+ k.k->size = len;
if (!len)
- __set_bkey_deleted(k);
+ __set_bkey_deleted(k.k);
return true;
}
+bool bch_cut_front(struct bpos where, struct bkey_i *k)
+{
+ return __bch_cut_front(where, bkey_i_to_s(k));
+}
+
bool bch_cut_back(struct bpos where, struct bkey *k)
{
unsigned len = 0;
@@ -536,13 +557,13 @@ bool bch_cut_back(struct bpos where, struct bkey *k)
* Returns a key corresponding to the start of @k split at @where, @k will be
* the second half of the split
*/
-#define bch_key_split(where, k) \
+#define bch_key_split(_where, _k) \
({ \
BKEY_PADDED(k) __tmp; \
\
- bkey_copy(&__tmp.k, k); \
- bch_cut_back(where, &__tmp.k); \
- bch_cut_front(where, k); \
+ bkey_copy(&__tmp.k, _k); \
+ bch_cut_back(_where, &__tmp.k.k); \
+ bch_cut_front(_where, _k); \
&__tmp.k; \
})
@@ -551,7 +572,8 @@ bool bch_cut_back(struct bpos where, struct bkey *k)
*
* bkey_start_offset(k) will be preserved, modifies where the extent ends
*/
-void bch_key_resize(struct bkey *k, unsigned new_size)
+void bch_key_resize(struct bkey *k,
+ unsigned new_size)
{
k->p.offset -= k->size;
k->p.offset += new_size;
@@ -559,19 +581,38 @@ void bch_key_resize(struct bkey *k, unsigned new_size)
}
/*
+ * In extent_sort_fix_overlapping(), insert_fixup_extent() and
+ * extent_merge_inline() we modify packed keys in place. To do that we unpack
+ * the key, modify the unpacked copy - then this helper writes it back to the
+ * original location, repacking it as necessary.
+ */
+static void extent_save(struct bkey_packed *dst, struct bkey *src,
+ const struct bkey_format *f)
+{
+ struct bkey_i *dst_unpacked;
+
+ if ((dst_unpacked = packed_to_bkey(dst)))
+ dst_unpacked->k = *src;
+ else
+ BUG_ON(!bkey_pack_key(dst, src, f));
+}
+
+/*
* Returns true if l > r - unless l == r, in which case returns true if l is
* older than r.
*
* Necessary for sort_fix_overlapping() - if there are multiple keys that
* compare equal in different sets, we have to process them newest to oldest.
*/
-static inline bool extent_sort_cmp(struct btree_node_iter_set l,
- struct btree_node_iter_set r)
-{
- s64 c = bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k));
-
- return c ? c > 0 : l.k < r.k;
-}
+#define extent_sort_cmp(l, r) \
+({ \
+ const struct bkey_format *_f = &iter->b->set->data->format; \
+ struct bkey _ul = bkey_unpack_key(_f, (l).k); \
+ struct bkey _ur = bkey_unpack_key(_f, (r).k); \
+ \
+ int _c = bkey_cmp(bkey_start_pos(&_ul), bkey_start_pos(&_ur)); \
+ _c ? _c > 0 : (l).k < (r).k; \
+})
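
The extent sort below leans on extent_save() for exactly the pattern described above; roughly, for a packed key _k in format f whose front needs cutting at some position where (all placeholder names), the sequence is:

	struct bkey_tup tup;

	bkey_disassemble(&tup, f, _k);			/* unpack key + value */
	__bch_cut_front(where, bkey_tup_to_s(&tup));	/* edit the unpacked copy */
	extent_save(_k, &tup.k, f);			/* write back, packed if it still fits */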
static inline void extent_sort_sift(struct btree_node_iter *iter, size_t i)
{
@@ -585,16 +626,26 @@ static inline void extent_sort_next(struct btree_node_iter *iter,
heap_sift(iter, i - iter->data, extent_sort_cmp);
}
-static struct bkey *extent_sort_append(struct btree_keys *b, struct bkey *out,
- struct bkey **prev, struct bkey *k)
+static struct bkey_packed *extent_sort_append(struct btree_keys *b,
+ struct bkey_packed *out,
+ struct bkey_packed **prev,
+ struct bkey_packed *k)
{
if (bkey_deleted(k))
return out;
- bkey_copy(out, k);
+ /* XXX: need better bkey_copy */
+ memcpy(out, k, bkey_bytes(k));
+ /*
+ * prev/out are packed, try_merge() works on unpacked keys... may make
+ * this work again later, but the main btree_mergesort() handles
+ * unpacking/merging/repacking
+ */
+#if 0
if (*prev && bch_bkey_try_merge(b, *prev, out))
return out;
+#endif
*prev = out;
return bkey_next(out);
@@ -604,33 +655,38 @@ void bch_extent_sort_fix_overlapping(struct btree_keys *b,
struct bset *bset,
struct btree_node_iter *iter)
{
- struct btree_node_iter_set *l = iter->data, *r;
- struct bkey *prev = NULL, *out = bset->start;
+ struct bkey_format *f = &b->set->data->format;
+ struct btree_node_iter_set *_l = iter->data, *_r;
+ struct bkey_packed *prev = NULL, *out = bset->start;
+ struct bkey_tup l, r;
heap_resort(iter, extent_sort_cmp);
while (!bch_btree_node_iter_end(iter)) {
if (iter->used == 1) {
- out = extent_sort_append(b, out, &prev, l->k);
- extent_sort_next(iter, l);
+ out = extent_sort_append(b, out, &prev, _l->k);
+ extent_sort_next(iter, _l);
continue;
}
- r = iter->data + 1;
+ _r = iter->data + 1;
if (iter->used > 2 &&
- extent_sort_cmp(r[0], r[1]))
- r++;
+ extent_sort_cmp(_r[0], _r[1]))
+ _r++;
+
+ bkey_disassemble(&l, f, _l->k);
+ bkey_disassemble(&r, f, _r->k);
/* If current key and next key don't overlap, just append */
- if (bkey_cmp(l->k->p, bkey_start_pos(r->k)) <= 0) {
- out = extent_sort_append(b, out, &prev, l->k);
- extent_sort_next(iter, l);
+ if (bkey_cmp(l.k.p, bkey_start_pos(&r.k)) <= 0) {
+ out = extent_sort_append(b, out, &prev, _l->k);
+ extent_sort_next(iter, _l);
continue;
}
/* Skip 0 size keys */
- if (!r->k->size) {
- extent_sort_next(iter, r);
+ if (!r.k.size) {
+ extent_sort_next(iter, _r);
continue;
}
@@ -641,31 +697,36 @@ void bch_extent_sort_fix_overlapping(struct btree_keys *b,
*/
/* can't happen because of comparison func */
- BUG_ON(l->k < r->k &&
- !bkey_cmp(bkey_start_pos(l->k), bkey_start_pos(r->k)));
+ BUG_ON(_l->k < _r->k &&
+ !bkey_cmp(bkey_start_pos(&l.k), bkey_start_pos(&r.k)));
- if (l->k > r->k) {
+ if (_l->k > _r->k) {
/* l wins, trim r */
- if (bkey_cmp(l->k->p, r->k->p) >= 0)
- sort_key_next(iter, r);
- else
- bch_cut_front(l->k->p, r->k);
+ if (bkey_cmp(l.k.p, r.k.p) >= 0) {
+ sort_key_next(iter, _r);
+ } else {
+ __bch_cut_front(l.k.p, bkey_tup_to_s(&r));
+ extent_save(_r->k, &r.k, f);
+ }
- extent_sort_sift(iter, r - iter->data);
- } else if (bkey_cmp(l->k->p, r->k->p) > 0) {
+ extent_sort_sift(iter, _r - iter->data);
+ } else if (bkey_cmp(l.k.p, r.k.p) > 0) {
BKEY_PADDED(k) tmp;
/* r wins, but it overlaps in the middle of l - split l: */
- bkey_copy(&tmp.k, l->k);
+ bkey_reassemble(&tmp.k, bkey_tup_to_s_c(&l));
+ bch_cut_back(bkey_start_pos(&r.k), &tmp.k.k);
+
+ __bch_cut_front(r.k.p, bkey_tup_to_s(&l));
+ extent_save(_l->k, &l.k, f);
- bch_cut_back(bkey_start_pos(r->k), &tmp.k);
- bch_cut_front(r->k->p, l->k);
extent_sort_sift(iter, 0);
- out = extent_sort_append(b, out, &prev, &tmp.k);
+ out = extent_sort_append(b, out, &prev,
+ bkey_to_packed(&tmp.k));
} else {
- /* r wins, no split: */
- bch_cut_back(bkey_start_pos(r->k), l->k);
+ bch_cut_back(bkey_start_pos(&r.k), &l.k);
+ extent_save(_l->k, &l.k, f);
}
}
@@ -675,7 +736,7 @@ void bch_extent_sort_fix_overlapping(struct btree_keys *b,
}
int __bch_add_sectors(struct cache_set *c, struct btree *b,
- const struct bkey_i_extent *e, u64 offset,
+ struct bkey_s_c_extent e, u64 offset,
int sectors, bool fail_if_stale)
{
const struct bch_extent_ptr *ptr;
@@ -685,7 +746,7 @@ int __bch_add_sectors(struct cache_set *c, struct btree *b,
extent_for_each_online_device(c, e, ptr, ca) {
bool stale, dirty = bch_extent_ptr_is_dirty(c, e, ptr);
- trace_bcache_add_sectors(ca, e, ptr, offset,
+ trace_bcache_add_sectors(ca, e.k, ptr, offset,
sectors, dirty);
/*
@@ -728,7 +789,7 @@ int __bch_add_sectors(struct cache_set *c, struct btree *b,
return 0;
stale:
- while (--ptr >= e->v.ptr)
+ while (--ptr >= e.v->ptr)
if ((ca = PTR_CACHE(c, ptr)))
bch_mark_data_bucket(c, ca, b, ptr, -sectors,
bch_extent_ptr_is_dirty(c, e, ptr));
@@ -737,24 +798,26 @@ stale:
return -1;
}
-static int bch_add_sectors(struct btree *b, const struct bkey *k, u64 offset,
- int sectors, bool fail_if_stale)
+static int bch_add_sectors(struct btree *b, struct bkey_s_c k,
+ u64 offset, int sectors, bool fail_if_stale)
{
- if (sectors && k->type == BCH_EXTENT) {
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ if (sectors && k.k->type == BCH_EXTENT) {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- int ret = __bch_add_sectors(b->c, b, e, offset, sectors, fail_if_stale);
+ int ret = __bch_add_sectors(b->c, b, e, offset,
+ sectors, fail_if_stale);
if (ret)
return ret;
- if (!EXTENT_CACHED(&e->v))
- bcache_dev_sectors_dirty_add(b->c, e->k.p.inode, offset, sectors);
+ if (!EXTENT_CACHED(e.v))
+ bcache_dev_sectors_dirty_add(b->c, e.k->p.inode,
+ offset, sectors);
}
return 0;
}
-static void bch_subtract_sectors(struct btree *b, struct bkey *k,
+static void bch_subtract_sectors(struct btree *b, struct bkey_s_c k,
u64 offset, int sectors)
{
bch_add_sectors(b, k, offset, -sectors, false);
@@ -762,27 +825,28 @@ static void bch_subtract_sectors(struct btree *b, struct bkey *k,
/* These wrappers subtract exactly the sectors that we're removing from @k */
static void bch_cut_subtract_back(struct btree *b, struct bpos where,
- struct bkey *k)
+ struct bkey_s k)
{
- bch_subtract_sectors(b, k, where.offset,
- k->p.offset - where.offset);
- bch_cut_back(where, k);
+ bch_subtract_sectors(b, bkey_s_to_s_c(k), where.offset,
+ k.k->p.offset - where.offset);
+ bch_cut_back(where, k.k);
}
static void bch_cut_subtract_front(struct btree *b, struct bpos where,
- struct bkey *k)
+ struct bkey_s k)
{
- bch_subtract_sectors(b, k, bkey_start_offset(k),
- where.offset - bkey_start_offset(k));
- bch_cut_front(where, k);
+ bch_subtract_sectors(b, bkey_s_to_s_c(k), bkey_start_offset(k.k),
+ where.offset - bkey_start_offset(k.k));
+ __bch_cut_front(where, k);
}
-static void bch_drop_subtract(struct btree *b, struct bkey *k)
+static void bch_drop_subtract(struct btree *b, struct bkey_s k)
{
- if (k->size)
- bch_subtract_sectors(b, k, bkey_start_offset(k), k->size);
- k->size = 0;
- __set_bkey_deleted(k);
+ if (k.k->size)
+ bch_subtract_sectors(b, bkey_s_to_s_c(k),
+ bkey_start_offset(k.k), k.k->size);
+ k.k->size = 0;
+ __set_bkey_deleted(k.k);
}
/*
@@ -793,27 +857,27 @@ static void bch_drop_subtract(struct btree *b, struct bkey *k)
* splitting done in bch_extent_insert_fixup, preserving such
* caching is difficult.
*/
-static bool bkey_cmpxchg_cmp(const struct bkey *l, const struct bkey *r)
+static bool bkey_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
{
- const struct bkey_i_extent *le, *re;
+ struct bkey_s_c_extent le, re;
s64 offset;
unsigned i;
- BUG_ON(!l->size || !r->size);
+ BUG_ON(!l.k->size || !r.k->size);
- if (l->type != r->type ||
- l->version != r->version)
+ if (l.k->type != r.k->type ||
+ l.k->version != r.k->version)
return false;
- switch (l->type) {
+ switch (l.k->type) {
case KEY_TYPE_COOKIE:
- return !memcmp(&bkey_i_to_cookie_c(l)->v,
- &bkey_i_to_cookie_c(r)->v,
+ return !memcmp(bkey_s_c_to_cookie(l).v,
+ bkey_s_c_to_cookie(r).v,
sizeof(struct bch_cookie));
case BCH_EXTENT:
- le = bkey_i_to_extent_c(l);
- re = bkey_i_to_extent_c(r);
+ le = bkey_s_c_to_extent(l);
+ re = bkey_s_c_to_extent(r);
/*
* bkey_cmpxchg() handles partial matches - when either l or r
@@ -825,12 +889,13 @@ static bool bkey_cmpxchg_cmp(const struct bkey *l, const struct bkey *r)
* matching how bch_cut_front() adjusts device pointer offsets
* when adjusting the start of a key:
*/
- offset = bkey_start_offset(l) - bkey_start_offset(r);
+ offset = bkey_start_offset(l.k) - bkey_start_offset(r.k);
if (bch_extent_ptrs(le) == bch_extent_ptrs(re)) {
for (i = 0; i < bch_extent_ptrs(le); i++)
- if (le->v.ptr[i]._val !=
- re->v.ptr[i]._val + (offset << PTR_OFFSET_OFFSET))
+ if (le.v->ptr[i]._val !=
+ re.v->ptr[i]._val +
+ (offset << PTR_OFFSET_OFFSET))
goto try_partial;
return true;
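The offset arithmetic above leans on an invariant visible earlier in this patch: bch_cut_front() advances each device pointer's offset by the number of sectors trimmed from the front of the extent. A self-contained sketch of why a trimmed key still compares equal to the original once the start-offset delta is added back (simplified fields, no PTR_OFFSET bit packing):

	#include <assert.h>
	#include <stdint.h>

	struct toy_extent { uint64_t start, dev_offset; uint32_t size; };

	static void cut_front_by(struct toy_extent *e, uint32_t n)
	{
		e->start      += n;
		e->dev_offset += n;	/* mirrors the SET_PTR_OFFSET() adjustment */
		e->size       -= n;
	}

	static int matches(const struct toy_extent *l, const struct toy_extent *r)
	{
		int64_t delta = (int64_t) (l->start - r->start);

		return l->dev_offset == (uint64_t) ((int64_t) r->dev_offset + delta);
	}

	int main(void)
	{
		struct toy_extent orig = { .start = 100, .dev_offset = 8000, .size = 64 };
		struct toy_extent cut  = orig;

		cut_front_by(&cut, 16);
		assert(matches(&cut, &orig));	/* still recognised as the same data */
		return 0;
	}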
@@ -870,35 +935,37 @@ static bool bkey_cmpxchg_cmp(const struct bkey *l, const struct bkey *r)
*/
static bool bkey_cmpxchg(struct btree *b,
struct btree_node_iter *iter,
- const struct bkey *k,
+ struct bkey_s_c k,
struct bch_replace_info *replace,
- struct bkey *new,
+ struct bkey_i *new,
struct bpos *done,
bool *inserted,
struct journal_res *res)
{
bool ret;
- struct bkey *old = &replace->key;
+ struct bkey_i *old = &replace->key;
/* must have something to compare against */
- BUG_ON(!bkey_val_u64s(old));
+ BUG_ON(!bkey_val_u64s(&old->k));
BUG_ON(b->level);
/* new must be a subset of old */
- BUG_ON(bkey_cmp(new->p, old->p) > 0 ||
- bkey_cmp(bkey_start_pos(new), bkey_start_pos(old)) < 0);
+ BUG_ON(bkey_cmp(new->k.p, old->k.p) > 0 ||
+ bkey_cmp(bkey_start_pos(&new->k),
+ bkey_start_pos(&old->k)) < 0);
/* if an exact match was requested, those are simple: */
if (replace->replace_exact) {
- ret = (k->u64s == old->u64s &&
- !memcmp(k, old, bkey_bytes(old)));
+ ret = bkey_val_bytes(k.k) == bkey_val_bytes(&old->k) &&
+ !memcmp(k.k, &old->k, sizeof(*k.k)) &&
+ !memcmp(k.v, &old->v, bkey_val_bytes(k.k));
if (ret)
replace->successes += 1;
else
replace->failures += 1;
- *done = new->p;
+ *done = new->k.p;
return ret;
}
@@ -906,9 +973,9 @@ static bool bkey_cmpxchg(struct btree *b,
* first, check if there was a hole - part of the new key that we
* haven't checked against any existing key
*/
- if (bkey_cmp(bkey_start_pos(k), *done) > 0) {
+ if (bkey_cmp(bkey_start_pos(k.k), *done) > 0) {
/* insert previous partial match: */
- if (bkey_cmp(*done, bkey_start_pos(new)) > 0) {
+ if (bkey_cmp(*done, bkey_start_pos(&new->k)) > 0) {
replace->successes += 1;
/*
@@ -927,17 +994,18 @@ static bool bkey_cmpxchg(struct btree *b,
*inserted = true;
}
- bch_cut_subtract_front(b, bkey_start_pos(k), new);
+ bch_cut_subtract_front(b, bkey_start_pos(k.k),
+ bkey_i_to_s(new));
/* advance @done from the end of prev key to the start of @k */
- *done = bkey_start_pos(k);
+ *done = bkey_start_pos(k.k);
}
- ret = bkey_cmpxchg_cmp(k, old);
+ ret = bkey_cmpxchg_cmp(k, bkey_i_to_s_c(old));
if (!ret) {
/* failed: */
replace->failures += 1;
- if (bkey_cmp(*done, bkey_start_pos(new)) > 0) {
+ if (bkey_cmp(*done, bkey_start_pos(&new->k)) > 0) {
/*
* [ prev key ]
* [ k ]
@@ -955,39 +1023,40 @@ static bool bkey_cmpxchg(struct btree *b,
}
/* update @new to be the part we haven't checked yet */
- if (bkey_cmp(k->p, new->p) > 0)
- bch_drop_subtract(b, new);
+ if (bkey_cmp(k.k->p, new->k.p) > 0)
+ bch_drop_subtract(b, bkey_i_to_s(new));
else
- bch_cut_subtract_front(b, k->p, new);
+ bch_cut_subtract_front(b, k.k->p, bkey_i_to_s(new));
} else
replace->successes += 1;
/* advance @done past the part of @k overlapping @new */
- *done = bkey_cmp(k->p, new->p) < 0 ? k->p : new->p;
+ *done = bkey_cmp(k.k->p, new->k.p) < 0 ? k.k->p : new->k.p;
return ret;
}
/* We are trying to insert a key with an older version than the existing one */
static void handle_existing_key_newer(struct btree *b,
struct btree_node_iter *iter,
- struct bkey *insert,
+ struct bkey_i *insert,
const struct bkey *k,
bool *inserted,
struct journal_res *res)
{
- struct bkey *split;
+ struct bkey_i *split;
/* k is the key currently in the tree, 'insert' the new key */
- switch (bch_extent_overlap(k, insert)) {
+ switch (bch_extent_overlap(k, &insert->k)) {
case BCH_EXTENT_OVERLAP_FRONT:
/* k and insert share the start, remove it from insert */
- bch_cut_subtract_front(b, k->p, insert);
+ bch_cut_subtract_front(b, k->p, bkey_i_to_s(insert));
break;
case BCH_EXTENT_OVERLAP_BACK:
/* k and insert share the end, remove it from insert */
- bch_cut_subtract_back(b, bkey_start_pos(k), insert);
+ bch_cut_subtract_back(b, bkey_start_pos(k),
+ bkey_i_to_s(insert));
break;
case BCH_EXTENT_OVERLAP_MIDDLE:
@@ -1006,14 +1075,14 @@ static void handle_existing_key_newer(struct btree *b,
* entry to @res.
*/
split = bch_key_split(bkey_start_pos(k), insert),
- bch_cut_subtract_front(b, k->p, insert);
+ bch_cut_subtract_front(b, k->p, bkey_i_to_s(insert));
bch_btree_insert_and_journal(b, iter, split, res);
*inserted = true;
break;
case BCH_EXTENT_OVERLAP_ALL:
/* k completely covers insert -- drop insert */
- bch_drop_subtract(b, insert);
+ bch_drop_subtract(b, bkey_i_to_s(insert));
break;
}
}
@@ -1043,7 +1112,7 @@ static void handle_existing_key_newer(struct btree *b,
* multiple bsets (i.e. full btree node):
*
* ∀ k, j
- * KEY_SIZE(k) != 0 ∧ KEY_SIZE(j) != 0 →
+ * k.size != 0 ∧ j.size != 0 →
* ¬ (k > bkey_start_pos(j) ∧ k < j)
*
* i.e. no two overlapping keys _of nonzero size_
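Spelled out as code, the invariant says that a nonzero-size key's position (which is the end of its extent) must never fall strictly inside another nonzero-size key. A small user-space predicate for the forbidden case, with illustrative types, might be:

	#include <stdbool.h>
	#include <stdint.h>

	struct ext { uint64_t end; uint32_t size; };	/* a key's position is its end */

	static uint64_t ext_start(const struct ext *e)
	{
		return e->end - e->size;
	}

	/* forbidden case: k's end lands strictly inside another live extent j */
	bool violates_invariant(const struct ext *k, const struct ext *j)
	{
		return k->size && j->size &&
			k->end > ext_start(j) && k->end < j->end;
	}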
@@ -1061,18 +1130,22 @@ static void handle_existing_key_newer(struct btree *b,
* If the end of done is not the same as the end of insert, then
* key insertion needs to continue/be retried.
*/
-bool bch_insert_fixup_extent(struct btree *b, struct bkey *insert,
+bool bch_insert_fixup_extent(struct btree *b, struct bkey_i *insert,
struct btree_node_iter *iter,
struct bch_replace_info *replace,
struct bpos *done,
struct journal_res *res)
{
- struct bkey *k, *split;
- struct bpos orig_insert = insert->p;
+ const struct bkey_format *f = &b->keys.set->data->format;
+ struct bpos orig_insert = insert->k.p;
+ struct bkey_packed *_k;
+ struct bkey_tup tup;
+ struct bkey_s k;
+ BKEY_PADDED(k) split;
bool inserted = false;
- BUG_ON(bkey_deleted(insert));
- BUG_ON(!insert->size);
+ BUG_ON(bkey_deleted(&insert->k));
+ BUG_ON(!insert->k.size);
/*
* The end of this key is the range processed so far.
@@ -1083,7 +1156,7 @@ bool bch_insert_fixup_extent(struct btree *b, struct bkey *insert,
*
* All sector counts up to @done are finalized.
*/
- *done = bkey_start_pos(insert);
+ *done = bkey_start_pos(&insert->k);
/*
* If this is a cmpxchg operation, @insert doesn't necessarily exist in
@@ -1101,18 +1174,24 @@ bool bch_insert_fixup_extent(struct btree *b, struct bkey *insert,
* can also insert keys with stale pointers, but for those we still need
* to proceed with the insertion.
*/
- if (bch_add_sectors(b, insert, bkey_start_offset(insert),
- insert->size, replace != NULL)) {
+ if (bch_add_sectors(b, bkey_i_to_s_c(insert),
+ bkey_start_offset(&insert->k),
+ insert->k.size, replace != NULL)) {
/* We raced - a dirty pointer was stale */
- *done = insert->p;
- insert->size = 0;
+ *done = insert->k.p;
+ insert->k.size = 0;
if (replace != NULL)
replace->failures += 1;
return false;
}
- while (insert->size &&
- (k = bch_btree_node_iter_peek_overlapping(iter, insert))) {
+ while (insert->k.size &&
+ (_k = bch_btree_node_iter_peek_overlapping(iter, &insert->k))) {
+ bool needs_split, res_full;
+
+ bkey_disassemble(&tup, f, _k);
+
+ k = bkey_tup_to_s(&tup);
/*
* Before setting @done, we first check if we have space for
* the insert in the btree node and journal reservation.
@@ -1123,16 +1202,16 @@ bool bch_insert_fixup_extent(struct btree *b, struct bkey *insert,
* iteration of this loop will insert one key, so we need
* room for three keys.
*/
- bool needs_split = (bch_btree_keys_u64s_remaining(&b->keys) <
- BKEY_EXTENT_MAX_U64s * 3);
- bool res_full = journal_res_full(res, insert);
+ needs_split = (bch_btree_keys_u64s_remaining(&b->keys) <
+ BKEY_EXTENT_MAX_U64s * 3);
+ res_full = journal_res_full(res, &insert->k);
if (needs_split || res_full) {
/*
* XXX: would be better to explicitly signal that we
* need to split
*/
- bch_cut_subtract_back(b, *done, insert);
+ bch_cut_subtract_back(b, *done, bkey_i_to_s(insert));
goto out;
}
@@ -1143,49 +1222,54 @@ bool bch_insert_fixup_extent(struct btree *b, struct bkey *insert,
* inserting. But we don't want to check them for replace
* operations.
*/
- if (replace == NULL)
- *done = bkey_cmp(k->p, insert->p) < 0 ? k->p : insert->p;
- else if (k->size &&
- !bkey_cmpxchg(b, iter, k, replace, insert, done,
- &inserted, res))
+ if (!replace)
+ *done = bkey_cmp(k.k->p, insert->k.p) < 0
+ ? k.k->p : insert->k.p;
+ else if (k.k->size &&
+ !bkey_cmpxchg(b, iter, bkey_s_to_s_c(k), replace,
+ insert, done, &inserted, res))
continue;
- if (k->size && insert->version &&
- insert->version < k->version) {
- handle_existing_key_newer(b, iter, insert, k,
+ if (k.k->size && insert->k.version &&
+ insert->k.version < k.k->version) {
+ handle_existing_key_newer(b, iter, insert, k.k,
&inserted, res);
continue;
}
/* k is the key currently in the tree, 'insert' the new key */
- switch (bch_extent_overlap(insert, k)) {
+ switch (bch_extent_overlap(&insert->k, k.k)) {
case BCH_EXTENT_OVERLAP_FRONT:
/* insert and k share the start, invalidate in k */
- bch_cut_subtract_front(b, insert->p, k);
+ bch_cut_subtract_front(b, insert->k.p, k);
+ extent_save(_k, k.k, f);
break;
case BCH_EXTENT_OVERLAP_BACK:
/* insert and k share the end, invalidate in k */
- bch_cut_subtract_back(b, bkey_start_pos(insert), k);
+ bch_cut_subtract_back(b, bkey_start_pos(&insert->k), k);
+ extent_save(_k, k.k, f);
+
/*
* As the auxiliary tree is indexed by the end of the
* key and we've just changed the end, update the
* auxiliary tree.
*/
- bch_bset_fix_invalidated_key(&b->keys, k);
+ bch_bset_fix_invalidated_key(&b->keys, _k);
bch_btree_node_iter_advance(iter);
break;
case BCH_EXTENT_OVERLAP_ALL:
/* The insert key completely covers k, invalidate k */
- if (!bkey_deleted(k))
- b->keys.nr_live_u64s -= k->u64s;
+ if (!bkey_deleted(_k))
+ b->keys.nr_live_u64s -= _k->u64s;
bch_drop_subtract(b, k);
- k->p = bkey_start_pos(insert);
+ k.k->p = bkey_start_pos(&insert->k);
+ extent_save(_k, k.k, f);
- bch_bset_fix_invalidated_key(&b->keys, k);
+ bch_bset_fix_invalidated_key(&b->keys, _k);
bch_btree_node_iter_advance(iter);
break;
@@ -1204,28 +1288,33 @@ bool bch_insert_fixup_extent(struct btree *b, struct bkey *insert,
* modify k _before_ doing the insert (which will move
* what k points to)
*/
- split = bch_key_split(bkey_start_pos(insert), k);
- bch_cut_subtract_front(b, insert->p, k);
- bch_bset_insert(&b->keys, iter, split);
+ bkey_reassemble(&split.k, bkey_s_to_s_c(k));
+ bch_cut_back(bkey_start_pos(&insert->k), &split.k.k);
+
+ __bch_cut_front(bkey_start_pos(&insert->k), k);
+ bch_cut_subtract_front(b, insert->k.p, k);
+ extent_save(_k, k.k, f);
+
+ bch_bset_insert(&b->keys, iter, &split.k);
break;
}
}
/* Was there a hole? */
- if (bkey_cmp(*done, insert->p) < 0) {
+ if (bkey_cmp(*done, insert->k.p) < 0) {
/*
* Holes not allowed for cmpxchg operations, so chop off
* whatever we're not inserting (but done needs to reflect what
* we've processed, i.e. what insert was)
*/
if (replace != NULL)
- bch_cut_subtract_back(b, *done, insert);
+ bch_cut_subtract_back(b, *done, bkey_i_to_s(insert));
*done = orig_insert;
}
out:
- if (insert->size) {
+ if (insert->k.size) {
bch_btree_insert_and_journal(b, iter, insert, res);
inserted = true;
}
@@ -1233,30 +1322,30 @@ out:
return inserted;
}
-static bool bch_extent_invalid(const struct cache_set *c, const const struct bkey *k)
+static bool bch_extent_invalid(const struct cache_set *c, struct bkey_s_c k)
{
- return (k->type == BCH_EXTENT &&
- !k->size) ||
+ return (k.k->type == BCH_EXTENT &&
+ !k.k->size) ||
__ptr_invalid(c, k);
}
-static void bch_extent_debugcheck(struct btree *b, const struct bkey *k)
+static void bch_extent_debugcheck(struct btree *b, struct bkey_s_c k)
{
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
struct cache_member_rcu *mi;
struct cache_set *c = b->c;
struct cache *ca;
struct bucket *g;
unsigned seq, stale;
- char buf[80];
+ char buf[160];
bool bad;
unsigned ptrs_per_tier[CACHE_TIERS];
unsigned i, dev, tier, replicas;
memset(ptrs_per_tier, 0, sizeof(ptrs_per_tier));
- if (bch_extent_ptrs(e) < bch_extent_replicas_needed(c, e)) {
+ if (bch_extent_ptrs(e) < bch_extent_replicas_needed(c, e.v)) {
bch_bkey_val_to_text(b, buf, sizeof(buf), k);
cache_set_bug(c, "extent key bad (too few replicas): %s", buf);
return;
@@ -1331,7 +1420,7 @@ static void bch_extent_debugcheck(struct btree *b, const struct bkey *k)
bad_device:
bch_bkey_val_to_text(b, buf, sizeof(buf), k);
cache_set_bug(c, "extent pointer %u device missing: %s",
- (unsigned) (ptr - e->v.ptr), buf);
+ (unsigned) (ptr - e.v->ptr), buf);
cache_member_info_put();
return;
@@ -1339,7 +1428,7 @@ bad_ptr:
bch_bkey_val_to_text(b, buf, sizeof(buf), k);
cache_set_bug(c, "extent pointer %u bad gc mark: %s:\nbucket %zu prio %i "
"gen %i last_gc %i mark 0x%08x",
- (unsigned) (ptr - e->v.ptr), buf, PTR_BUCKET_NR(ca, ptr),
+ (unsigned) (ptr - e.v->ptr), buf, PTR_BUCKET_NR(ca, ptr),
g->read_prio, PTR_BUCKET_GEN(ca, ptr),
g->oldest_gen, g->mark.counter);
cache_member_info_put();
@@ -1355,16 +1444,16 @@ static unsigned PTR_TIER(struct cache_member_rcu *mi,
return dev < mi->nr_in_set ? CACHE_TIER(&mi->m[dev]) : UINT_MAX;
}
-bool bch_extent_normalize(struct cache_set *c, struct bkey *k)
+bool bch_extent_normalize(struct cache_set *c, struct bkey_s k)
{
- struct bkey_i_extent *e;
+ struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
struct cache_member_rcu *mi;
unsigned i;
bool swapped, have_data = false;
bool cached;
- switch (k->type) {
+ switch (k.k->type) {
case KEY_TYPE_ERROR:
return false;
@@ -1373,18 +1462,18 @@ bool bch_extent_normalize(struct cache_set *c, struct bkey *k)
return true;
case KEY_TYPE_DISCARD:
- return !k->version;
+ return !k.k->version;
case BCH_EXTENT:
- e = bkey_i_to_extent(k);
+ e = bkey_s_to_extent(k);
/*
* Preserve cached status since its stored in the
* first pointer
*/
- cached = bch_extent_ptrs(e) && EXTENT_CACHED(&e->v);
+ cached = EXTENT_CACHED(e.v);
- bch_extent_drop_stale(c, &e->k);
+ bch_extent_drop_stale(c, k);
mi = cache_member_info_get(c);
@@ -1392,8 +1481,9 @@ bool bch_extent_normalize(struct cache_set *c, struct bkey *k)
do {
swapped = false;
for (i = 0; i + 1 < bch_extent_ptrs(e); i++) {
- if (PTR_TIER(mi, &e->v, i) > PTR_TIER(mi, &e->v, i + 1)) {
- swap(e->v.ptr[i], e->v.ptr[i + 1]);
+ if (PTR_TIER(mi, e.v, i) >
+ PTR_TIER(mi, e.v, i + 1)) {
+ swap(e.v->ptr[i], e.v->ptr[i + 1]);
swapped = true;
}
}
@@ -1408,20 +1498,19 @@ bool bch_extent_normalize(struct cache_set *c, struct bkey *k)
if (!have_data) {
bch_set_extent_ptrs(e, 0);
if (cached) {
- k->type = KEY_TYPE_DISCARD;
- if (!k->version)
+ k.k->type = KEY_TYPE_DISCARD;
+ if (!k.k->version)
return true;
} else {
- k->type = KEY_TYPE_ERROR;
+ k.k->type = KEY_TYPE_ERROR;
}
} else {
- SET_EXTENT_CACHED(&e->v, cached);
+ SET_EXTENT_CACHED(e.v, cached);
}
return false;
default:
BUG();
- return false;
}
}
@@ -1433,17 +1522,16 @@ bool bch_extent_normalize(struct cache_set *c, struct bkey *k)
* as the pointers are sorted by tier, hence preferring pointers to tier 0
* rather than pointers to tier 1.
*/
-
struct cache *bch_extent_pick_ptr_avoiding(struct cache_set *c,
- const struct bkey *k,
+ struct bkey_s_c k,
const struct bch_extent_ptr **ptr,
struct cache *avoid)
{
- const struct bkey_i_extent *e;
+ struct bkey_s_c_extent e;
const struct bch_extent_ptr *i;
struct cache *ca, *picked = NULL;
- switch (k->type) {
+ switch (k.k->type) {
case KEY_TYPE_DELETED:
case KEY_TYPE_DISCARD:
case KEY_TYPE_COOKIE:
@@ -1457,7 +1545,7 @@ struct cache *bch_extent_pick_ptr_avoiding(struct cache_set *c,
* Note: If DEV is PTR_LOST_DEV, PTR_CACHE returns NULL
* so if there are no other pointers, we'll return ERR_PTR(-EIO).
*/
- e = bkey_i_to_extent_c(k);
+ e = bkey_s_c_to_extent(k);
rcu_read_lock();
extent_for_each_online_device(c, e, i, ca)
@@ -1477,7 +1565,7 @@ struct cache *bch_extent_pick_ptr_avoiding(struct cache_set *c,
rcu_read_unlock();
/* data missing that's not supposed to be? */
- return EXTENT_CACHED(&e->v)
+ return EXTENT_CACHED(e.v)
? NULL
: ERR_PTR(-EIO);
@@ -1489,21 +1577,21 @@ struct cache *bch_extent_pick_ptr_avoiding(struct cache_set *c,
#if 0
static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
{
- return (l->val[bch_extent_ptrs(l)] + r->val[bch_extent_ptrs(r)]) &
+ return (l->val[bkeyp_extent_ptrs(l)] + r->val[bkeyp_extent_ptrs(r)]) &
~((uint64_t)1 << 63);
}
#endif
-static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey *r)
+static enum merge_result bch_extent_merge(struct btree_keys *bk,
+ struct bkey_i *l, struct bkey_i *r)
{
struct btree *b = container_of(bk, struct btree, keys);
- struct bkey_i_extent *el;
- struct bkey_i_extent *er;
+ struct bkey_s_extent el, er;
struct cache *ca;
unsigned i;
if (key_merging_disabled(b->c))
- return false;
+ return BCH_MERGE_NOMERGE;
/*
* Generic header checks
@@ -1511,13 +1599,13 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey
* Left and right must be exactly aligned
*/
- if (l->u64s != r->u64s ||
- l->type != r->type ||
- l->version != r->version ||
- bkey_cmp(l->p, bkey_start_pos(r)))
- return false;
+ if (l->k.u64s != r->k.u64s ||
+ l->k.type != r->k.type ||
+ l->k.version != r->k.version ||
+ bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
+ return BCH_MERGE_NOMERGE;
- switch (l->type) {
+ switch (l->k.type) {
case KEY_TYPE_DELETED:
case KEY_TYPE_DISCARD:
case KEY_TYPE_ERROR:
@@ -1525,17 +1613,17 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey
break;
case BCH_EXTENT:
- el = bkey_i_to_extent(l);
- er = bkey_i_to_extent(r);
+ el = bkey_i_to_s_extent(l);
+ er = bkey_i_to_s_extent(r);
for (i = 0; i < bch_extent_ptrs(el); i++) {
/*
* compare all the pointer fields at once, adding the
* size to the left pointer's offset:
*/
- if (el->v.ptr[i]._val + PTR(0, el->k.size, 0)._val !=
- er->v.ptr[i]._val)
- return false;
+ if (el.v->ptr[i]._val + PTR(0, el.k->size, 0)._val !=
+ er.v->ptr[i]._val)
+ return BCH_MERGE_NOMERGE;
/*
* we don't allow extent pointers to straddle buckets -
@@ -1543,27 +1631,27 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey
* size so we can't check
*/
rcu_read_lock();
- if (!(ca = PTR_CACHE(b->c, &el->v.ptr[i])) ||
- PTR_BUCKET_NR(ca, &el->v.ptr[i]) !=
- PTR_BUCKET_NR(ca, &er->v.ptr[i])) {
+ if (!(ca = PTR_CACHE(b->c, &el.v->ptr[i])) ||
+ PTR_BUCKET_NR(ca, &el.v->ptr[i]) !=
+ PTR_BUCKET_NR(ca, &er.v->ptr[i])) {
rcu_read_unlock();
- return false;
+ return BCH_MERGE_NOMERGE;
}
rcu_read_unlock();
}
break;
default:
- return false;
+ return BCH_MERGE_NOMERGE;
}
/* Keys with no pointers aren't restricted to one bucket and could
* overflow KEY_SIZE
*/
- if ((u64) l->size + r->size > KEY_SIZE_MAX) {
- bch_key_resize(l, KEY_SIZE_MAX);
- bch_cut_front(l->p, r);
- return false;
+ if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) {
+ bch_key_resize(&l->k, KEY_SIZE_MAX);
+ bch_cut_front(l->k.p, r);
+ return BCH_MERGE_PARTIAL;
}
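The KEY_SIZE_MAX branch above is where BCH_MERGE_PARTIAL comes from: the left key is grown to the maximum encodable size and the right key keeps the remainder. A rough user-space sketch of that size accounting (illustrative limit and field names, not the kernel's):

	#include <stdint.h>

	#define TOY_SIZE_MAX	((1U << 16) - 1)	/* stand-in for KEY_SIZE_MAX */

	struct ext { uint64_t end; uint32_t size; };	/* end offset + length */

	/* returns 2 for a full merge, 1 for a partial merge (r keeps a remainder) */
	int merge(struct ext *l, struct ext *r)
	{
		if ((uint64_t) l->size + r->size > TOY_SIZE_MAX) {
			uint32_t moved = TOY_SIZE_MAX - l->size;

			l->end  += moved;
			l->size  = TOY_SIZE_MAX;
			r->size -= moved;	/* r's start advances; its end is unchanged */
			return 1;
		}
		l->end  += r->size;
		l->size += r->size;
		r->size  = 0;
		return 2;
	}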
#if 0
if (KEY_CSUM(l)) {
@@ -1573,30 +1661,88 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey
SET_KEY_CSUM(l, 0);
}
#endif
- bch_key_resize(l, l->size + r->size);
+ bch_key_resize(&l->k, l->k.size + r->k.size);
- return true;
+ return BCH_MERGE_MERGE;
+}
+
+static bool extent_i_save(struct bkey_packed *dst, struct bkey_i *src,
+ const struct bkey_format *f)
+{
+ struct bkey_i *dst_unpacked;
+ bool ret;
+
+ BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k));
+
+ if ((dst_unpacked = packed_to_bkey(dst))) {
+ bkey_copy(dst_unpacked, src);
+ ret = true;
+ } else {
+ ret = bkey_pack(dst, src, f);
+ }
+
+ return ret;
}
+/*
+ * When merging an extent that we're inserting into a btree node, the new merged
+ * extent could overlap with an existing 0 size extent - if we don't fix that,
+ * it'll break the btree node iterator so this code finds those 0 size extents
+ * and shifts them out of the way.
+ *
+ * Also unpacks and repacks.
+ */
static bool bch_extent_merge_inline(struct btree_keys *b,
struct btree_node_iter *iter,
- struct bkey *l, struct bkey *r)
+ struct bkey_packed *l,
+ struct bkey_packed *r)
{
+ const struct bkey_format *f = &b->set->data->format;
struct bset_tree *t;
- struct bkey *k, *m;
-
- if (!bch_extent_merge(b, l, r))
- return false;
+ struct bkey_packed *k, *m;
+ struct bkey uk;
+ BKEY_PADDED(k) li;
+ BKEY_PADDED(k) ri;
+ struct bkey_i *mi;
+ bool ret;
if (l >= b->set->data->start &&
- l < bset_bkey_last(bset_tree_last(b)->data))
+ l < bset_bkey_last(bset_tree_last(b)->data)) {
+ bkey_unpack(&li.k, f, l);
+ bkey_copy(&ri.k, packed_to_bkey(r));
m = l;
- else if (r >= b->set->data->start &&
- r < bset_bkey_last(bset_tree_last(b)->data))
+ mi = &li.k;
+ } else if (r >= b->set->data->start &&
+ r < bset_bkey_last(bset_tree_last(b)->data)) {
+ bkey_unpack(&ri.k, f, r);
+ bkey_copy(&li.k, packed_to_bkey(l));
m = r;
- else
+ mi = &ri.k;
+ } else
BUG();
+ switch (bch_extent_merge(b, &li.k, &ri.k)) {
+ case BCH_MERGE_NOMERGE:
+ return false;
+ case BCH_MERGE_PARTIAL:
+ if (!extent_i_save(m, mi, f))
+ return false;
+
+ if (m == r)
+ bkey_copy(packed_to_bkey(l), &li.k);
+ else
+ bkey_copy(packed_to_bkey(r), &ri.k);
+
+ ret = false;
+ break;
+ case BCH_MERGE_MERGE:
+ if (!extent_i_save(m, &li.k, f))
+ return false;
+
+ ret = true;
+ break;
+ }
+
/*
* l is the output of bch_extent_merge(), m is the extent that was in
* the btree.
@@ -1605,7 +1751,6 @@ static bool bch_extent_merge_inline(struct btree_keys *b,
* position and search from there for 0 size extents that overlap with
* m.
*/
-
for (t = b->set; t <= b->set + b->nsets; t++) {
if (!t->data->u64s ||
(m >= t->data->start &&
@@ -1626,28 +1771,36 @@ static bool bch_extent_merge_inline(struct btree_keys *b,
* position) - walk backwards to find them
*/
for (;
- k && bkey_cmp(k->p, bkey_start_pos(l)) > 0;
+ k &&
+ (uk = bkey_unpack_key(f, k),
+ bkey_cmp(uk.p, bkey_start_pos(&li.k.k)) > 0);
k = bkey_prev(b, t, k)) {
- if (bkey_cmp(k->p, l->p) >= 0)
+ if (bkey_cmp(uk.p, li.k.k.p) >= 0)
continue;
BUG_ON(!bkey_deleted(k));
- k->p = bkey_start_pos(l);
+ uk.p = bkey_start_pos(&li.k.k);
+ extent_save(k, &uk, f);
+
bch_bset_fix_invalidated_key(b, k);
}
} else {
/* Front merge - walk forwards */
for (;
k != bset_bkey_last(t->data) &&
- bkey_cmp(k->p, l->p) < 0;
+ (uk = bkey_unpack_key(f, k),
+ bkey_cmp(uk.p, li.k.k.p) < 0);
k = bkey_next(k)) {
- if (bkey_cmp(k->p, bkey_start_pos(l)) <= 0)
+ if (bkey_cmp(uk.p,
+ bkey_start_pos(&li.k.k)) <= 0)
continue;
BUG_ON(!bkey_deleted(k));
- k->p = l->p;
+ uk.p = li.k.k.p;
+ extent_save(k, &uk, f);
+
bch_bset_fix_invalidated_key(b, k);
}
}
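As the comment before bch_extent_merge_inline() explains, growing an extent by merging can leave deleted (0 size) placeholder keys positioned inside the merged range, which would confuse the ordered node iterator. A simplified sketch of the back-merge fix-up, snapping such placeholders to the start of the merged range (illustrative types, one direction only):

	#include <stdint.h>

	struct slot { uint64_t pos; uint32_t size; };	/* size == 0 => deleted key */

	/*
	 * Back-merge case: after the merged extent grows to cover
	 * [merged_start, merged_end), deleted placeholders whose position now
	 * falls inside that range are snapped to merged_start so they no longer
	 * overlap the merged key and trip up the node iterator.
	 */
	void fixup_deleted(struct slot *s, unsigned n,
			   uint64_t merged_start, uint64_t merged_end)
	{
		for (unsigned i = 0; i < n; i++) {
			if (s[i].size)
				continue;
			if (s[i].pos > merged_start && s[i].pos < merged_end)
				s[i].pos = merged_start;
		}
	}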
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index f139c62feb09..6f1ec6be9b2a 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -2,16 +2,13 @@
#define _BCACHE_EXTENTS_H
#include "bkey.h"
-#include "bset.h"
-#include "journal_types.h"
void bch_key_sort_fix_overlapping(struct btree_keys *, struct bset *,
struct btree_node_iter *);
void bch_extent_sort_fix_overlapping(struct btree_keys *, struct bset *,
struct btree_node_iter *);
-bool bch_insert_fixup_key(struct btree *,
- struct bkey *,
+bool bch_insert_fixup_key(struct btree *, struct bkey_i *,
struct btree_node_iter *,
struct bch_replace_info *,
struct bpos *,
@@ -23,85 +20,88 @@ extern const struct bkey_ops bch_bkey_extent_ops;
extern const struct btree_keys_ops bch_btree_interior_node_ops;
extern const struct btree_keys_ops *bch_btree_ops[];
-struct bkey;
struct cache_set;
+struct journal_res;
struct cache *bch_btree_pick_ptr(struct cache_set *, const struct btree *,
const struct bch_extent_ptr **);
-struct cache *bch_extent_pick_ptr_avoiding(struct cache_set *,
- const struct bkey *,
+struct cache *bch_extent_pick_ptr_avoiding(struct cache_set *, struct bkey_s_c,
const struct bch_extent_ptr **,
struct cache *);
-static inline struct cache *bch_extent_pick_ptr(struct cache_set *c,
- const struct bkey *k,
- const struct bch_extent_ptr **ptr)
+static inline struct cache *
+bch_extent_pick_ptr(struct cache_set *c, struct bkey_s_c k,
+ const struct bch_extent_ptr **ptr)
{
return bch_extent_pick_ptr_avoiding(c, k, ptr, NULL);
}
-bool bch_insert_fixup_extent(struct btree *, struct bkey *,
+bool bch_insert_fixup_extent(struct btree *, struct bkey_i *,
struct btree_node_iter *,
struct bch_replace_info *, struct bpos *,
struct journal_res *);
-unsigned bch_extent_nr_ptrs_after_normalize(const struct cache_set *,
- const struct bkey *);
-void bch_extent_drop_stale(struct cache_set *, struct bkey *);
-bool bch_extent_normalize(struct cache_set *, struct bkey *);
+unsigned bch_extent_nr_ptrs_after_normalize(const struct btree *,
+ const struct bkey_packed *);
+void bch_extent_drop_stale(struct cache_set *c, struct bkey_s);
+bool bch_extent_normalize(struct cache_set *, struct bkey_s);
int __bch_add_sectors(struct cache_set *, struct btree *,
- const struct bkey_i_extent *, u64, int, bool);
+ struct bkey_s_c_extent, u64, int, bool);
-static inline bool bkey_extent_cached(const struct bkey *k)
+static inline bool bkey_extent_cached(struct bkey_s_c k)
{
- return k->type == BCH_EXTENT &&
- EXTENT_CACHED(&bkey_i_to_extent_c(k)->v);
+ return k.k->type == BCH_EXTENT &&
+ EXTENT_CACHED(bkey_s_c_to_extent(k).v);
}
-static inline unsigned bch_extent_ptrs(const struct bkey_i_extent *e)
-{
- return bkey_val_u64s(&e->k);
-}
+#define bch_extent_ptrs(_e) bkey_val_u64s((_e).k)
-static inline void bch_set_extent_ptrs(struct bkey_i_extent *e, unsigned i)
+static inline void bch_set_extent_ptrs(struct bkey_s_extent e, unsigned i)
{
BUG_ON(i > BKEY_EXTENT_PTRS_MAX);
- set_bkey_val_u64s(&e->k, i);
+ set_bkey_val_u64s(e.k, i);
}
-static inline void bch_extent_drop_ptr(struct bkey *k, unsigned ptr)
+static inline void bch_extent_drop_ptr(struct bkey_s_extent e,
+ unsigned ptr)
{
- struct bkey_i_extent *e = bkey_i_to_extent(k);
-
- BUG_ON(bch_extent_ptrs(e) > BKEY_EXTENT_PTRS_MAX);
- BUG_ON(ptr >= bch_extent_ptrs(e));
+ BUG_ON(bch_extent_ptrs(extent_s_to_s_c(e)) > BKEY_EXTENT_PTRS_MAX);
+ BUG_ON(ptr >= bch_extent_ptrs(extent_s_to_s_c(e)));
- e->k.u64s--;
- memmove(&e->v.ptr[ptr],
- &e->v.ptr[ptr + 1],
- (bch_extent_ptrs(e) - ptr) * sizeof(u64));
+ e.k->u64s--;
+ memmove(&e.v->ptr[ptr],
+ &e.v->ptr[ptr + 1],
+ (bch_extent_ptrs(extent_s_to_s_c(e)) - ptr) * sizeof(u64));
}
static inline unsigned bch_extent_replicas_needed(const struct cache_set *c,
- const struct bkey_i_extent *e)
+ const struct bch_extent *e)
{
- return EXTENT_CACHED(&e->v) ? 0 : CACHE_SET_DATA_REPLICAS_WANT(&c->sb);
+ return EXTENT_CACHED(e) ? 0 : CACHE_SET_DATA_REPLICAS_WANT(&c->sb);
}
-static inline bool bch_extent_ptr_is_dirty(const struct cache_set *c,
- const struct bkey_i_extent *e,
- const struct bch_extent_ptr *ptr)
+static inline bool __bch_extent_ptr_is_dirty(const struct cache_set *c,
+ const struct bch_extent *e,
+ const struct bch_extent_ptr *ptr,
+ unsigned nr_ptrs)
{
/* Dirty pointers come last */
return ptr + bch_extent_replicas_needed(c, e) >=
- e->v.ptr + bch_extent_ptrs(e);
+ e->ptr + nr_ptrs;
+}
+
+static inline bool bch_extent_ptr_is_dirty(const struct cache_set *c,
+ struct bkey_s_c_extent e,
+ const struct bch_extent_ptr *ptr)
+{
+ return __bch_extent_ptr_is_dirty(c, e.v, ptr, bch_extent_ptrs(e));
}
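The "dirty pointers come last" rule reduces to simple index arithmetic: with nr_ptrs pointers and replicas_needed dirty replicas wanted, exactly the last replicas_needed pointers satisfy the check. A tiny stand-alone restatement, for illustration only:

	/* e.g. nr_ptrs == 3, replicas_needed == 2: indices 1 and 2 are dirty */
	int toy_ptr_is_dirty(unsigned idx, unsigned nr_ptrs, unsigned replicas_needed)
	{
		return idx + replicas_needed >= nr_ptrs;
	}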
#define extent_for_each_ptr(_extent, _ptr) \
- for ((_ptr) = (_extent)->v.ptr; \
- (_ptr) < (_extent)->v.ptr + bch_extent_ptrs(_extent); \
+ for ((_ptr) = (_extent).v->ptr; \
+ (_ptr) < (_extent).v->ptr + bch_extent_ptrs(_extent); \
(_ptr)++)
/*
@@ -109,30 +109,29 @@ static inline bool bch_extent_ptr_is_dirty(const struct cache_set *c,
* Any reason we shouldn't just always do this?
*/
#define extent_for_each_ptr_backwards(_extent, _ptr) \
- for ((_ptr) = (_extent)->v.ptr + bch_extent_ptrs(_extent) - 1; \
- (_ptr) >= (_extent)->v.ptr; \
+ for ((_ptr) = (_extent).v->ptr + bch_extent_ptrs(_extent) - 1; \
+ (_ptr) >= (_extent).v->ptr; \
--(_ptr))
#define __extent_next_online_device(_c, _extent, _ptr, _ca) \
({ \
(_ca) = NULL; \
\
- while ((_ptr) < (_extent)->v.ptr + bch_extent_ptrs(_extent) &&\
+ while ((_ptr) < (_extent).v->ptr + bch_extent_ptrs(_extent) &&\
!((_ca) = PTR_CACHE(_c, _ptr))) \
(_ptr)++; \
(_ca); \
})
#define extent_for_each_online_device(_c, _extent, _ptr, _ca) \
- for ((_ptr) = (_extent)->v.ptr; \
+ for ((_ptr) = (_extent).v->ptr; \
((_ca) = __extent_next_online_device(_c, _extent, _ptr, _ca));\
(_ptr)++)
-bool bch_extent_has_device(const struct bkey_i_extent *, unsigned);
-void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
- unsigned);
+bool bch_extent_has_device(struct bkey_s_c_extent, unsigned);
+void bch_bkey_copy_single_ptr(struct bkey_i *, struct bkey_s_c, unsigned);
-bool bch_cut_front(struct bpos, struct bkey *);
+bool bch_cut_front(struct bpos, struct bkey_i *);
bool bch_cut_back(struct bpos, struct bkey *);
void bch_key_resize(struct bkey *, unsigned);
diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c
index 0af73dfa4519..71df92ccad9a 100644
--- a/drivers/md/bcache/fs.c
+++ b/drivers/md/bcache/fs.c
@@ -24,7 +24,7 @@ struct bch_inode_info {
static struct kmem_cache *bch_inode_cache;
-static void bch_vfs_inode_init(struct bch_inode_info *);
+static void bch_inode_init(struct bch_inode_info *);
static int bch_read_single_page(struct page *, struct address_space *);
static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum)
@@ -54,7 +54,7 @@ static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum)
BUG_ON(ei->inode.k.u64s != sizeof(ei->inode) / sizeof(u64));
- bch_vfs_inode_init(ei);
+ bch_inode_init(ei);
unlock_new_inode(inode);
return inode;
@@ -78,7 +78,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
ei = to_bch_ei(inode);
- bi = &bkey_inode_init(&ei->inode.k)->v;
+ bi = &bkey_inode_init(&ei->inode.k_i)->v;
bi->i_uid = i_uid_read(inode);
bi->i_gid = i_gid_read(inode);
@@ -89,7 +89,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
bi->i_nlink = S_ISDIR(mode) ? 2 : 1;
/* XXX: init bch_inode */
- ret = bch_inode_create(c, &ei->inode.k,
+ ret = bch_inode_create(c, &ei->inode.k_i,
BLOCKDEV_INODE_MAX,
BCACHE_USER_INODE_RANGE,
&c->unused_inode_hint);
@@ -98,7 +98,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
return ERR_PTR(ret);
}
- bch_vfs_inode_init(ei);
+ bch_inode_init(ei);
insert_inode_hash(inode);
return inode;
@@ -144,7 +144,7 @@ static int __bch_write_inode(struct inode *inode)
bi->i_mtime = timespec_to_ns(&inode->i_mtime);
bi->i_ctime = timespec_to_ns(&inode->i_ctime);
- bch_inode_update(c, &ei->inode.k);
+ bch_inode_update(c, &ei->inode.k_i);
return 0;
}
@@ -413,7 +413,7 @@ static int bch_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
{
struct cache_set *c = inode->i_sb->s_fs_info;
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
int ret = 0;
if (start + len < start)
@@ -421,19 +421,19 @@ static int bch_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, k,
POS(inode->i_ino, start))
- if (k->type == BCH_EXTENT) {
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ if (k.k->type == BCH_EXTENT) {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
- if (bkey_cmp(bkey_start_pos(&e->k),
+ if (bkey_cmp(bkey_start_pos(e.k),
POS(inode->i_ino, start + len)) >= 0)
break;
extent_for_each_ptr(e, ptr) {
ret = fiemap_fill_next_extent(fieinfo,
- bkey_start_offset(&e->k),
- PTR_OFFSET(ptr),
- e->k.size, 0);
+ bkey_start_offset(e.k),
+ PTR_OFFSET(ptr),
+ e.k->size, 0);
if (ret < 0)
goto out;
}
@@ -653,7 +653,7 @@ static void bch_writepage_do_io(struct bch_writepage_io *io)
{
pr_debug("writing %u sectors to %llu:%llu",
bio_sectors(&io->bio.bio),
- io->op.insert_key.p.inode,
+ io->op.insert_key.k.p.inode,
(u64) io->bio.bio.bi_iter.bi_sector);
closure_call(&io->op.cl, bch_write, NULL, &io->cl);
@@ -691,7 +691,8 @@ again:
closure_init(&w->io->cl, NULL);
bch_write_op_init(&w->io->op, w->c, bio, NULL,
- &KEY(w->inum, 0, 0), NULL, 0);
+ bkey_to_s_c(&KEY(w->inum, 0, 0)),
+ bkey_s_c_null, 0);
}
if (bch_bio_add_page(&w->io->bio.bio, page)) {
@@ -747,7 +748,8 @@ static int bch_writepage(struct page *page, struct writeback_control *wbc)
bio->bi_max_vecs = 1;
bch_write_op_init(&io->op, c, bio, NULL,
- &KEY(inode->i_ino, 0, 0), NULL, 0);
+ bkey_to_s_c(&KEY(inode->i_ino, 0, 0)),
+ bkey_s_c_null, 0);
bch_bio_add_page(bio, page);
@@ -880,7 +882,7 @@ static const struct address_space_operations bch_address_space_operations = {
.error_remove_page = generic_error_remove_page,
};
-static void bch_vfs_inode_init(struct bch_inode_info *ei)
+static void bch_inode_init(struct bch_inode_info *ei)
{
struct inode *inode = &ei->vfs_inode;
struct bch_inode *bi = &ei->inode.v;
@@ -968,7 +970,7 @@ static void bch_evict_inode(struct inode *inode)
static u64 bch_count_inodes(struct cache_set *c)
{
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
u64 inodes = 0;
for_each_btree_key(&iter, c, BTREE_ID_INODES, k, POS_MIN)
diff --git a/drivers/md/bcache/gc.c b/drivers/md/bcache/gc.c
index 9b418371d31d..916b6042517a 100644
--- a/drivers/md/bcache/gc.c
+++ b/drivers/md/bcache/gc.c
@@ -24,7 +24,7 @@
#include <trace/events/bcache.h>
u8 bch_btree_key_recalc_oldest_gen(struct cache_set *c,
- const struct bkey_i_extent *e)
+ struct bkey_s_c_extent e)
{
const struct bch_extent_ptr *ptr;
struct cache *ca;
@@ -46,16 +46,16 @@ u8 bch_btree_key_recalc_oldest_gen(struct cache_set *c,
return max_stale;
}
-u8 __bch_btree_mark_key(struct cache_set *c, int level, const struct bkey *k)
+u8 __bch_btree_mark_key(struct cache_set *c, int level, struct bkey_s_c k)
{
- const struct bkey_i_extent *e;
+ struct bkey_s_c_extent e;
const struct bch_extent_ptr *ptr;
struct cache *ca;
u8 max_stale;
- switch (k->type) {
+ switch (k.k->type) {
case BCH_EXTENT:
- e = bkey_i_to_extent_c(k);
+ e = bkey_s_c_to_extent(k);
rcu_read_lock();
@@ -65,8 +65,8 @@ u8 __bch_btree_mark_key(struct cache_set *c, int level, const struct bkey *k)
extent_for_each_online_device(c, e, ptr, ca)
bch_mark_metadata_bucket(ca, PTR_BUCKET(ca, ptr), true);
} else {
- __bch_add_sectors(c, NULL, e, bkey_start_offset(k),
- k->size, false);
+ __bch_add_sectors(c, NULL, e, bkey_start_offset(e.k),
+ e.k->size, false);
}
rcu_read_unlock();
@@ -78,7 +78,7 @@ u8 __bch_btree_mark_key(struct cache_set *c, int level, const struct bkey *k)
}
static u8 btree_mark_key(struct cache_set *c, struct btree *b,
- const struct bkey *k)
+ struct bkey_s_c k)
{
return __bch_btree_mark_key(c, b->level, k);
}
@@ -92,39 +92,45 @@ static inline bool btree_node_has_ptrs(struct btree *b)
bool btree_gc_mark_node(struct cache_set *c, struct btree *b,
struct gc_stat *stat)
{
+ struct bkey_format *f = &b->keys.set->data->format;
struct bset_tree *t;
for (t = b->keys.set; t <= &b->keys.set[b->keys.nsets]; t++)
btree_bug_on(t->size &&
bset_written(&b->keys, t) &&
- bkey_cmp(b->key.p, t->end.p) < 0,
+ bkey_cmp_packed(f, &b->key.k, &t->end) < 0,
b, "found short btree key in gc");
if (stat)
stat->nodes++;
/* only actually needed for the root */
- __bch_btree_mark_key(c, b->level + 1, &b->key);
+ __bch_btree_mark_key(c, b->level + 1, bkey_i_to_s_c(&b->key));
if (btree_node_has_ptrs(b)) {
- u8 stale = 0;
- unsigned keys = 0, good_keys = 0, u64s;
- struct bkey *k;
struct btree_node_iter iter;
+ struct bkey_packed *k;
+ struct bkey_tup tup;
+ unsigned keys = 0, good_keys = 0, u64s;
+ u8 stale = 0;
for_each_btree_node_key(&b->keys, k, &iter) {
- bkey_debugcheck(b, k);
+ bkey_disassemble(&tup, f, k);
+
+ bkey_debugcheck(b, bkey_tup_to_s_c(&tup));
- stale = max(stale, btree_mark_key(c, b, k));
+ stale = max(stale,
+ btree_mark_key(c, b,
+ bkey_tup_to_s_c(&tup)));
keys++;
- u64s = bch_extent_nr_ptrs_after_normalize(c, k);
+ u64s = bch_extent_nr_ptrs_after_normalize(b, k);
if (stat && u64s) {
good_keys++;
stat->key_bytes += k->u64s;
stat->nkeys++;
- stat->data += k->size;
+ stat->data += tup.k.size;
}
}
@@ -155,6 +161,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
struct keylist keylist;
struct closure cl;
struct bpos saved_pos;
+ struct bkey_format new_format;
int ret;
if (c->gc_coalesce_disabled)
@@ -184,9 +191,22 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
trace_bcache_btree_gc_coalesce(parent, nr_old_nodes);
+ bch_bkey_format_init(&new_format);
+
+ for (i = 0; i < nr_old_nodes; i++) {
+ struct btree_node_iter iter;
+ struct bkey_tup tup;
+
+ for_each_btree_node_key_unpack(&old_nodes[i]->keys, &tup, &iter)
+ bch_bkey_format_add(&new_format, &tup.k);
+ }
+
+ bch_bkey_format_done(&new_format);
+
for (i = 0; i < nr_old_nodes; i++)
- new_nodes[i] = btree_node_alloc_replacement(old_nodes[i],
- iter->btree_id);
+ new_nodes[i] = __btree_node_alloc_replacement(old_nodes[i],
+ iter->btree_id,
+ new_format);
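The coalesce path now builds a fresh bkey_format from every key that will land in the replacement nodes before allocating them, so the packed encoding is sized for the merged key set. The real format covers several fields; a sketch of the init/add/done flow for a single offset field (simplified helpers, not the kernel's) might be:

	#include <stdint.h>

	struct toy_format { uint64_t min, max; unsigned bits; };

	void format_init(struct toy_format *f)
	{
		f->min  = UINT64_MAX;
		f->max  = 0;
		f->bits = 0;
	}

	void format_add(struct toy_format *f, uint64_t offset)
	{
		if (offset < f->min) f->min = offset;
		if (offset > f->max) f->max = offset;
	}

	void format_done(struct toy_format *f)
	{
		uint64_t range;

		if (f->max < f->min) {		/* no keys were added */
			f->min = 0;
			return;
		}

		range = f->max - f->min;
		while (f->bits < 64 && (range >> f->bits))
			f->bits++;	/* field stored as (offset - min) in f->bits bits */
	}

Each key's fields are fed through format_add() much as the loop above feeds bch_bkey_format_add(), and format_done() fixes the widths before any key is packed into the new nodes.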
/*
* Conceptually we concatenate the nodes together and slice them
@@ -195,7 +215,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
for (i = nr_new_nodes - 1; i > 0; --i) {
struct bset *n1 = btree_bset_first(new_nodes[i]);
struct bset *n2 = btree_bset_first(new_nodes[i - 1]);
- struct bkey *k, *last = NULL;
+ struct bkey_packed *k, *last = NULL;
u64s = 0;
@@ -210,7 +230,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
if (u64s == n2->u64s) {
/* n2 fits entirely in n1 */
- new_nodes[i]->key.p = new_nodes[i - 1]->key.p;
+ new_nodes[i]->key.k.p = new_nodes[i - 1]->key.k.p;
memcpy(bset_bkey_last(n1),
n2->start,
@@ -227,7 +247,8 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
new_nodes[--nr_new_nodes] = NULL;
} else if (u64s) {
/* move part of n2 into n1 */
- new_nodes[i]->key.p = last->p;
+ new_nodes[i]->key.k.p =
+ bkey_unpack_key(&new_format, last).p;
memcpy(bset_bkey_last(n1),
n2->start,
@@ -255,7 +276,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
/* The keys for the old nodes get deleted */
for (i = nr_old_nodes - 1; i > 0; --i) {
*keylist.top = old_nodes[i]->key;
- set_bkey_deleted(keylist.top);
+ set_bkey_deleted(&keylist.top->k);
bch_keylist_enqueue(&keylist);
}
@@ -269,7 +290,7 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
/* hack: */
saved_pos = iter->pos;
- iter->pos = bch_keylist_front(&keylist)->p;
+ iter->pos = bch_keylist_front(&keylist)->k.p;
btree_iter_node_set(iter, parent);
/* Insert the newly coalesced nodes */
@@ -347,12 +368,12 @@ static int bch_gc_btree(struct cache_set *c, enum btree_id btree_id,
should_rewrite = btree_gc_mark_node(c, b, stat);
- BUG_ON(bkey_cmp(c->gc_cur_pos, b->key.p) > 0);
+ BUG_ON(bkey_cmp(c->gc_cur_pos, b->key.k.p) > 0);
BUG_ON(!gc_will_visit_node(c, b));
write_seqlock(&c->gc_cur_lock);
c->gc_cur_level = b->level;
- c->gc_cur_pos = b->key.p;
+ c->gc_cur_pos = b->key.k.p;
write_sequnlock(&c->gc_cur_lock);
BUG_ON(gc_will_visit_node(c, b));
@@ -406,11 +427,11 @@ static void bch_mark_allocator_buckets(struct cache_set *c)
rcu_read_lock();
list_for_each_entry(b, &c->open_buckets_open, list) {
- const struct bkey_i_extent *e;
+ struct bkey_s_c_extent e;
const struct bch_extent_ptr *ptr;
spin_lock(&b->lock);
- e = bkey_i_to_extent_c(&b->key);
+ e = bkey_i_to_s_c_extent(&b->key);
extent_for_each_online_device(c, e, ptr, ca)
bch_mark_alloc_bucket(ca, PTR_BUCKET(ca, ptr));
spin_unlock(&b->lock);
@@ -710,13 +731,15 @@ static void bch_initial_gc_btree(struct cache_set *c, enum btree_id id)
for_each_btree_node(&iter, c, id, b, POS_MIN) {
if (btree_node_has_ptrs(b)) {
struct btree_node_iter node_iter;
- struct bkey *k;
+ struct bkey_tup tup;
- for_each_btree_node_key(&b->keys, k, &node_iter)
- btree_mark_key(c, b, k);
+ for_each_btree_node_key_unpack(&b->keys, &tup,
+ &node_iter)
+ btree_mark_key(c, b, bkey_tup_to_s_c(&tup));
}
- __bch_btree_mark_key(c, iter.level + 1, &b->key);
+ __bch_btree_mark_key(c, iter.level + 1,
+ bkey_i_to_s_c(&b->key));
}
bch_btree_iter_unlock(&iter);
diff --git a/drivers/md/bcache/gc.h b/drivers/md/bcache/gc.h
index 7d5afdf334f4..d1ea229b1c5f 100644
--- a/drivers/md/bcache/gc.h
+++ b/drivers/md/bcache/gc.h
@@ -15,9 +15,8 @@ static inline void set_gc_sectors(struct cache_set *c)
void bch_gc(struct cache_set *);
int bch_gc_thread_start(struct cache_set *);
int bch_initial_gc(struct cache_set *, struct list_head *);
-u8 bch_btree_key_recalc_oldest_gen(struct cache_set *,
- const struct bkey_i_extent *);
-u8 __bch_btree_mark_key(struct cache_set *, int, const struct bkey *);
+u8 bch_btree_key_recalc_oldest_gen(struct cache_set *, struct bkey_s_c_extent);
+u8 __bch_btree_mark_key(struct cache_set *, int, struct bkey_s_c);
bool btree_gc_mark_node(struct cache_set *, struct btree *,
struct gc_stat *);
@@ -34,8 +33,8 @@ static inline bool __gc_will_visit_node(struct cache_set *c,
{
return b->btree_id != c->gc_cur_btree
? b->btree_id > c->gc_cur_btree
- : bkey_cmp(b->key.p, c->gc_cur_pos)
- ? bkey_cmp(b->key.p, c->gc_cur_pos) > 0
+ : bkey_cmp(b->key.k.p, c->gc_cur_pos)
+ ? bkey_cmp(b->key.k.p, c->gc_cur_pos) > 0
: b->level > c->gc_cur_level;
}
diff --git a/drivers/md/bcache/inode.c b/drivers/md/bcache/inode.c
index 275666ab6c54..1a9f88befc3c 100644
--- a/drivers/md/bcache/inode.c
+++ b/drivers/md/bcache/inode.c
@@ -51,26 +51,26 @@ ssize_t bch_inode_status(char *buf, size_t len, const struct bkey *k)
}
}
-static bool bch_inode_invalid(const struct cache_set *c, const struct bkey *k)
+static bool bch_inode_invalid(const struct cache_set *c, struct bkey_s_c k)
{
- if (k->p.offset)
+ if (k.k->p.offset)
return true;
- switch (k->type) {
+ switch (k.k->type) {
case BCH_INODE_FS:
- if (bkey_bytes(k) != sizeof(struct bkey_i_inode))
+ if (bkey_val_bytes(k.k) != sizeof(struct bch_inode))
return true;
- if (k->p.inode < BLOCKDEV_INODE_MAX)
+ if (k.k->p.inode < BLOCKDEV_INODE_MAX)
return true;
return false;
case BCH_INODE_BLOCKDEV:
- if (bkey_bytes(k) != sizeof(struct bkey_i_inode_blockdev))
+ if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_blockdev))
return true;
- if (k->p.inode >= BLOCKDEV_INODE_MAX &&
- k->p.inode < BCACHE_USER_INODE_RANGE)
+ if (k.k->p.inode >= BLOCKDEV_INODE_MAX &&
+ k.k->p.inode < BCACHE_USER_INODE_RANGE)
return true;
return false;
@@ -86,11 +86,11 @@ const struct bkey_ops bch_bkey_inode_ops = {
.key_invalid = bch_inode_invalid,
};
-int bch_inode_create(struct cache_set *c, struct bkey *inode,
+int bch_inode_create(struct cache_set *c, struct bkey_i *inode,
u64 min, u64 max, u64 *hint)
{
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
bool searched_from_start = false;
int ret;
@@ -102,15 +102,15 @@ int bch_inode_create(struct cache_set *c, struct bkey *inode,
again:
bch_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(*hint, 0));
- while ((k = bch_btree_iter_peek_with_holes(&iter))) {
- if (max && k->p.inode >= max)
+ while ((k = bch_btree_iter_peek_with_holes(&iter)).k) {
+ if (max && k.k->p.inode >= max)
break;
- if (!bkey_val_u64s(k)) {
- inode->p = k->p;
+ if (k.k->type < BCH_INODE_FS) {
+ inode->k.p = k.k->p;
pr_debug("inserting inode %llu (size %u)",
- inode->p.inode, inode->u64s);
+ inode->k.p.inode, inode->k.u64s);
ret = bch_btree_insert_at(&iter, &keylist_single(inode),
NULL, NULL, 0, BTREE_INSERT_ATOMIC);
@@ -120,7 +120,7 @@ again:
bch_btree_iter_unlock(&iter);
if (!ret)
- *hint = k->p.inode + 1;
+ *hint = k.k->p.inode + 1;
return ret;
} else {
@@ -140,7 +140,7 @@ again:
return -ENOSPC;
}
-int bch_inode_update(struct cache_set *c, struct bkey *inode)
+int bch_inode_update(struct cache_set *c, struct bkey_i *inode)
{
return bch_btree_insert(c, BTREE_ID_INODES,
&keylist_single(inode),
@@ -155,15 +155,15 @@ int bch_inode_truncate(struct cache_set *c, u64 inode_nr, u64 new_size)
int bch_inode_rm(struct cache_set *c, u64 inode_nr)
{
- struct bkey delete;
+ struct bkey_i delete;
int ret;
ret = bch_inode_truncate(c, inode_nr, 0);
if (ret < 0)
return ret;
- bkey_init(&delete);
- delete.p.inode = inode_nr;
+ bkey_init(&delete.k);
+ delete.k.p.inode = inode_nr;
return bch_btree_insert(c, BTREE_ID_INODES,
&keylist_single(&delete),
@@ -174,15 +174,15 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr,
struct bkey_i_inode *ret)
{
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
for_each_btree_key_with_holes(&iter, c, BTREE_ID_INODES, k,
POS(inode_nr, 0)) {
/* hole, not found */
- if (!bkey_val_u64s(k))
+ if (bkey_deleted(k.k))
break;
- bkey_copy(ret, k);
+ bkey_reassemble(&ret->k_i, k);
bch_btree_iter_unlock(&iter);
return 0;
@@ -197,22 +197,23 @@ static int __bch_blockdev_inode_find_by_uuid(struct cache_set *c, uuid_le *uuid,
u64 start_inode, u64 end_inode)
{
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
for_each_btree_key(&iter, c, BTREE_ID_INODES, k,
POS(start_inode, 0)) {
- if (k->p.inode >= end_inode)
+ if (k.k->p.inode >= end_inode)
break;
- if (k->type == BCH_INODE_BLOCKDEV) {
- const struct bkey_i_inode_blockdev *inode =
- bkey_i_to_inode_blockdev_c(k);
+ if (k.k->type == BCH_INODE_BLOCKDEV) {
+ struct bkey_s_c_inode_blockdev inode =
+ bkey_s_c_to_inode_blockdev(k);
pr_debug("found inode %llu: %pU (u64s %u)",
- k->p.inode, inode->v.i_uuid.b, k->u64s);
+ inode.k->p.inode, inode.v->i_uuid.b,
+ inode.k->u64s);
- if (!memcmp(uuid, &inode->v.i_uuid, 16)) {
- *ret = *inode;
+ if (!memcmp(uuid, &inode.v->i_uuid, 16)) {
+ bkey_reassemble(&ret->k_i, k);
bch_btree_iter_unlock(&iter);
return 0;
}
diff --git a/drivers/md/bcache/inode.h b/drivers/md/bcache/inode.h
index 01ef14145360..d95a2879c421 100644
--- a/drivers/md/bcache/inode.h
+++ b/drivers/md/bcache/inode.h
@@ -6,8 +6,8 @@ extern const struct bkey_ops bch_bkey_inode_ops;
ssize_t bch_inode_status(char *, size_t, const struct bkey *);
-int bch_inode_create(struct cache_set *, struct bkey *, u64, u64, u64 *);
-int bch_inode_update(struct cache_set *, struct bkey *);
+int bch_inode_create(struct cache_set *, struct bkey_i *, u64, u64, u64 *);
+int bch_inode_update(struct cache_set *, struct bkey_i *);
int bch_inode_truncate(struct cache_set *, u64, u64);
int bch_inode_rm(struct cache_set *, u64);
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 47383fc9cf71..98c70587dc98 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -86,14 +86,15 @@ void bch_bbio_prep(struct bbio *b, struct cache *ca)
b->bi_bvec_done = iter->bi_bvec_done;
}
-void bch_submit_bbio(struct bbio *b, struct cache *ca, const struct bkey *k,
+/* XXX: should be bkey, not bkey_i */
+void bch_submit_bbio(struct bbio *b, struct cache *ca, const struct bkey_i *k,
const struct bch_extent_ptr *ptr, bool punt)
{
struct bio *bio = &b->bio;
b->key = *k;
b->ptr = *ptr;
- bch_set_extent_ptrs(bkey_i_to_extent(&b->key), 1);
+ bch_set_extent_ptrs(bkey_i_to_s_extent(&b->key), 1);
bch_bbio_prep(b, ca);
b->submit_time_us = local_clock_us();
@@ -106,10 +107,10 @@ void bch_submit_bbio(struct bbio *b, struct cache *ca, const struct bkey *k,
}
void bch_submit_bbio_replicas(struct bio *bio, struct cache_set *c,
- const struct bkey *k, unsigned ptrs_from,
+ const struct bkey_i *k, unsigned ptrs_from,
bool punt)
{
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
struct cache *ca;
unsigned ptr;
@@ -117,13 +118,14 @@ void bch_submit_bbio_replicas(struct bio *bio, struct cache_set *c,
ptr < bch_extent_ptrs(e);
ptr++) {
rcu_read_lock();
- ca = PTR_CACHE(c, &e->v.ptr[ptr]);
+ ca = PTR_CACHE(c, &e.v->ptr[ptr]);
if (ca)
percpu_ref_get(&ca->ref);
rcu_read_unlock();
if (!ca) {
- bch_submit_bbio(to_bbio(bio), ca, &e->k, &e->v.ptr[ptr], punt);
+ bch_submit_bbio(to_bbio(bio), ca, k,
+ &e.v->ptr[ptr], punt);
break;
}
@@ -132,9 +134,11 @@ void bch_submit_bbio_replicas(struct bio *bio, struct cache_set *c,
ca->replica_set);
n->bi_end_io = bio->bi_end_io;
n->bi_private = bio->bi_private;
- bch_submit_bbio(to_bbio(n), ca, &e->k, &e->v.ptr[ptr], punt);
+ bch_submit_bbio(to_bbio(n), ca, k,
+ &e.v->ptr[ptr], punt);
} else {
- bch_submit_bbio(to_bbio(bio), ca, &e->k, &e->v.ptr[ptr], punt);
+ bch_submit_bbio(to_bbio(bio), ca, k,
+ &e.v->ptr[ptr], punt);
}
}
}
@@ -144,8 +148,8 @@ static void bch_bbio_reset(struct bbio *b)
struct bvec_iter *iter = &b->bio.bi_iter;
bio_reset(&b->bio);
- iter->bi_sector = bkey_start_offset(&b->key);
- iter->bi_size = b->key.size << 9;
+ iter->bi_sector = bkey_start_offset(&b->key.k);
+ iter->bi_size = b->key.k.size << 9;
iter->bi_idx = b->bi_idx;
iter->bi_bvec_done = b->bi_bvec_done;
}
@@ -337,12 +341,12 @@ static void bch_write_discard(struct closure *cl)
{
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct bio *bio = op->bio;
- u64 inode = op->insert_key.p.inode;
+ u64 inode = op->insert_key.k.p.inode;
op->error = bch_discard(op->c,
POS(inode, bio->bi_iter.bi_sector),
POS(inode, bio_end_sector(bio)),
- op->insert_key.version);
+ op->insert_key.k.version);
}
static void bch_write_error(struct closure *cl)
@@ -358,15 +362,15 @@ static void bch_write_error(struct closure *cl)
* from the keys we'll accomplish just that.
*/
- struct bkey *src = bch_keylist_front(&op->insert_keys);
- struct bkey *dst = bch_keylist_front(&op->insert_keys);
+ struct bkey_i *src = bch_keylist_front(&op->insert_keys);
+ struct bkey_i *dst = bch_keylist_front(&op->insert_keys);
while (src != op->insert_keys.top) {
- struct bkey *n = bkey_next(src);
+ struct bkey_i *n = bkey_next(src);
- set_bkey_val_u64s(src, 0);
- src->type = KEY_TYPE_DISCARD;
- memmove(dst, src, bkey_bytes(src));
+ set_bkey_val_u64s(&src->k, 0);
+ src->k.type = KEY_TYPE_DISCARD;
+ memmove(dst, src, bkey_bytes(&src->k));
dst = bkey_next(dst);
src = n;
@@ -384,7 +388,7 @@ static void bch_write_endio(struct bio *bio, int error)
if (error) {
/* TODO: We could try to recover from this. */
- if (!bkey_extent_cached(&op->insert_key)) {
+ if (!bkey_extent_cached(bkey_i_to_s_c(&op->insert_key))) {
__bcache_io_error(op->c, "IO error writing data");
op->error = error;
} else if (!op->replace)
@@ -412,8 +416,8 @@ static void __bch_write(struct closure *cl)
continue_at(cl, bch_write_done, op->c->wq);
}
- bch_extent_drop_stale(op->c, &op->insert_key);
- ptrs_from = bch_extent_ptrs(bkey_i_to_extent(&op->insert_key));
+ bch_extent_drop_stale(op->c, bkey_i_to_s(&op->insert_key));
+ ptrs_from = bch_extent_ptrs(bkey_i_to_s_extent(&op->insert_key));
/*
* Journal writes are marked REQ_FLUSH; if the original write was a
@@ -422,10 +426,10 @@ static void __bch_write(struct closure *cl)
bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA);
do {
- struct bkey *k;
+ struct bkey_i *k;
struct bio_set *split = op->c->bio_split;
- BUG_ON(bio_sectors(bio) != op->insert_key.size);
+ BUG_ON(bio_sectors(bio) != op->insert_key.k.size);
if (open_bucket_nr == ARRAY_SIZE(op->open_buckets))
continue_at(cl, bch_write_index,
@@ -459,16 +463,16 @@ static void __bch_write(struct closure *cl)
op->open_buckets[open_bucket_nr++] = b;
- bch_cut_front(k->p, &op->insert_key);
+ bch_cut_front(k->k.p, &op->insert_key);
- n = bio_next_split(bio, k->size, GFP_NOIO, split);
+ n = bio_next_split(bio, k->k.size, GFP_NOIO, split);
n->bi_end_io = bch_write_endio;
n->bi_private = cl;
#if 0
if (KEY_CSUM(k))
bio_csum(n, k);
#endif
- trace_bcache_cache_insert(k);
+ trace_bcache_cache_insert(&k->k);
n->bi_rw |= REQ_WRITE;
#ifndef CONFIG_BCACHE_NO_IO
@@ -479,15 +483,14 @@ static void __bch_write(struct closure *cl)
bio_endio(n, 0);
#endif
- BUG_ON(bch_extent_normalize(op->c, k));
-
+ BUG_ON(bch_extent_normalize(op->c, bkey_i_to_s(k)));
bch_check_mark_super(op->c, k, false);
/*
* Do this after normalize since EXTENT_CACHED is stored
* in the first pointer
*/
- if (op->cached && k->type == BCH_EXTENT)
+ if (op->cached && k->k.type == BCH_EXTENT)
SET_EXTENT_CACHED(&bkey_i_to_extent(k)->v, true);
bch_keylist_enqueue(&op->insert_keys);
@@ -575,10 +578,10 @@ void bch_write(struct closure *cl)
{
struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
struct cache_set *c = op->c;
- u64 inode = op->insert_key.p.inode;
+ u64 inode = op->insert_key.k.p.inode;
trace_bcache_write(c, inode, op->bio,
- !bkey_extent_cached(&op->insert_key),
+ !bkey_extent_cached(bkey_i_to_s_c(&op->insert_key)),
op->discard);
if (!bio_sectors(op->bio)) {
@@ -593,7 +596,7 @@ void bch_write(struct closure *cl)
}
if (version_stress_test(c))
- op->insert_key.version = bch_rand_range(UINT_MAX);
+ op->insert_key.k.version = bch_rand_range(UINT_MAX);
/*
* This ought to be initialized in bch_write_op_init(), but struct
@@ -617,8 +620,8 @@ void bch_write(struct closure *cl)
wake_up_process(c->gc_thread);
}
- op->insert_key.p.offset = bio_end_sector(op->bio);
- op->insert_key.size = bio_sectors(op->bio);
+ op->insert_key.k.p.offset = bio_end_sector(op->bio);
+ op->insert_key.k.size = bio_sectors(op->bio);
bch_keylist_init(&op->insert_keys);
bio_get(op->bio);
@@ -667,8 +670,8 @@ EXPORT_SYMBOL(bch_write);
void bch_write_op_init(struct bch_write_op *op, struct cache_set *c,
struct bio *bio, struct write_point *wp,
- const struct bkey *insert_key,
- const struct bkey *replace_key,
+ struct bkey_s_c insert_key,
+ struct bkey_s_c replace_key,
unsigned flags)
{
if (!wp) {
@@ -692,24 +695,24 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c,
op->btree_alloc_reserve = BTREE_ID_EXTENTS;
bch_keylist_init(&op->insert_keys);
- bkey_copy(&op->insert_key, insert_key);
+ bkey_reassemble(&op->insert_key, insert_key);
- if (!bkey_val_u64s(&op->insert_key)) {
+ if (!bkey_val_u64s(&op->insert_key.k)) {
/*
* If the new key has no pointers, we're either doing a
* discard or we're writing new data and we're going to
* allocate pointers
*/
- op->insert_key.type = op->discard
+ op->insert_key.k.type = op->discard
? KEY_TYPE_DISCARD
: BCH_EXTENT;
}
- if (replace_key) {
+ if (replace_key.k) {
op->replace = true;
/* The caller can overwrite any replace_info fields */
memset(&op->replace_info, 0, sizeof(op->replace_info));
- bkey_copy(&op->replace_info.key, replace_key);
+ bkey_reassemble(&op->replace_info.key, replace_key);
}
}
EXPORT_SYMBOL(bch_write_op_init);
@@ -735,27 +738,28 @@ int bch_discard(struct cache_set *c, struct bpos start,
struct bpos end, u64 version)
{
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
int ret = 0;
bch_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, start);
- while ((k = bch_btree_iter_peek(&iter)) &&
- bkey_cmp(bkey_start_pos(k), end) < 0) {
+ while ((k = bch_btree_iter_peek(&iter)).k &&
+ bkey_cmp(bkey_start_pos(k.k), end) < 0) {
unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
- struct bkey erase;
+ /* really shouldn't be using a bare, unpadded bkey_i */
+ struct bkey_i erase;
struct bpos n;
/* create the biggest key we can, to minimize writes */
- bkey_init(&erase);
- erase.type = KEY_TYPE_DISCARD;
- erase.version = version;
- erase.p = bkey_start_pos(k);
- bch_key_resize(&erase, max_sectors);
+ bkey_init(&erase.k);
+ erase.k.type = KEY_TYPE_DISCARD;
+ erase.k.version = version;
+ erase.k.p = bkey_start_pos(k.k);
+ bch_key_resize(&erase.k, max_sectors);
bch_cut_front(iter.pos, &erase);
- n = erase.p;
+ n = erase.k.p;
- bch_cut_back(end, &erase);
+ bch_cut_back(end, &erase.k);
ret = bch_btree_insert_at(&iter, &keylist_single(&erase),
NULL, NULL, 0, 0);
@@ -788,7 +792,7 @@ static void cache_promote_done(struct closure *cl)
struct bio_vec *bv;
if (op->iop.replace_collision) {
- trace_bcache_promote_collision(&op->iop.replace_info.key);
+ trace_bcache_promote_collision(&op->iop.replace_info.key.k);
atomic_inc(&op->iop.c->accounting.collector.cache_miss_collisions);
}
@@ -823,8 +827,8 @@ static void cache_promote_write(struct closure *cl)
struct bio *bio = op->iop.bio;
bio_reset(bio);
- bio->bi_iter.bi_sector = bkey_start_offset(&op->iop.insert_key);
- bio->bi_iter.bi_size = sector_bytes(op->iop.insert_key.size);
+ bio->bi_iter.bi_sector = bkey_start_offset(&op->iop.insert_key.k);
+ bio->bi_iter.bi_size = sector_bytes(op->iop.insert_key.k.size);
/* needed to reinit bi_vcnt so pages can be freed later */
bch_bio_map(bio, NULL);
@@ -832,8 +836,8 @@ static void cache_promote_write(struct closure *cl)
bio_copy_data(op->orig_bio, bio);
else {
struct bbio *orig_bbio = to_bbio(op->orig_bio);
- u64 offset = sector_bytes(bkey_start_offset(&orig_bbio->key) -
- bkey_start_offset(&op->iop.insert_key));
+ u64 offset = sector_bytes(bkey_start_offset(&orig_bbio->key.k) -
+ bkey_start_offset(&op->iop.insert_key.k));
bio_copy_data_w_offsets(op->orig_bio, 0, bio, offset);
}
@@ -869,12 +873,12 @@ static void cache_promote_endio(struct bio *bio, int error)
bch_bbio_endio(b, error, "reading from cache");
}
-static uint64_t calculate_start_sector(const struct bkey *full,
+static uint64_t calculate_start_sector(struct bkey_s_c full,
struct bbio *bbio,
bool *stale)
{
- struct bkey_i_extent *need = bkey_i_to_extent(&bbio->key);
- const struct bkey_i_extent *e = bkey_i_to_extent_c(full);
+ struct bkey_s_c_extent need = bkey_i_to_s_c_extent(&bbio->key);
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(full);
struct cache *ca = bbio->ca;
struct cache *ca2;
const struct bch_extent_ptr *ptr;
@@ -914,14 +918,14 @@ static uint64_t calculate_start_sector(const struct bkey *full,
*/
void __cache_promote(struct cache_set *c, struct bbio *orig_bio,
- const struct bkey *old,
- const struct bkey *new,
+ struct bkey_s_c old,
+ struct bkey_s_c new,
unsigned write_flags)
{
bool read_full, stale;
struct cache_promote_op *op;
struct bio *bio;
- unsigned pages, size = sector_bytes(new->size);
+ unsigned pages, size = sector_bytes(new.k->size);
read_full = (test_bit(CACHE_SET_CACHE_FULL_EXTENTS, &c->flags)
&& orig_bio->ca != NULL);
@@ -985,8 +989,10 @@ void __cache_promote(struct cache_set *c, struct bbio *orig_bio,
new, old, write_flags);
if (!read_full) {
- bch_cut_front(bkey_start_pos(&orig_bio->key), &op->iop.insert_key);
- bch_cut_back(orig_bio->key.p, &op->iop.insert_key);
+ bch_cut_front(bkey_start_pos(&orig_bio->key.k),
+ &op->iop.insert_key);
+ bch_cut_back(orig_bio->key.k.p,
+ &op->iop.insert_key.k);
}
trace_bcache_promote(&orig_bio->bio);
@@ -1008,7 +1014,7 @@ out_submit:
*
* @bio must actually be a bbio with valid key.
*/
-bool cache_promote(struct cache_set *c, struct bbio *bio, const struct bkey *k)
+bool cache_promote(struct cache_set *c, struct bbio *bio, struct bkey_s_c k)
{
if (!CACHE_TIER(&bio->ca->mi)) {
generic_make_request(&bio->bio);
@@ -1119,7 +1125,7 @@ int bch_read_with_versions(struct cache_set *c,
struct bch_versions_result *v)
{
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
bch_increment_clock(c, bio_sectors(bio), READ);
@@ -1132,12 +1138,13 @@ int bch_read_with_versions(struct cache_set *c,
const struct bch_extent_ptr *ptr;
bool done;
- BUG_ON(bkey_cmp(bkey_start_pos(k),
+ BUG_ON(bkey_cmp(bkey_start_pos(k.k),
POS(inode, bio->bi_iter.bi_sector)) > 0);
- BUG_ON(bkey_cmp(k->p, POS(inode, bio->bi_iter.bi_sector)) <= 0);
+ BUG_ON(bkey_cmp(k.k->p,
+ POS(inode, bio->bi_iter.bi_sector)) <= 0);
- sectors = k->p.offset - bio->bi_iter.bi_sector;
+ sectors = k.k->p.offset - bio->bi_iter.bi_sector;
done = sectors >= bio_sectors(bio);
ca = bch_extent_pick_ptr(c, k, &ptr);
@@ -1147,25 +1154,24 @@ int bch_read_with_versions(struct cache_set *c,
}
if (ca) {
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
__BKEY_PADDED(key, 1) tmp;
PTR_BUCKET(ca, ptr)->read_prio =
c->prio_clock[READ].hand;
- bch_bkey_copy_single_ptr(&tmp.key, &e->k, ptr - e->v.ptr);
+ bch_bkey_copy_single_ptr(&tmp.key, k, ptr - e.v->ptr);
/* Trim the key to match what we're actually reading */
bch_cut_front(POS(inode, bio->bi_iter.bi_sector),
&tmp.key);
if (sectors > bio_sectors(bio))
bch_cut_back(POS(inode, bio_end_sector(bio)),
- &tmp.key);
+ &tmp.key.k);
- if (!add_version(v,
- e->k.version,
- sector_bytes(bkey_start_offset(&tmp.key)),
- sector_bytes(tmp.key.size))) {
+ if (!add_version(v, e.k->version,
+ sector_bytes(bkey_start_offset(&tmp.key.k)),
+ sector_bytes(tmp.key.k.size))) {
percpu_ref_put(&ca->ref);
goto version_error_out;
}
@@ -1194,8 +1200,8 @@ int bch_read_with_versions(struct cache_set *c,
bio_sectors(bio)) << 9;
if (!add_version(v,
- k->version,
- sector_bytes(bkey_start_offset(k)),
+ k.k->version,
+ sector_bytes(bkey_start_offset(k.k)),
bytes))
goto version_error_out;
@@ -1256,7 +1262,7 @@ static void bch_read_retry(struct bbio *bbio)
* The inode, offset and size come from the bbio's key,
* which was set by bch_read_fn().
*/
- inode = bbio->key.p.inode;
+ inode = bbio->key.k.p.inode;
parent = bio->bi_private;
bch_bbio_reset(bbio);
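
Every io.c conversion above follows the same scheme: an inline key that owns its value (bkey_i), plus thin split references carrying separate key and value pointers (bkey_s_c for read-only access, bkey_s for mutable access), with per-type wrappers such as bkey_s_c_extent on top. The stand-in definitions below are only a rough sketch for orientation, assuming simplified layouts; the real definitions and the accessor-generating macros live in bkey.h (modified elsewhere in this patch) and differ in detail. Later sketches in this section reuse these stand-ins.

	/* Simplified stand-ins, illustration only -- not the real bkey.h layouts. */
	struct bkey {
		unsigned char	u64s;		/* total size of key + value, in u64s */
		unsigned char	type;
		unsigned short	size;		/* extent size in sectors (narrowed here) */
		unsigned int	version;
		/* position etc. omitted; kept at a multiple of 8 bytes */
	};

	struct bch_val {
		unsigned long long	_data[1];	/* variable length in reality */
	};

	/* Header size in u64s; corresponds to BKEY_U64s in the patch. */
	#define BKEY_U64S_SKETCH	(sizeof(struct bkey) / sizeof(unsigned long long))

	struct bkey_i	{ struct bkey k; struct bch_val v; };			/* inline key + value */
	struct bkey_s_c	{ const struct bkey *k; const struct bch_val *v; };	/* const split reference */
	struct bkey_s	{ struct bkey *k; struct bch_val *v; };			/* mutable split reference */

	/* Approximations of the conversions used at the call sites above: */
	static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k)
	{
		return (struct bkey_s_c) { .k = &k->k, .v = &k->v };
	}

	static inline struct bkey_s bkey_i_to_s(struct bkey_i *k)
	{
		return (struct bkey_s) { .k = &k->k, .v = &k->v };
	}
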
diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h
index 186baf16fa11..d535c2585bf0 100644
--- a/drivers/md/bcache/io.h
+++ b/drivers/md/bcache/io.h
@@ -11,16 +11,16 @@ struct bio *bch_bbio_alloc(struct cache_set *);
void bch_generic_make_request(struct bio *, struct cache_set *);
void bch_bio_submit_work(struct work_struct *);
void bch_bbio_prep(struct bbio *, struct cache *);
-void bch_submit_bbio(struct bbio *, struct cache *, const struct bkey *,
+void bch_submit_bbio(struct bbio *, struct cache *, const struct bkey_i *,
const struct bch_extent_ptr *, bool);
void bch_submit_bbio_replicas(struct bio *, struct cache_set *,
- const struct bkey *, unsigned, bool);
+ const struct bkey_i *, unsigned, bool);
int bch_discard(struct cache_set *, struct bpos, struct bpos, u64);
-void __cache_promote(struct cache_set *, struct bbio *, const struct bkey *,
- const struct bkey *, unsigned);
-bool cache_promote(struct cache_set *, struct bbio *, const struct bkey *);
+void __cache_promote(struct cache_set *, struct bbio *,
+ struct bkey_s_c, struct bkey_s_c, unsigned);
+bool cache_promote(struct cache_set *, struct bbio *, struct bkey_s_c);
void bch_read_race_work(struct work_struct *);
void bch_wake_delayed_writes(unsigned long data);
diff --git a/drivers/md/bcache/ioctl.c b/drivers/md/bcache/ioctl.c
index 393bfc3685e3..a73b886492eb 100644
--- a/drivers/md/bcache/ioctl.c
+++ b/drivers/md/bcache/ioctl.c
@@ -235,7 +235,7 @@ static void bch_ioctl_write(struct kiocb *req, struct cache_set *c,
bio->bi_io_vec = bio->bi_inline_vecs;
bch_write_op_init(&op->iop, c, bio, NULL,
- &extent, NULL, flags);
+ bkey_to_s_c(&extent), bkey_s_c_null, flags);
ret = bio_get_user_pages(bio, i.buf,
extent.size << 9, 0);
@@ -263,7 +263,7 @@ static void bch_ioctl_write(struct kiocb *req, struct cache_set *c,
static int __bch_list_keys(struct cache_set *c, struct bch_ioctl_list_keys *i)
{
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
int ret = 0;
unsigned type;
BKEY_PADDED(k) prev_key;
@@ -275,43 +275,44 @@ static int __bch_list_keys(struct cache_set *c, struct bch_ioctl_list_keys *i)
for_each_btree_key(&iter, c, i->btree_id, k, i->start) {
BKEY_PADDED(k) tmp;
- type = k->type;
+ type = k.k->type;
- if (bkey_cmp(bkey_start_pos(k), i->end) >= 0)
+ if (bkey_cmp(bkey_start_pos(k.k), i->end) >= 0)
break;
if (!(i->flags & BCH_IOCTL_LIST_VALUES)) {
- tmp.k = *k;
- set_bkey_deleted(&tmp.k);
- k = &tmp.k;
+ tmp.k.k = *k.k;
+ set_bkey_deleted(&tmp.k.k);
+ k = bkey_i_to_s_c(&tmp.k);
}
if (i->btree_id == BTREE_ID_EXTENTS) {
- if (type == KEY_TYPE_DISCARD && k->version == 0)
+ if (type == KEY_TYPE_DISCARD && k.k->version == 0)
continue;
- if (k != &tmp.k) {
- bkey_copy(&tmp.k, k);
- k = &tmp.k;
+ if (i->flags & BCH_IOCTL_LIST_VALUES) {
+ bkey_reassemble(&tmp.k, k);
+ k = bkey_i_to_s_c(&tmp.k);
}
- if (bkey_cmp(i->start, bkey_start_pos(k)) > 0)
+ if (bkey_cmp(i->start, bkey_start_pos(&tmp.k.k)) > 0)
bch_cut_front(i->start, &tmp.k);
- if (bkey_cmp(i->end, k->p) <= 0)
- bch_cut_back(i->end, &tmp.k);
+ if (bkey_cmp(i->end, tmp.k.k.p) <= 0)
+ bch_cut_back(i->end, &tmp.k.k);
if (i->keys_found &&
bch_bkey_try_merge(&iter.nodes[0]->keys,
- &prev_key.k, &tmp.k)) {
- i->keys_found -= prev_key.k.u64s;
- k = &prev_key.k;
+ &prev_key.k,
+ &tmp.k)) {
+ i->keys_found -= prev_key.k.k.u64s;
+ k = bkey_i_to_s_c(&prev_key.k);
} else {
- bkey_copy(&prev_key.k, k);
+ bkey_copy(&prev_key.k, &tmp.k);
}
}
- if (i->keys_found + k->u64s >
+ if (i->keys_found + k.k->u64s >
i->buf_size / sizeof(u64)) {
ret = -ENOSPC;
break;
@@ -319,12 +320,19 @@ static int __bch_list_keys(struct cache_set *c, struct bch_ioctl_list_keys *i)
if (copy_to_user(((u64 __user *) (unsigned long)
i->buf) + i->keys_found,
- k, bkey_bytes(k))) {
+ k.k, sizeof(*k.k))) {
ret = -EFAULT;
break;
}
- i->keys_found += k->u64s;
+ if (copy_to_user(((u64 __user *) (unsigned long) i->buf) +
+ i->keys_found + BKEY_U64s,
+ k.v, bkey_val_bytes(k.k))) {
+ ret = -EFAULT;
+ break;
+ }
+
+ i->keys_found += k.k->u64s;
}
bch_btree_iter_unlock(&iter);
@@ -390,7 +398,7 @@ static long bch_ioctl_inode_update(struct cache_set *c, unsigned long arg)
if (copy_from_user(&i, user_i, sizeof(i)))
return -EFAULT;
- if (bkey_invalid(c, BKEY_TYPE_INODES, &i.inode.k)) {
+ if (bkey_invalid(c, BKEY_TYPE_INODES, bkey_i_to_s_c(&i.inode.k_i))) {
char status[80];
bch_inode_status(status, sizeof(status), &i.inode.k);
@@ -398,7 +406,7 @@ static long bch_ioctl_inode_update(struct cache_set *c, unsigned long arg)
return -EINVAL;
}
- bch_inode_update(c, &i.inode.k);
+ bch_inode_update(c, &i.inode.k_i);
return 0;
}
@@ -412,7 +420,7 @@ static long bch_ioctl_inode_create(struct cache_set *c, unsigned long arg)
if (copy_from_user(&i, user_i, sizeof(i)))
return -EFAULT;
- ret = bch_inode_create(c, &i.inode.k, 0, BLOCKDEV_INODE_MAX,
+ ret = bch_inode_create(c, &i.inode.k_i, 0, BLOCKDEV_INODE_MAX,
&c->unused_inode_hint);
if (ret)
return ret;
@@ -540,29 +548,29 @@ int bch_copy(struct cache_set *c,
bch_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, src);
while (1) {
- const struct bkey *k = bch_btree_iter_peek(&iter);
+ struct bkey_s_c k = bch_btree_iter_peek(&iter);
BKEY_PADDED(key) copy;
- if (!k || bkey_cmp(bkey_start_pos(k), src_end) >= 0) {
+ if (!k.k || bkey_cmp(bkey_start_pos(k.k), src_end) >= 0) {
if (!bch_keylist_empty(&keys->list))
goto insert; /* insert any keys on our keylist */
break;
}
/* cut pointers to size */
- bkey_copy(&copy.key, k);
+ bkey_reassemble(&copy.key, k);
bch_cut_front(src, &copy.key);
- bch_cut_back(src_end, &copy.key);
+ bch_cut_back(src_end, &copy.key.k);
/* modify copy to reference destination */
- copy.key.p.inode = dst.inode;
- copy.key.p.offset -= src.offset;
- copy.key.p.offset += dst.offset;
+ copy.key.k.p.inode = dst.inode;
+ copy.key.k.p.offset -= src.offset;
+ copy.key.k.p.offset += dst.offset;
if (version != ((u64) 0ULL)) {
/* Version 0 means retain the source version */
/* Else use the new version */
- copy.key.version = version;
+ copy.key.k.version = version;
}
/*
@@ -570,7 +578,7 @@ int bch_copy(struct cache_set *c,
* key; otherwise insert what we've slurped up so far (and don't
* advance iter yet)
*/
- if (!bch_scan_keylist_add(keys, &copy.key)) {
+ if (!bch_scan_keylist_add(keys, bkey_i_to_s_c(&copy.key))) {
bch_btree_iter_advance_pos(&iter);
continue;
}
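
One consequence of the split references shows up in __bch_list_keys() above: the unpacked header (k.k) and the value (k.v) are no longer guaranteed to be contiguous in memory, so they are copied to the user buffer in two copy_to_user() calls while the buffer itself keeps the old contiguous [header][value] layout at u64 granularity. A minimal sketch of the same reassembly, reusing the stand-in types introduced after the io.c hunks and a plain memcpy() in place of copy_to_user():

	#include <string.h>

	/* Append one key to a u64 buffer as [unpacked header][value payload];
	 * k.k->u64s counts header plus value, as in the patch. */
	static void emit_key_sketch(unsigned long long *buf, unsigned *keys_found,
				    struct bkey_s_c k)
	{
		unsigned val_u64s = k.k->u64s - BKEY_U64S_SKETCH;

		memcpy(buf + *keys_found, k.k, sizeof(*k.k));
		memcpy(buf + *keys_found + BKEY_U64S_SKETCH, k.v,
		       val_u64s * sizeof(unsigned long long));
		*keys_found += k.k->u64s;
	}
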
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index da79166fe030..87c6a115a785 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -54,10 +54,10 @@ static struct jset_entry *bch_journal_find_entry(struct jset *j, unsigned type,
return NULL;
}
-struct bkey *bch_journal_find_btree_root(struct cache_set *c, struct jset *j,
- enum btree_id id, unsigned *level)
+struct bkey_i *bch_journal_find_btree_root(struct cache_set *c, struct jset *j,
+ enum btree_id id, unsigned *level)
{
- struct bkey *k;
+ struct bkey_i *k;
struct jset_entry *jkeys =
bch_journal_find_entry(j, JKEYS_BTREE_ROOT, id);
@@ -67,8 +67,8 @@ struct bkey *bch_journal_find_btree_root(struct cache_set *c, struct jset *j,
k = jkeys->start;
*level = jkeys->level;
- if (!jkeys->u64s || jkeys->u64s != k->u64s ||
- bkey_invalid(c, BKEY_TYPE_BTREE, k)) {
+ if (!jkeys->u64s || jkeys->u64s != k->k.u64s ||
+ bkey_invalid(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(k))) {
bch_cache_set_error(c, "invalid btree root in journal");
return NULL;
}
@@ -78,9 +78,9 @@ struct bkey *bch_journal_find_btree_root(struct cache_set *c, struct jset *j,
}
static void bch_journal_add_btree_root(struct jset *j, enum btree_id id,
- struct bkey *k, unsigned level)
+ struct bkey_i *k, unsigned level)
{
- bch_journal_add_entry(j, k, k->u64s, JKEYS_BTREE_ROOT, id, level);
+ bch_journal_add_entry(j, k, k->k.u64s, JKEYS_BTREE_ROOT, id, level);
}
static inline void bch_journal_add_prios(struct cache_set *c, struct jset *j)
@@ -486,7 +486,7 @@ const char *bch_journal_read(struct cache_set *c, struct list_head *list)
void bch_journal_mark(struct cache_set *c, struct list_head *list)
{
- struct bkey *k;
+ struct bkey_i *k;
struct jset_entry *j;
struct journal_replay *r;
@@ -494,19 +494,21 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
for_each_jset_key(k, j, &r->j) {
if ((j->level || j->btree_id == BTREE_ID_EXTENTS) &&
!bkey_invalid(c, j->level
- ? BKEY_TYPE_BTREE : j->btree_id, k))
- __bch_btree_mark_key(c, j->level, k);
+ ? BKEY_TYPE_BTREE : j->btree_id,
+ bkey_i_to_s_c(k)))
+ __bch_btree_mark_key(c, j->level,
+ bkey_i_to_s_c(k));
}
}
static int bch_journal_replay_key(struct cache_set *c, enum btree_id id,
- struct bkey *k)
+ struct bkey_i *k)
{
int ret;
BKEY_PADDED(key) temp;
- bool do_subtract = id == BTREE_ID_EXTENTS && k->type == BCH_EXTENT;
+ bool do_subtract = id == BTREE_ID_EXTENTS && k->k.type == BCH_EXTENT;
- trace_bcache_journal_replay_key(k);
+ trace_bcache_journal_replay_key(&k->k);
if (do_subtract)
bkey_copy(&temp.key, k);
@@ -520,9 +522,9 @@ static int bch_journal_replay_key(struct cache_set *c, enum btree_id id,
* them again
*/
if (do_subtract)
- __bch_add_sectors(c, NULL, bkey_i_to_extent_c(&temp.key),
- bkey_start_offset(&temp.key),
- -temp.key.size, false);
+ __bch_add_sectors(c, NULL, bkey_i_to_s_c_extent(&temp.key),
+ bkey_start_offset(&temp.key.k),
+ -temp.key.k.size, false);
return 0;
}
@@ -530,7 +532,7 @@ static int bch_journal_replay_key(struct cache_set *c, enum btree_id id,
int bch_journal_replay(struct cache_set *c, struct list_head *list)
{
int ret = 0, keys = 0, entries = 0;
- struct bkey *k;
+ struct bkey_i *k;
struct jset_entry *jkeys;
struct journal_replay *i =
list_entry(list->prev, struct journal_replay, list);
@@ -724,14 +726,14 @@ static size_t journal_write_u64s_remaining(struct cache_set *c)
static void journal_entry_no_room(struct cache_set *c)
{
- struct bkey_i_extent *e = bkey_i_to_extent(&c->journal.key);
+ struct bkey_s_extent e = bkey_i_to_s_extent(&c->journal.key);
struct bch_extent_ptr *ptr;
struct cache *ca;
extent_for_each_ptr_backwards(e, ptr)
if (!(ca = PTR_CACHE(c, ptr)) ||
ca->journal.sectors_free <= c->journal.sectors_free)
- bch_extent_drop_ptr(&e->k, ptr - e->v.ptr);
+ bch_extent_drop_ptr(e, ptr - e.v->ptr);
c->journal.sectors_free = 0;
c->journal.u64s_remaining = 0;
@@ -882,7 +884,7 @@ static bool journal_reclaim_fast(struct cache_set *c)
*/
static void journal_next_bucket(struct cache_set *c)
{
- struct bkey_i_extent *e = bkey_i_to_extent(&c->journal.key);
+ struct bkey_s_extent e = bkey_i_to_s_extent(&c->journal.key);
struct bch_extent_ptr *ptr;
struct cache *ca;
unsigned iter;
@@ -910,7 +912,7 @@ static void journal_next_bucket(struct cache_set *c)
if (!(ca = PTR_CACHE(c, ptr)) ||
!ca->journal.sectors_free ||
CACHE_STATE(&ca->mi) != CACHE_ACTIVE)
- bch_extent_drop_ptr(&e->k, ptr - e->v.ptr);
+ bch_extent_drop_ptr(e, ptr - e.v->ptr);
/*
* Determine location of the next journal write:
@@ -930,7 +932,8 @@ static void journal_next_bucket(struct cache_set *c)
*/
if ((CACHE_TIER(&ca->mi) != 0) ||
(CACHE_STATE(&ca->mi) != CACHE_ACTIVE) ||
- bch_extent_has_device(e, ca->sb.nr_this_dev))
+ bch_extent_has_device(extent_s_to_s_c(e),
+ ca->sb.nr_this_dev))
continue;
/* No journal buckets available for writing on this device */
@@ -942,7 +945,7 @@ static void journal_next_bucket(struct cache_set *c)
ja->sectors_free = ca->mi.bucket_size;
ja->cur_idx = next;
- e->v.ptr[bch_extent_ptrs(e)] =
+ e.v->ptr[bch_extent_ptrs(e)] =
PTR(0, bucket_to_sector(ca, journal_bucket(ca,
ja->cur_idx)),
ca->sb.nr_this_dev);
@@ -1021,7 +1024,7 @@ static void journal_write_locked(struct closure *cl)
struct cache_set *c = container_of(cl, struct cache_set, journal.io);
struct cache *ca;
struct journal_write *w = c->journal.cur;
- struct bkey_i_extent *e = bkey_i_to_extent(&c->journal.key);
+ struct bkey_s_extent e = bkey_i_to_s_extent(&c->journal.key);
struct bch_extent_ptr *ptr;
BKEY_PADDED(k) tmp;
unsigned i, sectors;
@@ -1119,7 +1122,7 @@ static void journal_write_locked(struct closure *cl)
* Make a copy of the key we're writing to for check_mark_super, since
* journal_next_bucket will change it
*/
- bkey_copy(&tmp.k, &e->k);
+ bkey_reassemble(&tmp.k, to_bkey_s_c(e));
atomic_dec_bug(&fifo_back(&c->journal.pin));
bch_journal_next(&c->journal);
@@ -1314,17 +1317,17 @@ void bch_journal_set_dirty(struct cache_set *c)
}
void bch_journal_add_keys(struct cache_set *c, struct journal_res *res,
- enum btree_id id, const struct bkey *k,
+ enum btree_id id, const struct bkey_i *k,
unsigned level)
{
- unsigned actual = jset_u64s(k->u64s);
+ unsigned actual = jset_u64s(k->k.u64s);
BUG_ON(!res->ref);
BUG_ON(actual > res->nkeys);
res->nkeys -= actual;
spin_lock(&c->journal.lock);
- bch_journal_add_entry(c->journal.cur->data, k, k->u64s,
+ bch_journal_add_entry(c->journal.cur->data, k, k->k.u64s,
JKEYS_BTREE_KEYS, id, level);
bch_journal_set_dirty(c);
spin_unlock(&c->journal.lock);
@@ -1403,7 +1406,7 @@ static bool bch_journal_writing_to_device(struct cache *ca)
bool ret;
spin_lock(&c->journal.lock);
- ret = bch_extent_has_device(bkey_i_to_extent(&c->journal.key),
+ ret = bch_extent_has_device(bkey_i_to_s_c_extent(&c->journal.key),
ca->sb.nr_this_dev);
spin_unlock(&c->journal.lock);
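
Several hunks above (bch_journal_replay_key(), journal_write_locked(), and many call sites elsewhere in the patch) switch from bkey_copy() to bkey_reassemble(). bkey_copy() duplicates one inline bkey_i into another byte for byte; bkey_reassemble() takes a split reference, whose header may have been unpacked out of a packed btree node, and rebuilds an inline unpacked bkey_i from it. A conceptual sketch under the stand-in types above (the real helper additionally has to handle the packed on-disk key format):

	#include <string.h>

	static inline unsigned bkey_val_bytes_sketch(const struct bkey *k)
	{
		return (k->u64s - BKEY_U64S_SKETCH) * sizeof(unsigned long long);
	}

	/* dst must provide room for the value, e.g. a BKEY_PADDED() buffer
	 * as in the hunks above. */
	static inline void bkey_reassemble_sketch(struct bkey_i *dst,
						  struct bkey_s_c src)
	{
		dst->k = *src.k;	/* unpacked header */
		memcpy(&dst->v, src.v, bkey_val_bytes_sketch(src.k));
	}
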
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index 3723c6cce610..3613b3eb8c78 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -138,8 +138,8 @@ struct closure;
struct cache_set;
struct keylist;
-struct bkey *bch_journal_find_btree_root(struct cache_set *, struct jset *,
- enum btree_id, unsigned *);
+struct bkey_i *bch_journal_find_btree_root(struct cache_set *, struct jset *,
+ enum btree_id, unsigned *);
struct journal_res {
unsigned ref:1;
@@ -152,7 +152,7 @@ void bch_journal_res_get(struct cache_set *, struct journal_res *,
unsigned, unsigned);
void bch_journal_set_dirty(struct cache_set *);
void bch_journal_add_keys(struct cache_set *, struct journal_res *,
- enum btree_id, const struct bkey *,
+ enum btree_id, const struct bkey_i *,
unsigned);
static inline void bch_journal_res_put(struct cache_set *c,
diff --git a/drivers/md/bcache/keybuf.c b/drivers/md/bcache/keybuf.c
index fff8654997db..aaf300d858d4 100644
--- a/drivers/md/bcache/keybuf.c
+++ b/drivers/md/bcache/keybuf.c
@@ -14,9 +14,9 @@
static inline int keybuf_cmp(struct keybuf_key *l, struct keybuf_key *r)
{
/* Overlapping keys compare equal */
- if (bkey_cmp(l->key.p, bkey_start_pos(&r->key)) <= 0)
+ if (bkey_cmp(l->key.k.p, bkey_start_pos(&r->key.k)) <= 0)
return -1;
- if (bkey_cmp(bkey_start_pos(&l->key), r->key.p) >= 0)
+ if (bkey_cmp(bkey_start_pos(&l->key.k), r->key.k.p) >= 0)
return 1;
return 0;
}
@@ -24,7 +24,7 @@ static inline int keybuf_cmp(struct keybuf_key *l, struct keybuf_key *r)
static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
struct keybuf_key *r)
{
- return clamp_t(int64_t, bkey_cmp(l->key.p, r->key.p), -1, 1);
+ return clamp_t(int64_t, bkey_cmp(l->key.k.p, r->key.k.p), -1, 1);
}
/*
@@ -58,14 +58,14 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
{
struct bpos start = buf->last_scanned;
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
unsigned nr_found = 0;
cond_resched();
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, k, buf->last_scanned) {
- if (bkey_cmp(k->p, end) >= 0) {
- buf->last_scanned = k->p;
+ if (bkey_cmp(k.k->p, end) >= 0) {
+ buf->last_scanned = k.k->p;
goto done;
}
@@ -80,7 +80,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
goto done;
}
- bkey_copy(&w->key, k);
+ bkey_reassemble(&w->key, k);
atomic_set(&w->ref, -1); /* -1 means hasn't started */
if (RB_INSERT(&buf->keys, w, node, keybuf_cmp))
@@ -91,7 +91,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
spin_unlock(&buf->lock);
}
- buf->last_scanned = k->p;
+ buf->last_scanned = k.k->p;
}
/* If we end up here, it means:
@@ -113,10 +113,10 @@ done:
if (!RB_EMPTY_ROOT(&buf->keys)) {
struct keybuf_key *w;
w = RB_FIRST(&buf->keys, struct keybuf_key, node);
- buf->start = bkey_start_pos(&w->key);
+ buf->start = bkey_start_pos(&w->key.k);
w = RB_LAST(&buf->keys, struct keybuf_key, node);
- buf->end = w->key.p;
+ buf->end = w->key.k.p;
} else {
buf->start = POS_MAX;
buf->end = POS_MAX;
@@ -152,7 +152,8 @@ void bch_keybuf_recalc_oldest_gens(struct cache_set *c, struct keybuf *buf)
rcu_read_lock();
rbtree_postorder_for_each_entry_safe(w, n,
&buf->keys, node)
- bch_btree_key_recalc_oldest_gen(c, bkey_i_to_extent(&w->key));
+ bch_btree_key_recalc_oldest_gen(c,
+ bkey_i_to_s_c_extent(&w->key));
rcu_read_unlock();
spin_unlock(&buf->lock);
}
@@ -161,7 +162,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bpos start,
struct bpos end)
{
bool ret = false;
- struct keybuf_key *w, *next, s = { .key.p = start };
+ struct keybuf_key *w, *next, s = { .key.k.p = start };
if (bkey_cmp(end, buf->start) <= 0 ||
bkey_cmp(start, buf->end) >= 0)
@@ -170,7 +171,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bpos start,
spin_lock(&buf->lock);
for (w = RB_GREATER(&buf->keys, s, node, keybuf_nonoverlapping_cmp);
- w && bkey_cmp(bkey_start_pos(&w->key), end) < 0;
+ w && bkey_cmp(bkey_start_pos(&w->key.k), end) < 0;
w = next) {
next = RB_NEXT(w, node);
diff --git a/drivers/md/bcache/keybuf.h b/drivers/md/bcache/keybuf.h
index 4686d7f8a892..5ece121a5c6b 100644
--- a/drivers/md/bcache/keybuf.h
+++ b/drivers/md/bcache/keybuf.h
@@ -3,7 +3,7 @@
#include "keybuf_types.h"
-typedef bool (keybuf_pred_fn)(struct keybuf *, const struct bkey *);
+typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey_s_c);
void bch_keybuf_init(struct keybuf *, unsigned size, unsigned in_flight);
void bch_refill_keybuf(struct cache_set *, struct keybuf *,
diff --git a/drivers/md/bcache/keylist.c b/drivers/md/bcache/keylist.c
index f17351051909..7fcd48271631 100644
--- a/drivers/md/bcache/keylist.c
+++ b/drivers/md/bcache/keylist.c
@@ -94,9 +94,9 @@ int bch_keylist_realloc(struct keylist *l, unsigned needu64s)
return bch_keylist_realloc_max(l, needu64s, KEYLIST_MAX);
}
-void bch_keylist_add_in_order(struct keylist *l, struct bkey *insert)
+void bch_keylist_add_in_order(struct keylist *l, struct bkey_i *insert)
{
- struct bkey *where = l->bot;
+ struct bkey_i *where = l->bot;
/*
* Shouldn't fire since we only use this on a fresh keylist
@@ -105,14 +105,14 @@ void bch_keylist_add_in_order(struct keylist *l, struct bkey *insert)
BUG_ON(l->top_p < l->bot_p);
while (where != l->top &&
- bkey_cmp(insert->p, where->p) >= 0)
+ bkey_cmp(insert->k.p, where->k.p) >= 0)
where = bkey_next(where);
- memmove((u64 *) where + insert->u64s,
+ memmove((u64 *) where + insert->k.u64s,
where,
((void *) l->top) - ((void *) where));
- l->top_p += insert->u64s;
+ l->top_p += insert->k.u64s;
BUG_ON(l->top_p > l->end_keys_p);
bkey_copy(where, insert);
}
@@ -199,31 +199,32 @@ for (k = ACCESS_ONCE((l)->bot); \
*/
void bch_keylist_recalc_oldest_gens(struct cache_set *c, struct scan_keylist *kl)
{
- struct bkey *k;
+ struct bkey_i *k;
mutex_lock(&kl->lock);
keylist_for_each(k, &kl->list) {
rcu_read_lock();
- bch_btree_key_recalc_oldest_gen(c, bkey_i_to_extent(k));
+ bch_btree_key_recalc_oldest_gen(c, bkey_i_to_s_c_extent(k));
rcu_read_unlock();
}
mutex_unlock(&kl->lock);
}
-int bch_scan_keylist_add(struct scan_keylist *kl, const struct bkey *k)
+int bch_scan_keylist_add(struct scan_keylist *kl, struct bkey_s_c k)
{
int ret;
mutex_lock(&kl->lock);
ret = bch_keylist_realloc_max(&kl->list,
- k->u64s,
+ k.k->u64s,
kl->max_size);
if (!ret) {
- bch_keylist_add(&kl->list, k);
- atomic64_add(k->size, &kl->sectors);
+ bkey_reassemble(kl->list.top, k);
+ bch_keylist_enqueue(&kl->list);
+ atomic64_add(k.k->size, &kl->sectors);
}
mutex_unlock(&kl->lock);
@@ -240,14 +241,14 @@ static void bch_refill_scan_keylist(struct cache_set *c,
{
struct bpos start = *last_scanned;
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
unsigned nr_found = 0;
cond_resched();
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, k, *last_scanned) {
- if (bkey_cmp(k->p, end) >= 0) {
- *last_scanned = k->p;
+ if (bkey_cmp(k.k->p, end) >= 0) {
+ *last_scanned = k.k->p;
goto done;
}
@@ -258,7 +259,7 @@ static void bch_refill_scan_keylist(struct cache_set *c,
nr_found++;
}
- *last_scanned = k->p;
+ *last_scanned = k.k->p;
}
/* If we end up here, it means:
@@ -276,7 +277,7 @@ done:
last_scanned->offset);
}
-struct bkey *bch_scan_keylist_next(struct scan_keylist *kl)
+struct bkey_i *bch_scan_keylist_next(struct scan_keylist *kl)
{
if (bch_keylist_empty(&kl->list))
return NULL;
@@ -284,11 +285,11 @@ struct bkey *bch_scan_keylist_next(struct scan_keylist *kl)
return bch_keylist_front(&kl->list);
}
-struct bkey *bch_scan_keylist_next_rescan(struct cache_set *c,
- struct scan_keylist *kl,
- struct bpos *last_scanned,
- struct bpos end,
- scan_keylist_pred_fn *pred)
+struct bkey_i *bch_scan_keylist_next_rescan(struct cache_set *c,
+ struct scan_keylist *kl,
+ struct bpos *last_scanned,
+ struct bpos end,
+ scan_keylist_pred_fn *pred)
{
if (bch_keylist_empty(&kl->list)) {
if (bkey_cmp(*last_scanned, end) >= 0)
@@ -305,7 +306,7 @@ void bch_scan_keylist_dequeue(struct scan_keylist *kl)
uint64_t sectors;
mutex_lock(&kl->lock);
- sectors = kl->list.bot->size;
+ sectors = kl->list.bot->k.size;
bch_keylist_dequeue(&kl->list);
mutex_unlock(&kl->lock);
diff --git a/drivers/md/bcache/keylist.h b/drivers/md/bcache/keylist.h
index 4cfd295ef41a..b5fe5515a0f8 100644
--- a/drivers/md/bcache/keylist.h
+++ b/drivers/md/bcache/keylist.h
@@ -37,15 +37,15 @@ void bch_scan_keylist_destroy(struct scan_keylist *kl);
* copying the structure.
*/
-struct bkey *bch_scan_keylist_next(struct scan_keylist *);
+struct bkey_i *bch_scan_keylist_next(struct scan_keylist *);
-struct bkey *bch_scan_keylist_next_rescan(struct cache_set *c,
- struct scan_keylist *kl,
- struct bpos *last_scanned,
- struct bpos end,
- scan_keylist_pred_fn *pred);
+struct bkey_i *bch_scan_keylist_next_rescan(struct cache_set *c,
+ struct scan_keylist *kl,
+ struct bpos *last_scanned,
+ struct bpos end,
+ scan_keylist_pred_fn *pred);
-int bch_scan_keylist_add(struct scan_keylist *, const struct bkey *);
+int bch_scan_keylist_add(struct scan_keylist *, struct bkey_s_c);
void bch_scan_keylist_dequeue(struct scan_keylist *);
void bch_keylist_recalc_oldest_gens(struct cache_set *, struct scan_keylist *);
diff --git a/drivers/md/bcache/keylist_types.h b/drivers/md/bcache/keylist_types.h
index 1ee4aca2cfb6..bdc85642ac6d 100644
--- a/drivers/md/bcache/keylist_types.h
+++ b/drivers/md/bcache/keylist_types.h
@@ -47,6 +47,6 @@ struct scan_keylist {
struct moving_queue *owner;
};
-typedef bool (scan_keylist_pred_fn)(struct scan_keylist *, const struct bkey *);
+typedef bool (scan_keylist_pred_fn)(struct scan_keylist *, struct bkey_s_c);
#endif /* _BCACHE_KEYLIST_TYPES_H */
diff --git a/drivers/md/bcache/move.c b/drivers/md/bcache/move.c
index 07303a8ef502..a8b4761c7672 100644
--- a/drivers/md/bcache/move.c
+++ b/drivers/md/bcache/move.c
@@ -83,25 +83,25 @@ static void moving_init(struct moving_io *io)
bio_get(bio);
bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
- bio->bi_iter.bi_size = io->key.size << 9;
- bio->bi_max_vecs = DIV_ROUND_UP(io->key.size,
+ bio->bi_iter.bi_size = io->key.k.size << 9;
+ bio->bi_max_vecs = DIV_ROUND_UP(io->key.k.size,
PAGE_SECTORS);
bio->bi_private = &io->cl;
bio->bi_io_vec = bio->bi_inline_vecs;
bch_bio_map(bio, NULL);
}
-struct moving_io *moving_io_alloc(const struct bkey *k)
+struct moving_io *moving_io_alloc(struct bkey_s_c k)
{
struct moving_io *io;
io = kzalloc(sizeof(struct moving_io) + sizeof(struct bio_vec)
- * DIV_ROUND_UP(k->size, PAGE_SECTORS),
+ * DIV_ROUND_UP(k.k->size, PAGE_SECTORS),
GFP_KERNEL);
if (!io)
return NULL;
- bkey_copy(&io->key, k);
+ bkey_reassemble(&io->key, k);
moving_init(io);
@@ -135,7 +135,7 @@ static void moving_io_destructor(struct closure *cl)
bool kick_writes = true;
if (io->op.replace_collision)
- trace_bcache_copy_collision(q, &io->key);
+ trace_bcache_copy_collision(q, &io->key.k);
spin_lock_irqsave(&q->lock, flags);
@@ -150,7 +150,7 @@ static void moving_io_destructor(struct closure *cl)
if (io->write_issued) {
BUG_ON(!q->write_count);
q->write_count--;
- trace_bcache_move_write_done(q, &io->key);
+ trace_bcache_move_write_done(q, &io->key.k);
}
list_del_init(&io->list);
@@ -209,7 +209,7 @@ static void write_moving(struct moving_io *io)
else {
moving_init(io);
- op->bio->bi_iter.bi_sector = bkey_start_offset(&io->key);
+ op->bio->bi_iter.bi_sector = bkey_start_offset(&io->key.k);
closure_call(&op->cl, bch_write, NULL, &io->cl);
closure_return_with_destructor(&io->cl, moving_io_after_write);
@@ -251,7 +251,7 @@ static void bch_queue_write_work(struct work_struct *work)
io->write_issued = 1;
list_del(&io->list);
list_add_tail(&io->list, &q->write_pending);
- trace_bcache_move_write(q, &io->key);
+ trace_bcache_move_write(q, &io->key.k);
spin_unlock_irqrestore(&q->lock, flags);
write_moving(io);
spin_lock_irqsave(&q->lock, flags);
@@ -416,7 +416,8 @@ static void pending_recalc_oldest_gens(struct cache_set *c, struct list_head *l)
* to open buckets until the write completes
*/
rcu_read_lock();
- bch_btree_key_recalc_oldest_gen(c, bkey_i_to_extent(&io->key));
+ bch_btree_key_recalc_oldest_gen(c,
+ bkey_i_to_s_c_extent(&io->key));
rcu_read_unlock();
}
}
@@ -459,7 +460,7 @@ static void read_moving_endio(struct bio *bio, int error)
spin_lock_irqsave(&q->lock, flags);
- trace_bcache_move_read_done(q, &io->key);
+ trace_bcache_move_read_done(q, &io->key.k);
BUG_ON(!io->read_issued);
BUG_ON(io->read_completed);
@@ -485,11 +486,11 @@ static void __bch_data_move(struct closure *cl)
{
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct cache *ca;
- u64 size = io->key.size;
const struct bch_extent_ptr *ptr;
+ u64 size = io->key.k.size;
- ca = bch_extent_pick_ptr_avoiding(io->op.c, &io->key, &ptr,
- io->context->avoid);
+ ca = bch_extent_pick_ptr_avoiding(io->op.c, bkey_i_to_s_c(&io->key),
+ &ptr, io->context->avoid);
if (IS_ERR_OR_NULL(ca))
closure_return_with_destructor(cl, moving_io_destructor);
@@ -562,7 +563,7 @@ void bch_data_move(struct moving_queue *q,
q->count++;
list_add_tail(&io->list, &q->pending);
- trace_bcache_move_read(q, &io->key);
+ trace_bcache_move_read(q, &io->key.k);
if (q->rotational)
BUG_ON(RB_INSERT(&q->tree, io, node, moving_io_cmp));
@@ -647,14 +648,18 @@ sync:
closure_sync(&ctxt->cl);
}
-static bool migrate_data_pred(struct scan_keylist *kl, const struct bkey *k)
+static bool migrate_data_pred(struct scan_keylist *kl, struct bkey_s_c k)
{
struct cache *ca = container_of(kl, struct cache,
moving_gc_queue.keys);
- return k->type == BCH_EXTENT &&
- bch_extent_has_device(bkey_i_to_extent_c(k),
- ca->sb.nr_this_dev);
+ switch (k.k->type) {
+ case BCH_EXTENT:
+ return bch_extent_has_device(bkey_s_c_to_extent(k),
+ ca->sb.nr_this_dev);
+ default:
+ return false;
+ }
}
#if (0)
@@ -752,17 +757,17 @@ enum migrate_option {
};
static enum migrate_option migrate_cleanup_key(struct cache_set *c,
- struct bkey *k,
+ struct bkey_i *k,
struct cache *ca)
{
- struct bkey_i_extent *e = bkey_i_to_extent(k);
+ struct bkey_s_extent e = bkey_i_to_s_extent(k);
struct bch_extent_ptr *ptr;
bool found;
found = false;
extent_for_each_ptr_backwards(e, ptr)
if (PTR_DEV(ptr) == ca->sb.nr_this_dev) {
- bch_extent_drop_ptr(&e->k, ptr - e->v.ptr);
+ bch_extent_drop_ptr(e, ptr - e.v->ptr);
found = true;
}
@@ -784,7 +789,7 @@ static enum migrate_option migrate_cleanup_key(struct cache_set *c,
static int issue_migration_move(struct cache *ca,
struct moving_context *ctxt,
- struct bkey *k,
+ struct bkey_s_c k,
uint64_t *seen_key_count)
{
enum migrate_option option;
@@ -816,9 +821,7 @@ static int issue_migration_move(struct cache *ca,
BUG_ON(q->wq == NULL);
io->op.io_wq = q->wq;
- k = &io->op.insert_key;
-
- option = migrate_cleanup_key(c, k, ca);
+ option = migrate_cleanup_key(c, &io->op.insert_key, ca);
switch (option) {
default:
@@ -875,7 +878,7 @@ static int issue_migration_move(struct cache *ca,
int bch_move_data_off_device(struct cache *ca)
{
int ret;
- struct bkey *k;
+ struct bkey_i *k;
unsigned pass;
u64 seen_key_count;
unsigned last_error_count;
@@ -967,7 +970,7 @@ again:
if (k == NULL)
break;
- if (issue_migration_move(ca, &context, k,
+ if (issue_migration_move(ca, &context, bkey_i_to_s_c(k),
&seen_key_count)) {
/*
* Memory allocation failed; we will wait for all
@@ -1033,7 +1036,7 @@ static int bch_move_btree_off(struct cache *ca,
for_each_btree_node(&iter, ca->set, id, b, POS_MIN) {
seen++;
retry:
- if (!bch_extent_has_device(bkey_i_to_extent_c(&b->key),
+ if (!bch_extent_has_device(bkey_i_to_s_c_extent(&b->key),
ca->sb.nr_this_dev))
continue;
@@ -1149,18 +1152,18 @@ int bch_move_meta_data_off_device(struct cache *ca)
static int bch_flag_key_bad(struct btree_iter *iter,
struct cache *ca,
- const struct bkey_i_extent *orig)
+ struct bkey_s_c_extent orig)
{
BKEY_PADDED(key) tmp;
- struct bkey_i_extent *e;
+ struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
struct cache_set *c = ca->set;
bool found = false;
/* Iterate backwards because we might drop pointers */
- bkey_copy(&tmp.key, &orig->k);
- e = bkey_i_to_extent(&tmp.key);
+ bkey_reassemble(&tmp.key, to_bkey_s_c(orig));
+ e = bkey_i_to_s_extent(&tmp.key);
extent_for_each_ptr_backwards(e, ptr)
if (PTR_DEV(ptr) == ca->sb.nr_this_dev) {
@@ -1176,10 +1179,10 @@ static int bch_flag_key_bad(struct btree_iter *iter,
* because bch_extent_normalize() will sort it
* incorrectly but fortunately we don't need to.
*/
- if (bch_extent_ptr_is_dirty(c, e, ptr))
+ if (bch_extent_ptr_is_dirty(c, extent_s_to_s_c(e), ptr))
*ptr = PTR(0, 0, PTR_LOST_DEV);
else
- bch_extent_drop_ptr(&e->k, ptr - e->v.ptr);
+ bch_extent_drop_ptr(e, ptr - e.v->ptr);
}
if (!found)
@@ -1195,10 +1198,10 @@ static int bch_flag_key_bad(struct btree_iter *iter,
* in this case, bch_extent_normalize() will change the key type to
* DISCARD.
*/
- bch_extent_normalize(c, &e->k);
+ bch_extent_normalize(c, bkey_i_to_s(&tmp.key));
return bch_btree_insert_at(iter,
- &keylist_single(&e->k),
+ &keylist_single(&tmp.key),
NULL, /* replace_info */
NULL, /* closure */
0, /* reserve */
@@ -1219,7 +1222,7 @@ static int bch_flag_key_bad(struct btree_iter *iter,
int bch_flag_data_bad(struct cache *ca)
{
int ret = 0, ret2;
- const struct bkey *k;
+ struct bkey_s_c k;
struct btree_iter iter;
if (MIGRATION_DEBUG)
@@ -1227,9 +1230,10 @@ int bch_flag_data_bad(struct cache *ca)
bch_btree_iter_init(&iter, ca->set, BTREE_ID_EXTENTS, POS_MIN);
- while ((k = bch_btree_iter_peek(&iter))) {
- if (k->type == BCH_EXTENT) {
- ret = bch_flag_key_bad(&iter, ca, bkey_i_to_extent_c(k));
+ while ((k = bch_btree_iter_peek(&iter)).k) {
+ if (k.k->type == BCH_EXTENT) {
+ ret = bch_flag_key_bad(&iter, ca,
+ bkey_s_c_to_extent(k));
if (ret == -EINTR || ret == -EAGAIN)
continue;
@@ -1245,8 +1249,8 @@ int bch_flag_data_bad(struct cache *ca)
#ifdef CONFIG_BCACHE_DEBUG
if (!ret && !ret2)
for_each_btree_key(&iter, ca->set, BTREE_ID_EXTENTS, k, POS_MIN)
- BUG_ON(k->type == BCH_EXTENT &&
- bch_extent_has_device(bkey_i_to_extent_c(k),
+ BUG_ON(k.k->type == BCH_EXTENT &&
+ bch_extent_has_device(bkey_s_c_to_extent(k),
ca->sb.nr_this_dev));
bch_btree_iter_unlock(&iter);
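
A recurring idiom in move.c (and in io.c, ioctl.c, keybuf.c, writeback.c): bch_btree_iter_peek() now returns a struct bkey_s_c by value rather than a const struct bkey pointer, so "no more keys" is signalled by a NULL .k member, giving loops of the form while ((k = bch_btree_iter_peek(&iter)).k). A toy illustration of that shape, reusing the stand-in types above with a fake peek function standing in for the btree iterator:

	/* Fake key source, illustration only. */
	static struct bkey_s_c peek_sketch(int *remaining, const struct bkey_i *src)
	{
		if (!*remaining)
			return (struct bkey_s_c) { .k = NULL, .v = NULL };
		(*remaining)--;
		return bkey_i_to_s_c(src);
	}

	static void walk_sketch(int n, const struct bkey_i *src)
	{
		struct bkey_s_c k;

		while ((k = peek_sketch(&n, src)).k) {
			/* k.k is the header, k.v the value */
		}
	}
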
diff --git a/drivers/md/bcache/move.h b/drivers/md/bcache/move.h
index f6ebfdbb3811..168f55a8fbdc 100644
--- a/drivers/md/bcache/move.h
+++ b/drivers/md/bcache/move.h
@@ -94,7 +94,7 @@ struct moving_io {
struct bbio bio;
};
-struct moving_io *moving_io_alloc(const struct bkey *);
+struct moving_io *moving_io_alloc(struct bkey_s_c);
typedef struct moving_io *(moving_queue_fn)(struct moving_queue *,
struct moving_context *);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index b8a248160324..bccacc5ce109 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -18,18 +18,18 @@
/* Moving GC - IO loop */
-static bool moving_pred(struct scan_keylist *kl, const struct bkey *k)
+static bool moving_pred(struct scan_keylist *kl, struct bkey_s_c k)
{
struct cache *ca = container_of(kl, struct cache,
moving_gc_queue.keys);
struct cache_set *c = ca->set;
- const struct bkey_i_extent *e;
+ struct bkey_s_c_extent e;
const struct bch_extent_ptr *ptr;
bool ret = false;
- switch (k->type) {
+ switch (k.k->type) {
case BCH_EXTENT:
- e = bkey_i_to_extent_c(k);
+ e = bkey_s_c_to_extent(k);
rcu_read_lock();
extent_for_each_ptr(e, ptr)
@@ -46,16 +46,16 @@ static bool moving_pred(struct scan_keylist *kl, const struct bkey *k)
static int issue_moving_gc_move(struct moving_queue *q,
struct moving_context *ctxt,
- const struct bkey *k)
+ struct bkey_i *k)
{
struct cache *ca = container_of(q, struct cache, moving_gc_queue);
struct cache_set *c = ca->set;
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
const struct bch_extent_ptr *ptr;
struct moving_io *io;
struct write_point *wp;
unsigned gen;
- bool cached = EXTENT_CACHED(&e->v);
+ bool cached = EXTENT_CACHED(e.v);
u64 sort_key;
extent_for_each_ptr(e, ptr)
@@ -72,9 +72,9 @@ static int issue_moving_gc_move(struct moving_queue *q,
return 0;
found:
- io = moving_io_alloc(k);
+ io = moving_io_alloc(bkey_i_to_s_c(k));
if (!io) {
- trace_bcache_moving_gc_alloc_fail(c, k->size);
+ trace_bcache_moving_gc_alloc_fail(c, e.k->size);
return -ENOMEM;
}
@@ -84,14 +84,15 @@ found:
* key here, since extent_drop_ptr() might delete the
* first pointer, losing the cached status
*/
- bch_write_op_init(&io->op, c, &io->bio.bio, wp, k, k,
+ bch_write_op_init(&io->op, c, &io->bio.bio, wp,
+ bkey_i_to_s_c(k), bkey_i_to_s_c(k),
cached ? BCH_WRITE_CACHED : 0);
- io->op.btree_alloc_reserve = RESERVE_MOVINGGC_BTREE;
io->sort_key = sort_key;
- bch_extent_drop_ptr(&io->op.insert_key, ptr - e->v.ptr);
+ bch_extent_drop_ptr(bkey_i_to_s_extent(&io->op.insert_key),
+ ptr - e.v->ptr);
- trace_bcache_gc_copy(k);
+ trace_bcache_gc_copy(e.k);
/*
* IMPORTANT: We must call bch_data_move before we dequeue so
@@ -108,7 +109,7 @@ found:
static void read_moving(struct cache *ca, struct moving_context *ctxt)
{
- struct bkey *k;
+ struct bkey_i *k;
bool again;
bch_ratelimit_reset(&ca->moving_gc_pd.rate);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index fcadd612b37b..cff65c0a3fa6 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -352,7 +352,9 @@ static int cached_dev_cache_miss(struct btree_iter *iter, struct search *s,
#endif
sectors = min(sectors, bio_sectors(bio) + reada);
- replace.key = KEY(s->inode, bio->bi_iter.bi_sector + sectors, sectors);
+ replace.key.k = KEY(s->inode,
+ bio->bi_iter.bi_sector + sectors,
+ sectors);
ret = bch_btree_insert_check_key(iter, &replace.key);
if (ret == -EINTR || ret == -EAGAIN)
@@ -363,17 +365,17 @@ static int cached_dev_cache_miss(struct btree_iter *iter, struct search *s,
miss->bi_end_io = request_endio;
miss->bi_private = &s->cl;
- to_bbio(miss)->key = KEY(s->inode,
- bio_end_sector(miss),
- bio_sectors(miss));
+ to_bbio(miss)->key.k = KEY(s->inode,
+ bio_end_sector(miss),
+ bio_sectors(miss));
to_bbio(miss)->ca = NULL;
closure_get(&s->cl);
__cache_promote(s->iop.c, to_bbio(miss),
- &replace.key,
- &KEY(replace.key.p.inode,
- replace.key.p.offset,
- replace.key.size),
+ bkey_i_to_s_c(&replace.key),
+ bkey_to_s_c(&KEY(replace.key.k.p.inode,
+ replace.key.k.p.offset,
+ replace.key.k.size)),
BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED);
return 0;
@@ -409,7 +411,7 @@ static void cached_dev_read(struct cached_dev *dc, struct search *s)
struct closure *cl = &s->cl;
struct bio *bio = &s->bio.bio;
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
bch_increment_clock(s->iop.c, bio_sectors(&s->bio.bio), READ);
@@ -422,12 +424,13 @@ static void cached_dev_read(struct cached_dev *dc, struct search *s)
struct cache *ca;
bool done;
retry:
- BUG_ON(bkey_cmp(bkey_start_pos(k),
+ BUG_ON(bkey_cmp(bkey_start_pos(k.k),
POS(s->inode, bio->bi_iter.bi_sector)) > 0);
- BUG_ON(bkey_cmp(k->p, POS(s->inode, bio->bi_iter.bi_sector)) <= 0);
+ BUG_ON(bkey_cmp(k.k->p,
+ POS(s->inode, bio->bi_iter.bi_sector)) <= 0);
- sectors = k->p.offset - bio->bi_iter.bi_sector;
+ sectors = k.k->p.offset - bio->bi_iter.bi_sector;
done = sectors >= bio_sectors(bio);
ca = bch_extent_pick_ptr(s->iop.c, k, &ptr);
@@ -445,22 +448,24 @@ retry:
goto retry;
}
} else {
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
PTR_BUCKET(ca, ptr)->read_prio =
s->iop.c->prio_clock[READ].hand;
- if (!EXTENT_CACHED(&e->v))
+ if (!EXTENT_CACHED(e.v))
s->read_dirty_data = true;
n = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
bbio = to_bbio(n);
- bch_bkey_copy_single_ptr(&bbio->key, k, ptr - e->v.ptr);
+ bch_bkey_copy_single_ptr(&bbio->key, k, ptr - e.v->ptr);
/* Trim the key to match what we're actually reading */
- bch_cut_front(POS(s->inode, n->bi_iter.bi_sector), &bbio->key);
- bch_cut_back(POS(s->inode, bio_end_sector(n)), &bbio->key);
+ bch_cut_front(POS(s->inode, n->bi_iter.bi_sector),
+ &bbio->key);
+ bch_cut_back(POS(s->inode, bio_end_sector(n)),
+ &bbio->key.k);
bch_bbio_prep(bbio, ca);
@@ -583,7 +588,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
flags |= BCH_WRITE_DISCARD;
bch_write_op_init(&s->iop, dc->disk.c, insert_bio, NULL,
- &insert_key, NULL, flags);
+ bkey_to_s_c(&insert_key), bkey_s_c_null, flags);
closure_call(&s->iop.cl, bch_write, NULL, cl);
continue_at(cl, cached_dev_write_complete, NULL);
@@ -736,7 +741,8 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio)
flags |= BCH_WRITE_DISCARD;
bch_write_op_init(&s->iop, d->c, bio, NULL,
- &KEY(s->inode, 0, 0), NULL, flags);
+ bkey_to_s_c(&KEY(s->inode, 0, 0)),
+ bkey_s_c_null, flags);
closure_call(&s->iop.cl, bch_write, NULL, &s->cl);
continue_at(&s->cl, search_free, NULL);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 7f25591f7897..793ccbb87a74 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -580,10 +580,10 @@ void bcache_write_super(struct cache_set *c)
}
void bch_check_mark_super_slowpath(struct cache_set *c,
- const struct bkey *k, bool meta)
+ const struct bkey_i *k, bool meta)
{
struct cache_member *mi;
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
const struct bch_extent_ptr *ptr;
down(&c->sb_write_mutex);
@@ -1126,7 +1126,7 @@ const char *bch_run_cache_set(struct cache_set *c)
for (id = 0; id < BTREE_ID_NR; id++) {
unsigned level;
- struct bkey *k;
+ struct bkey_i *k;
err = "bad btree root";
k = bch_journal_find_btree_root(c, j, id, &level);
@@ -1205,13 +1205,13 @@ const char *bch_run_cache_set(struct cache_set *c)
bch_journal_meta(c, &cl);
- bkey_inode_init(&inode.k);
+ bkey_inode_init(&inode.k_i);
inode.k.p.inode = BCACHE_ROOT_INO;
inode.v.i_mode = S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO;
inode.v.i_nlink = 2;
err = "error creating root directory";
- if (bch_inode_update(c, &inode.k))
+ if (bch_inode_update(c, &inode.k_i))
goto err;
}
diff --git a/drivers/md/bcache/super.h b/drivers/md/bcache/super.h
index 457c1c40c7e5..7e7d5d9bb7f6 100644
--- a/drivers/md/bcache/super.h
+++ b/drivers/md/bcache/super.h
@@ -75,12 +75,12 @@ u64 bch_checksum(unsigned, const void *, size_t);
})
void bch_check_mark_super_slowpath(struct cache_set *,
- const struct bkey *, bool);
+ const struct bkey_i *, bool);
static inline bool bch_check_super_marked(struct cache_set *c,
- const struct bkey *k, bool meta)
+ const struct bkey_i *k, bool meta)
{
- const struct bkey_i_extent *e = bkey_i_to_extent_c(k);
+ struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
const struct bch_extent_ptr *ptr;
struct cache_member_rcu *mi = cache_member_info_get(c);
bool ret = true;
@@ -99,7 +99,7 @@ static inline bool bch_check_super_marked(struct cache_set *c,
}
static inline void bch_check_mark_super(struct cache_set *c,
- const struct bkey *k, bool meta)
+ const struct bkey_i *k, bool meta)
{
if (bch_check_super_marked(c, k, meta))
return;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 91540fd0de8f..3eab3a6dcbd5 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -313,7 +313,7 @@ STORE(__cached_dev)
bch_write_bdev_super(dc, NULL);
if (dc->disk.c)
- bch_inode_update(dc->disk.c, &dc->disk.inode.k);
+ bch_inode_update(dc->disk.c, &dc->disk.inode.k_i);
mutex_unlock(&dc->disk.inode_lock);
@@ -454,7 +454,7 @@ STORE(__bch_flash_dev)
if (v < d->inode.v.i_inode.i_size)
bch_inode_truncate(d->c, d->inode.k.p.inode, v >> 9);
d->inode.v.i_inode.i_size = v;
- bch_inode_update(d->c, &d->inode.k);
+ bch_inode_update(d->c, &d->inode.k_i);
set_capacity(d->disk, d->inode.v.i_inode.i_size >> 9);
mutex_unlock(&d->inode_lock);
@@ -464,7 +464,7 @@ STORE(__bch_flash_dev)
mutex_lock(&d->inode_lock);
memcpy(d->inode.v.i_label, buf, SB_LABEL_SIZE);
- bch_inode_update(d->c, &d->inode.k);
+ bch_inode_update(d->c, &d->inode.k_i);
mutex_unlock(&d->inode_lock);
}
@@ -520,7 +520,7 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
static unsigned bch_root_usage(struct cache_set *c)
{
unsigned bytes = 0;
- struct bkey *k;
+ struct bkey_packed *k;
struct btree *b;
struct btree_node_iter iter;
diff --git a/drivers/md/bcache/tier.c b/drivers/md/bcache/tier.c
index d5c75748026b..c9c5c95e017d 100644
--- a/drivers/md/bcache/tier.c
+++ b/drivers/md/bcache/tier.c
@@ -14,20 +14,20 @@
/**
* tiering_pred - check if tiering should copy an extent to tier 1
*/
-static bool tiering_pred(struct scan_keylist *kl, const struct bkey *k)
+static bool tiering_pred(struct scan_keylist *kl, struct bkey_s_c k)
{
struct cache *ca = container_of(kl, struct cache,
tiering_queue.keys);
struct cache_set *c = ca->set;
struct cache_member_rcu *mi;
- const struct bkey_i_extent *e;
+ struct bkey_s_c_extent e;
unsigned replicas = CACHE_SET_DATA_REPLICAS_WANT(&c->sb);
unsigned dev;
bool ret = false;
- switch (k->type) {
+ switch (k.k->type) {
case BCH_EXTENT:
- e = bkey_i_to_extent_c(k);
+ e = bkey_s_c_to_extent(k);
/*
* Should not happen except in a pathological situation (too
* many pointers on the wrong tier?
@@ -39,7 +39,7 @@ static bool tiering_pred(struct scan_keylist *kl, const struct bkey *k)
if (bch_extent_ptrs(e) < replicas)
return true;
- dev = PTR_DEV(&e->v.ptr[bch_extent_ptrs(e) - replicas]);
+ dev = PTR_DEV(&e.v->ptr[bch_extent_ptrs(e) - replicas]);
mi = cache_member_info_get(c);
ret = dev < mi->nr_in_set && !CACHE_TIER(&mi->m[dev]);
cache_member_info_put();
@@ -149,7 +149,7 @@ static void tiering_refill(struct cache_set *c, struct tiering_refill *refill)
{
struct scan_keylist *keys;
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
if (bkey_cmp(refill->start, POS_MAX) >= 0)
return;
@@ -166,7 +166,7 @@ static void tiering_refill(struct cache_set *c, struct tiering_refill *refill)
keys = &refill->ca->tiering_queue.keys;
if (!tiering_pred(keys, k)) {
- refill->start = k->p;
+ refill->start = k.k->p;
continue;
}
@@ -175,8 +175,8 @@ static void tiering_refill(struct cache_set *c, struct tiering_refill *refill)
goto done;
/* TODO: split key if refill->sectors is now > stripe_size */
- refill->sectors += k->size;
- refill->start = k->p;
+ refill->sectors += k.k->size;
+ refill->start = k.k->p;
/* Check if we've added enough keys to this keylist */
if (tiering_keylist_full(refill)) {
@@ -209,7 +209,7 @@ done:
static int issue_tiering_move(struct moving_queue *q,
struct moving_context *ctxt,
- struct bkey *k)
+ struct bkey_s_c k)
{
struct cache *ca = container_of(q, struct cache, tiering_queue);
struct cache_set *c = ca->set;
@@ -217,17 +217,18 @@ static int issue_tiering_move(struct moving_queue *q,
io = moving_io_alloc(k);
if (!io) {
- trace_bcache_tiering_alloc_fail(c, k->size);
+ trace_bcache_tiering_alloc_fail(c, k.k->size);
return -ENOMEM;
}
bch_write_op_init(&io->op, c, &io->bio.bio,
&ca->tiering_write_point,
- &io->key, &io->key, 0);
+ bkey_i_to_s_c(&io->key),
+ bkey_i_to_s_c(&io->key), 0);
io->op.io_wq = q->wq;
io->op.btree_alloc_reserve = RESERVE_TIERING_BTREE;
- trace_bcache_tiering_copy(k);
+ trace_bcache_tiering_copy(k.k);
/*
* IMPORTANT: We must call bch_data_move before we dequeue so
@@ -254,7 +255,6 @@ static int tiering_next_cache(struct cache_set *c,
struct cache_group *tier;
int start = *cache_iter;
struct cache *ca;
- struct bkey *k;
/* If true at the end of the loop, all keylists were empty, so we
* have reached the end of the keyspace */
@@ -295,9 +295,11 @@ static int tiering_next_cache(struct cache_set *c,
if (bch_queue_full(&ca->tiering_queue)) {
done = false;
} else {
- k = bch_scan_keylist_next(&ca->tiering_queue.keys);
+ struct bkey_i *k =
+ bch_scan_keylist_next(&ca->tiering_queue.keys);
if (k) {
- issue_tiering_move(&ca->tiering_queue, ctxt, k);
+ issue_tiering_move(&ca->tiering_queue, ctxt,
+ bkey_i_to_s_c(k));
done = false;
full = false;
}
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 8fbb376fd74c..235ed41d35fd 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -80,10 +80,10 @@ do { \
for (; _j * 2 + 1 < (h)->used; _j = _r) { \
_r = _j * 2 + 1; \
if (_r + 1 < (h)->used && \
- (cmp)((h)->data[_r], (h)->data[_r + 1])) \
+ cmp((h)->data[_r], (h)->data[_r + 1])) \
_r++; \
\
- if ((cmp)((h)->data[_r], (h)->data[_j])) \
+ if (cmp((h)->data[_r], (h)->data[_j])) \
break; \
heap_swap(h, _r, _j); \
} \
@@ -93,7 +93,7 @@ do { \
do { \
while (i) { \
size_t p = (i - 1) / 2; \
- if ((cmp)((h)->data[i], (h)->data[p])) \
+ if (cmp((h)->data[i], (h)->data[p])) \
break; \
heap_swap(h, i, p); \
i = p; \
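
The util.h hunks above drop the parentheses around cmp in the heap macros. With (cmp)(...) the comparator had to be a real function or function pointer, since a function-like macro only expands when its name is immediately followed by '('; plain cmp(...) lets callers pass either a function or a macro. A standalone illustration of the difference (not the real heap code):

	#include <stdio.h>

	#define cmp_macro(l, r)	((l) < (r))
	static int cmp_fn(int l, int r) { return l < r; }

	/* cmp(x, y) expands for both forms; (cmp)(x, y) would leave cmp_macro
	 * unexpanded and produce an undeclared-function error. */
	#define HEAP_LESS(cmp, x, y)	cmp(x, y)

	int main(void)
	{
		printf("%d %d\n", HEAP_LESS(cmp_macro, 1, 2),
		       HEAP_LESS(cmp_fn, 2, 1));
		return 0;
	}
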
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 912760e7fa41..62a946608c83 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -79,8 +79,9 @@ static void dirty_init(struct dirty_io *io)
if (!io->dc->writeback_percent)
bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
- bio->bi_iter.bi_size = io->replace.key.size << 9;
- bio->bi_max_vecs = DIV_ROUND_UP(io->replace.key.size, PAGE_SECTORS);
+ bio->bi_iter.bi_size = io->replace.key.k.size << 9;
+ bio->bi_max_vecs =
+ DIV_ROUND_UP(io->replace.key.k.size, PAGE_SECTORS);
bio->bi_io_vec = bio->bi_inline_vecs;
bch_bio_map(bio, NULL);
}
@@ -118,7 +119,7 @@ static void write_dirty_finish(struct closure *cl)
ret = bch_btree_insert(dc->disk.c, BTREE_ID_EXTENTS,
&keys, &io->replace, NULL);
if (io->replace.successes == 0)
- trace_bcache_writeback_collision(&io->replace.key);
+ trace_bcache_writeback_collision(&io->replace.key.k);
atomic_long_inc(ret
? &dc->disk.c->writeback_keys_failed
@@ -135,7 +136,7 @@ static void dirty_endio(struct bio *bio, int error)
struct dirty_io *io = container_of(bio, struct dirty_io, bio);
if (error) {
- trace_bcache_writeback_error(&io->replace.key,
+ trace_bcache_writeback_error(&io->replace.key.k,
io->bio.bi_rw & WRITE,
error);
io->error = error;
@@ -151,7 +152,8 @@ static void write_dirty(struct closure *cl)
if (!io->error) {
dirty_init(io);
io->bio.bi_rw = WRITE;
- io->bio.bi_iter.bi_sector = bkey_start_offset(&io->replace.key);
+ io->bio.bi_iter.bi_sector =
+ bkey_start_offset(&io->replace.key.k);
io->bio.bi_bdev = io->dc->disk_sb.bdev;
io->bio.bi_end_io = dirty_endio;
@@ -203,18 +205,20 @@ static void read_dirty(struct cached_dev *dc)
bkey_copy(&tmp.k, &w->key);
- while (tmp.k.size) {
- ca = bch_extent_pick_ptr(dc->disk.c, &tmp.k, &ptr);
+ while (tmp.k.k.size) {
+ ca = bch_extent_pick_ptr(dc->disk.c,
+ bkey_i_to_s_c(&tmp.k),
+ &ptr);
if (IS_ERR_OR_NULL(ca))
break;
io = kzalloc(sizeof(*io) + sizeof(struct bio_vec) *
- DIV_ROUND_UP(tmp.k.size,
+ DIV_ROUND_UP(tmp.k.k.size,
PAGE_SECTORS),
GFP_KERNEL);
if (!io) {
trace_bcache_writeback_alloc_fail(ca->set,
- tmp.k.size);
+ tmp.k.k.size);
io = mempool_alloc(dc->writeback_io_pool,
GFP_KERNEL);
memset(io, 0, sizeof(*io) +
@@ -225,8 +229,8 @@ static void read_dirty(struct cached_dev *dc)
bkey_copy(&io->replace.key, &tmp.k);
if (DIRTY_IO_MEMPOOL_SECTORS <
- io->replace.key.size)
- bch_key_resize(&io->replace.key,
+ io->replace.key.k.size)
+ bch_key_resize(&io->replace.key.k,
DIRTY_IO_MEMPOOL_SECTORS);
} else {
bkey_copy(&io->replace.key, &tmp.k);
@@ -255,17 +259,17 @@ static void read_dirty(struct cached_dev *dc)
io->bio.bi_iter.bi_size =
io->bio.bi_vcnt * PAGE_SIZE;
- bch_key_resize(&io->replace.key,
+ bch_key_resize(&io->replace.key.k,
bio_sectors(&io->bio));
break;
}
}
- bch_cut_front(io->replace.key.p, &tmp.k);
- trace_bcache_writeback(&io->replace.key);
+ bch_cut_front(io->replace.key.k.p, &tmp.k);
+ trace_bcache_writeback(&io->replace.key.k);
bch_ratelimit_increment(&dc->writeback_pd.rate,
- io->replace.key.size << 9);
+ io->replace.key.k.size << 9);
closure_call(&io->cl, read_dirty_submit, NULL, &cl);
}
@@ -331,9 +335,9 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
rcu_read_unlock();
}
-static bool dirty_pred(struct keybuf *buf, const struct bkey *k)
+static bool dirty_pred(struct keybuf *buf, struct bkey_s_c k)
{
- return k->type == BCH_EXTENT &&
+ return k.k->type == BCH_EXTENT &&
!bkey_extent_cached(k);
}
@@ -370,7 +374,7 @@ static void refill_full_stripes(struct cached_dev *dc)
dirty_pred);
if (bch_keybuf_full(buf))
- return;
+ return;
stripe = next_stripe;
next:
@@ -495,7 +499,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc, struct cache_set *c)
{
struct bcache_device *d = &dc->disk;
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
/*
* We have to do this before the disk is added to the radix tree or we
@@ -503,15 +507,14 @@ void bch_sectors_dirty_init(struct cached_dev *dc, struct cache_set *c)
*/
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, k,
POS(bcache_dev_inum(d), 0)) {
- if (k->p.inode > bcache_dev_inum(d))
+ if (k.k->p.inode > bcache_dev_inum(d))
break;
- if (k->type != BCH_EXTENT)
- continue;
-
- if (!bkey_extent_cached(k))
- __bcache_dev_sectors_dirty_add(d, bkey_start_offset(k),
- k->size);
+ if (k.k->type == BCH_EXTENT &&
+ EXTENT_CACHED(bkey_s_c_to_extent(k).v))
+ __bcache_dev_sectors_dirty_add(d,
+ bkey_start_offset(k.k),
+ k.k->size);
}
bch_btree_iter_unlock(&iter);
diff --git a/drivers/md/bcache/xattr.c b/drivers/md/bcache/xattr.c
index 1f8b3269241f..5964619e9d63 100644
--- a/drivers/md/bcache/xattr.c
+++ b/drivers/md/bcache/xattr.c
@@ -44,12 +44,12 @@ static int xattr_cmp(const struct bch_xattr *xattr, const struct qstr *q)
return len - q->len ?: memcmp(xattr->x_name, q->name, len);
}
-static bool bch_xattr_invalid(const struct cache_set *c, const struct bkey *k)
+static bool bch_xattr_invalid(const struct cache_set *c, struct bkey_s_c k)
{
- if (k->type != BCH_XATTR)
+ if (k.k->type != BCH_XATTR)
return true;
- if (bkey_bytes(k) < sizeof(struct bkey_i_xattr))
+ if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
return true;
return false;
@@ -68,7 +68,7 @@ static int bch_xattr_get(struct dentry *dentry, const char *name,
struct cache_set *c = dentry->d_inode->i_sb->s_fs_info;
struct qstr qname = (struct qstr) QSTR_INIT(name, strlen(name));
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
const struct bch_xattr *xattr;
if (strcmp(name, "") == 0)
@@ -78,10 +78,10 @@ static int bch_xattr_get(struct dentry *dentry, const char *name,
POS(dentry->d_inode->i_ino,
bch_xattr_hash(&qname))) {
/* hole, not found */
- if (k->type != BCH_XATTR)
+ if (k.k->type != BCH_XATTR)
break;
- xattr = &bkey_i_to_xattr_c(k)->v;
+ xattr = bkey_s_c_to_xattr(k).v;
/* collision? */
if (!xattr_cmp(xattr, &qname)) {
@@ -105,7 +105,7 @@ static int bch_xattr_set(struct dentry *dentry, const char *name,
{
struct cache_set *c = dentry->d_inode->i_sb->s_fs_info;
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
struct qstr qname = (struct qstr) QSTR_INIT((char *) name, strlen(name));
int ret = -ENODATA;
@@ -113,18 +113,18 @@ static int bch_xattr_set(struct dentry *dentry, const char *name,
POS(dentry->d_inode->i_ino,
bch_xattr_hash(&qname)));
- while ((k = bch_btree_iter_peek_with_holes(&iter))) {
+ while ((k = bch_btree_iter_peek_with_holes(&iter)).k) {
struct keylist keys;
int ret;
/* hole, not found */
- if (k->type != BCH_XATTR) {
+ if (k.k->type != BCH_XATTR) {
if (flags & XATTR_REPLACE) {
ret = -ENODATA;
break;
}
} else {
- const struct bch_xattr *xattr = &bkey_i_to_xattr_c(k)->v;
+ const struct bch_xattr *xattr = bkey_s_c_to_xattr(k).v;
/* collision? */
if (xattr_cmp(xattr, &qname)) {
@@ -140,18 +140,18 @@ static int bch_xattr_set(struct dentry *dentry, const char *name,
bch_keylist_init(&keys);
- bkey_init(keys.top);
- keys.top->p = k->p;
+ bkey_init(&keys.top->k);
+ keys.top->k.p = k.k->p;
if (size) {
struct bch_xattr *xattr;
- keys.top->type = BCH_XATTR;
- set_bkey_val_bytes(keys.top,
+ keys.top->k.type = BCH_XATTR;
+ set_bkey_val_bytes(&keys.top->k,
sizeof(struct bch_xattr) +
qname.len + size);
- if (bch_keylist_realloc(&keys, keys.top->u64s)) {
+ if (bch_keylist_realloc(&keys, keys.top->k.u64s)) {
ret = -ENOMEM;
break;
}
@@ -164,7 +164,7 @@ static int bch_xattr_set(struct dentry *dentry, const char *name,
BUG_ON(xattr_cmp(xattr, &qname));
} else {
/* removing */
- set_bkey_deleted(keys.top);
+ set_bkey_deleted(&keys.top->k);
}
bch_keylist_enqueue(&keys);
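
The bch_xattr_set hunks above build the new key in place on the keylist through the embedded key header (keys.top->k) rather than treating the slot as a bare bkey. A rough sketch of that construction pattern follows; the struct layouts, the BCH_XATTR value and set_bkey_val_bytes() here are simplified stand-ins for illustration, and the pad union mirrors the __BKEY_PADDED idea rather than the real macro.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BCH_XATTR   128     /* hypothetical type tag */

struct bpos { uint64_t inode, offset; };

struct bkey {
    uint8_t     u64s;       /* key + value size, in u64s */
    uint8_t     type;
    uint32_t    size;
    struct bpos p;
};

struct bch_xattr {
    uint8_t x_name_len;
    uint8_t x_val_len;
    char    x_name[0];      /* GNU zero-length array, as in the kernel */
};

struct bkey_i_xattr {
    struct bkey      k;
    struct bch_xattr v;
};

static void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
{
    /* key header plus the value, rounded up to whole u64s */
    k->u64s = sizeof(struct bkey) / 8 + (bytes + 7) / 8;
}

int main(void)
{
    union {
        struct bkey_i_xattr x;
        uint64_t            pad[32];    /* room for the inline name */
    } buf;
    const char *name = "user.comment";

    memset(&buf, 0, sizeof(buf));
    buf.x.k.type = BCH_XATTR;
    buf.x.k.p    = (struct bpos) { .inode = 42, .offset = 0 };
    set_bkey_val_bytes(&buf.x.k, sizeof(struct bch_xattr) + strlen(name));
    buf.x.v.x_name_len = strlen(name);
    memcpy(buf.x.v.x_name, name, strlen(name));

    printf("u64s %u type %u name %.*s\n", buf.x.k.u64s, buf.x.k.type,
           buf.x.v.x_name_len, buf.x.v.x_name);
    return 0;
}
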
@@ -201,22 +201,22 @@ ssize_t bch_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
struct cache_set *c = dentry->d_sb->s_fs_info;
struct btree_iter iter;
- const struct bkey *k;
+ struct bkey_s_c k;
const struct bch_xattr *xattr;
u64 inum = dentry->d_inode->i_ino;
ssize_t ret = 0;
size_t len;
for_each_btree_key(&iter, c, BTREE_ID_XATTRS, k, POS(inum, 0)) {
- BUG_ON(k->p.inode < inum);
+ BUG_ON(k.k->p.inode < inum);
- if (k->p.inode > inum)
+ if (k.k->p.inode > inum)
break;
- if (k->type != BCH_XATTR)
+ if (k.k->type != BCH_XATTR)
continue;
- xattr = &bkey_i_to_xattr_c(k)->v;
+ xattr = bkey_s_c_to_xattr(k).v;
len = bch_xattr_emit(dentry, xattr, buffer, buffer_size);
if (len > buffer_size) {
diff --git a/include/linux/bcache-kernel.h b/include/linux/bcache-kernel.h
index 3564fe736651..aa350de3fac3 100644
--- a/include/linux/bcache-kernel.h
+++ b/include/linux/bcache-kernel.h
@@ -9,7 +9,32 @@
#include <linux/wait.h>
struct cache_set;
-struct bkey;
+
+/* bkey with split value */
+struct bkey_s {
+ struct bkey *k;
+ struct bch_val *v;
+};
+
+/* bkey with split value, const */
+struct bkey_s_c {
+ const struct bkey *k;
+ const struct bch_val *v;
+};
+
+#define type_is(_val, _type) \
+ (__builtin_types_compatible_p(typeof(_val), _type) || \
+ __builtin_types_compatible_p(typeof(_val), const _type))
+
+#define bkey_next(_k) \
+({ \
+ BUILD_BUG_ON(!type_is(_k, struct bkey *) && \
+ !type_is(_k, struct bkey_i *) && \
+ !type_is(_k, struct bkey_packed *)); \
+ \
+ ((typeof(_k)) __bkey_idx(((struct bkey *) (_k)), \
+ ((struct bkey *) (_k))->u64s)); \
+})
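
The new bkey_next() steps over struct bkey, struct bkey_i and struct bkey_packed alike by advancing u64s 64-bit words from the start of the key, with the BUILD_BUG_ON restricting it to those three types. A standalone sketch of that stepping logic, again with a simplified stand-in key layout:

#include <stdint.h>
#include <stdio.h>

struct bkey {
    uint64_t _data[0];      /* GNU zero-length array, as in the header */
    uint8_t  u64s;          /* combined key + value size, in u64s */
    uint8_t  type;
    uint8_t  pad[6];
    uint64_t payload[2];    /* stand-in for the rest of key + value */
};

static inline struct bkey *bkey_next(struct bkey *k)
{
    return (struct bkey *) (k->_data + k->u64s);
}

int main(void)
{
    /* Two keys of 3 u64s each, packed back to back in one buffer: */
    uint64_t buf[6] = { 0 };
    struct bkey *k   = (struct bkey *) buf;
    struct bkey *end = (struct bkey *) (buf + 6);

    ((struct bkey *) buf)->u64s       = 3;
    ((struct bkey *) (buf + 3))->u64s = 3;

    for (; k < end; k = bkey_next(k))
        printf("key at u64 offset %td, %u u64s\n",
               (uint64_t *) k - buf, k->u64s);
    return 0;
}
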
#define __bkey_idx(_set, _offset) \
((_set)->_data + (_offset))
@@ -17,9 +42,6 @@ struct bkey;
#define bkey_idx(_set, _offset) \
((typeof(&(_set)->start[0])) __bkey_idx((_set), (_offset)))
-#define bkey_next(_k) \
- ((typeof(_k)) __bkey_idx(_k, (_k)->u64s))
-
#define __bset_bkey_last(_set) \
__bkey_idx((_set), (_set)->u64s)
@@ -55,22 +77,22 @@ struct bkey;
struct keylist {
/* This is a pointer to the LSB (inline_keys until realloc'd) */
union {
- struct bkey *start_keys;
+ struct bkey_i *start_keys;
uint64_t *start_keys_p;
};
/* This is a pointer to the next to enqueue */
union {
- struct bkey *top;
+ struct bkey_i *top;
uint64_t *top_p;
};
/* This is a pointer to the next to dequeue */
union {
- struct bkey *bot;
+ struct bkey_i *bot;
uint64_t *bot_p;
};
/* This is a pointer to beyond the MSB */
union {
- struct bkey *end_keys;
+ struct bkey_i *end_keys;
uint64_t *end_keys_p;
};
/* Enough room for btree_split's keys without realloc */
@@ -108,7 +130,8 @@ static inline bool bch_keylist_fits(struct keylist *l, size_t u64s)
return true;
}
-static inline struct bkey *__bch_keylist_next(struct keylist *l, struct bkey *k)
+static inline struct bkey_i *__bch_keylist_next(struct keylist *l,
+ struct bkey_i *k)
{
k = bkey_next(k);
BUG_ON(k > l->end_keys);
@@ -125,11 +148,11 @@ static inline struct bkey *__bch_keylist_next(struct keylist *l, struct bkey *k)
static inline void bch_keylist_enqueue(struct keylist *l)
{
- BUG_ON(!bch_keylist_fits(l, l->top->u64s));
+ BUG_ON(!bch_keylist_fits(l, l->top->k.u64s));
l->top = __bch_keylist_next(l, l->top);
}
-static inline void bch_keylist_add(struct keylist *l, const struct bkey *k)
+static inline void bch_keylist_add(struct keylist *l, const struct bkey_i *k)
{
bkey_copy(l->top, k);
bch_keylist_enqueue(l);
@@ -166,7 +189,7 @@ static inline size_t bch_keylist_nkeys(struct keylist *l)
(l->end_keys_p - l->bot_p));
}
-static inline struct bkey *bch_keylist_front(struct keylist *l)
+static inline struct bkey_i *bch_keylist_front(struct keylist *l)
{
return l->bot;
}
@@ -185,7 +208,7 @@ static inline void bch_keylist_dequeue(struct keylist *l)
.end_keys = bkey_next(k) \
}
-void bch_keylist_add_in_order(struct keylist *, struct bkey *);
+void bch_keylist_add_in_order(struct keylist *, struct bkey_i *);
int bch_keylist_realloc(struct keylist *, unsigned need);
int bch_keylist_realloc_max(struct keylist *, unsigned need, unsigned max);
@@ -259,7 +282,7 @@ enum bch_write_flags {
void bch_write_op_init(struct bch_write_op *, struct cache_set *,
struct bio *, struct write_point *,
- const struct bkey *, const struct bkey *, unsigned);
+ struct bkey_s_c, struct bkey_s_c, unsigned);
struct bbio {
struct cache *ca;
@@ -269,7 +292,7 @@ struct bbio {
unsigned int bi_bvec_done; /* number of bytes completed in
current bvec */
unsigned submit_time_us;
- struct bkey key;
+ struct bkey_i key;
struct bch_extent_ptr ptr;
/* Only ever have a single pointer (the one we're doing io to/from) */
struct bio bio;
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 4edfd7ed78c6..ea75ce6d4198 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -45,19 +45,16 @@ DECLARE_EVENT_CLASS(bkey,
__field(u32, size )
__field(u32, inode )
__field(u64, offset )
- __field(bool, cached )
),
TP_fast_assign(
__entry->inode = k->p.inode;
__entry->offset = k->p.offset;
__entry->size = k->size;
- __entry->cached = bkey_extent_cached(k);
),
- TP_printk("%u:%llu len %u%s", __entry->inode,
- __entry->offset, __entry->size,
- __entry->cached ? " cached" : "")
+ TP_printk("%u:%llu len %u", __entry->inode,
+ __entry->offset, __entry->size)
);
/* request.c */
@@ -396,8 +393,8 @@ DECLARE_EVENT_CLASS(btree_node,
__entry->bucket = PTR_BUCKET_NR_TRACE(b->c, &b->key, 0);
__entry->level = b->level;
__entry->id = b->btree_id;
- __entry->inode = b->key.p.inode;
- __entry->offset = b->key.p.offset;
+ __entry->inode = b->key.k.p.inode;
+ __entry->offset = b->key.k.p.offset;
),
TP_printk("%pU bucket %llu(%u) id %u: %u:%llu",
@@ -583,7 +580,7 @@ DEFINE_EVENT(btree_node_op, bcache_btree_intent_lock_fail,
);
TRACE_EVENT(bcache_btree_insert_key,
- TP_PROTO(struct btree *b, struct bkey *k, unsigned op,
+ TP_PROTO(struct btree *b, struct bkey_i *k, unsigned op,
bool insert_done),
TP_ARGS(b, k, op, insert_done),
@@ -606,13 +603,13 @@ TRACE_EVENT(bcache_btree_insert_key,
__entry->b_bucket = PTR_BUCKET_NR_TRACE(b->c, &b->key, 0);
__entry->level = b->level;
__entry->id = b->btree_id;
- __entry->b_inode = b->key.p.inode;
- __entry->b_offset = b->key.p.offset;
+ __entry->b_inode = b->key.k.p.inode;
+ __entry->b_offset = b->key.k.p.offset;
__entry->bucket = PTR_BUCKET_NR_TRACE(b->c, k, 0);
- __entry->inode = k->p.inode;
- __entry->offset = k->p.offset;
- __entry->size = k->size;
- __entry->cached = bkey_extent_cached(k);
+ __entry->inode = k->k.p.inode;
+ __entry->offset = k->k.p.offset;
+ __entry->size = k->k.size;
+ __entry->cached = bkey_extent_cached(bkey_i_to_s_c(k));
__entry->op = op;
__entry->insert_done = insert_done;
),
@@ -644,8 +641,8 @@ DECLARE_EVENT_CLASS(btree_split,
__entry->bucket = PTR_BUCKET_NR_TRACE(b->c, &b->key, 0);
__entry->level = b->level;
__entry->id = b->btree_id;
- __entry->inode = b->key.p.inode;
- __entry->offset = b->key.p.offset;
+ __entry->inode = b->key.k.p.inode;
+ __entry->offset = b->key.k.p.offset;
__entry->keys = keys;
),
@@ -688,8 +685,8 @@ TRACE_EVENT(bcache_btree_gc_coalesce,
__entry->bucket = PTR_BUCKET_NR_TRACE(b->c, &b->key, 0);
__entry->level = b->level;
__entry->id = b->btree_id;
- __entry->inode = b->key.p.inode;
- __entry->offset = b->key.p.offset;
+ __entry->inode = b->key.k.p.inode;
+ __entry->offset = b->key.k.p.offset;
__entry->nodes = nodes;
),
@@ -723,8 +720,8 @@ TRACE_EVENT(bcache_btree_node_alloc_replacement,
__entry->bucket = PTR_BUCKET_NR_TRACE(b->c, &b->key, 0);
__entry->level = b->level;
__entry->id = b->btree_id;
- __entry->inode = b->key.p.inode;
- __entry->offset = b->key.p.offset;
+ __entry->inode = b->key.k.p.inode;
+ __entry->offset = b->key.k.p.offset;
),
TP_printk("%pU for %llu bucket %llu(%u) id %u: %u:%llu",
@@ -784,10 +781,10 @@ DEFINE_EVENT(cache_set, bcache_gc_periodic,
);
TRACE_EVENT(bcache_add_sectors,
- TP_PROTO(struct cache *ca, const struct bkey_i_extent *e,
+ TP_PROTO(struct cache *ca, const struct bkey *k,
const struct bch_extent_ptr *ptr, u64 offset,
int sectors, bool dirty),
- TP_ARGS(ca, e, ptr, offset, sectors, dirty),
+ TP_ARGS(ca, k, ptr, offset, sectors, dirty),
TP_STRUCT__entry(
__array(char, uuid, 16 )
@@ -800,8 +797,8 @@ TRACE_EVENT(bcache_add_sectors,
TP_fast_assign(
memcpy(__entry->uuid, ca->sb.disk_uuid.b, 16);
- __entry->inode = e->k.p.inode;
- __entry->offset = e->k.p.offset;
+ __entry->inode = k->p.inode;
+ __entry->offset = k->p.offset;
__entry->sectors = sectors;
__entry->bucket = PTR_BUCKET_NR(ca, ptr);
__entry->dirty = dirty;
@@ -1223,7 +1220,6 @@ TRACE_EVENT(bcache_writeback_error,
__field(u32, size )
__field(u32, inode )
__field(u64, offset )
- __field(bool, cached )
__field(bool, write )
__field(int, error )
),
@@ -1232,14 +1228,12 @@ TRACE_EVENT(bcache_writeback_error,
__entry->inode = k->p.inode;
__entry->offset = k->p.offset;
__entry->size = k->size;
- __entry->cached = bkey_extent_cached(k);
__entry->write = write;
__entry->error = error;
),
- TP_printk("%u:%llu len %u%s %s error %d", __entry->inode,
+ TP_printk("%u:%llu len %u %s error %d", __entry->inode,
__entry->offset, __entry->size,
- __entry->cached ? " cached" : "",
__entry->write ? "write" : "read",
__entry->error)
);
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 36aaa0550219..d66a5b91209e 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -28,13 +28,19 @@ static inline void SET_##name(type *k, __u64 v) \
k->field |= (v & ~(~0ULL << (end - offset))) << offset; \
}
+struct bkey_format {
+ __u8 key_u64s;
+ __u8 nr_fields;
+ /* One unused slot for now: */
+ __u8 bits_per_field[6];
+ __u64 field_offset[6];
+};
+
/* Btree keys - all units are in sectors */
struct bpos {
/* Word order matches machine byte order */
#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
- __u32 kw[0];
- /* key starts here */
__u32 snapshot;
__u64 offset;
__u64 inode;
@@ -42,7 +48,6 @@ struct bpos {
__u64 inode;
__u64 offset; /* Points to end of extent - sectors */
__u32 snapshot;
- __u32 kw[0];
#else
#error edit for your odd byteorder.
#endif
@@ -71,6 +76,29 @@ struct bch_val {
__u64 __nothing[0];
};
+struct bkey_packed {
+ __u64 _data[0];
+
+ /* Size of combined key and value, in u64s */
+ __u8 u64s;
+
+ /* Format of key (0 for format local to btree node) */
+ __u8 format;
+
+ /* Type of the value */
+ __u8 type;
+ __u8 key_start[0];
+
+ /*
+ * We copy bkeys with struct assignment in various places, and while
+ * that shouldn't be done with packed bkeys we can't disallow it in C,
+ * and it's legal to cast a bkey to a bkey_packed - so padding it out
+ * to the same size as struct bkey should hopefully be safest.
+ */
+ __u8 pad[5];
+ __u64 pad2[4];
+} __attribute__((packed)) __attribute__((aligned(8)));
+
struct bkey {
__u64 _data[0];
@@ -84,20 +112,55 @@ struct bkey {
__u8 type;
__u8 pad[1];
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+ __u32 version;
+ __u32 size; /* extent size, in sectors */
+ struct bpos p;
+#elif defined(__BYTE_ORDER) ? __BYTE_ORDER == __BIG_ENDIAN : defined(__BIG_ENDIAN)
struct bpos p;
__u32 size; /* extent size, in sectors */
__u32 version;
-
- struct bch_val _val;
+#endif
} __attribute__((packed)) __attribute__((aligned(8)));
#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64))
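
The pad/pad2 members of bkey_packed exist purely so that sizeof(struct bkey_packed) equals sizeof(struct bkey), which is what makes the cast-and-struct-assign caveat in the comment above tolerable. The check below reconstructs both layouts from the definitions shown in this patch; the leading u64s/format bytes of struct bkey are not all visible in this hunk and are assumed to mirror bkey_packed, and struct bpos is left unpacked, so treat this as an illustration rather than the kernel's own definitions.

#include <stdint.h>
#include <stdio.h>

struct bpos {
    uint32_t snapshot;
    uint64_t offset;
    uint64_t inode;
};

struct bkey_packed {
    uint64_t _data[0];
    uint8_t  u64s;
    uint8_t  format;
    uint8_t  type;
    uint8_t  key_start[0];
    uint8_t  pad[5];
    uint64_t pad2[4];
} __attribute__((packed)) __attribute__((aligned(8)));

struct bkey {
    uint64_t _data[0];
    uint8_t  u64s;
    uint8_t  format;
    uint8_t  type;
    uint8_t  pad[1];
    uint32_t version;
    uint32_t size;
    struct bpos p;
} __attribute__((packed)) __attribute__((aligned(8)));

_Static_assert(sizeof(struct bkey) == sizeof(struct bkey_packed),
               "packed key must be padded to the unpacked key's size");

int main(void)
{
    printf("sizeof(struct bkey)        = %zu (%zu u64s)\n",
           sizeof(struct bkey), sizeof(struct bkey) / sizeof(uint64_t));
    printf("sizeof(struct bkey_packed) = %zu\n",
           sizeof(struct bkey_packed));
    return 0;
}
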
+#define KEY_PACKED_BITS_START 24
#define KEY_SIZE_MAX ((__u32)~0U)
#define KEY_FORMAT_LOCAL_BTREE 0
#define KEY_FORMAT_CURRENT 1
+enum bch_bkey_fields {
+ BKEY_FIELD_INODE,
+ BKEY_FIELD_OFFSET,
+ BKEY_FIELD_SNAPSHOT,
+ BKEY_FIELD_SIZE,
+ BKEY_FIELD_VERSION,
+ BKEY_NR_FIELDS,
+};
+
+#define bkey_format_field(name, field) \
+ [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8)
+
+#define BKEY_FORMAT_CURRENT (struct bkey_format) { \
+ .key_u64s = BKEY_U64s, \
+ .nr_fields = BKEY_NR_FIELDS, \
+ .bits_per_field = { \
+ bkey_format_field(INODE, p.inode), \
+ bkey_format_field(OFFSET, p.offset), \
+ bkey_format_field(SNAPSHOT, p.snapshot), \
+ bkey_format_field(SIZE, size), \
+ bkey_format_field(VERSION, version), \
+ }, \
+}
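
BKEY_FORMAT_CURRENT simply describes the unpacked in-memory bkey (key_u64s pinned to BKEY_U64s, every field at full width); the interesting case is a per-btree-node format that shrinks fields and stores them relative to field_offset[]. The helper below, bkey_format_u64s(), is hypothetical and only illustrates how such a format might be sized, on the assumption that KEY_PACKED_BITS_START (24) covers the three fixed header bytes of bkey_packed and the remaining fields follow bit-packed:

#include <stdint.h>
#include <stdio.h>

#define KEY_PACKED_BITS_START   24

struct bkey_format {
    uint8_t  key_u64s;
    uint8_t  nr_fields;
    uint8_t  bits_per_field[6];     /* one unused slot for now */
    uint64_t field_offset[6];
};

/* Hypothetical sizing helper, not a function from this patch. */
static unsigned bkey_format_u64s(const struct bkey_format *f)
{
    unsigned bits = KEY_PACKED_BITS_START;
    unsigned i;

    for (i = 0; i < f->nr_fields; i++)
        bits += f->bits_per_field[i];

    return (bits + 63) / 64;
}

int main(void)
{
    /*
     * A node whose keys all live near one inode could store inodes in 20
     * bits relative to field_offset[BKEY_FIELD_INODE], offsets in 40 bits,
     * sizes in 16 bits, and drop snapshot/version entirely:
     */
    struct bkey_format small = {
        .nr_fields      = 5,
        .bits_per_field = { 20, 40, 0, 16, 0 },
        .field_offset   = { 4096, 0, 0, 0, 0 },
    };

    small.key_u64s = bkey_format_u64s(&small);
    printf("packed keys need %u u64s each\n", small.key_u64s);  /* 2 */
    return 0;
}
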
+
+/* bkey with inline value */
+struct bkey_i {
+ struct bkey k;
+ struct bch_val v;
+};
+
#ifndef __cplusplus
#define KEY(_inode, _offset, _size) \
@@ -131,21 +194,24 @@ static inline void bkey_init(struct bkey *k)
*k = KEY(0, 0, 0);
}
-static inline unsigned long bkey_bytes(const struct bkey *k)
+#define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64))
+
+static inline void bkey_copy(struct bkey_i *dst, const struct bkey_i *src)
{
- return k->u64s * sizeof(__u64);
+ memcpy(dst, src, bkey_bytes(&src->k));
}
-#define bkey_copy(_dst, _src) memcpy(_dst, _src, bkey_bytes(_src))
-
#define __BKEY_PADDED(key, pad) \
- struct { struct bkey key; __u64 key ## _pad[pad]; }
+ struct { struct bkey_i key; __u64 key ## _pad[pad]; }
#define BKEY_VAL_TYPE(name, nr) \
struct bkey_i_##name { \
- struct bkey k; \
- struct bch_##name v; \
-};
+ union { \
+ struct bkey k; \
+ struct bkey_i k_i; \
+ }; \
+ struct bch_##name v; \
+}; \
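
Because BKEY_VAL_TYPE now opens each bkey_i_<name> with an anonymous union, the same object can be handed around either through its plain key header (.k) or as a generic bkey_i (.k_i) without casts. Roughly what the xattr instance expands to, using simplified stand-in structs:

#include <stdint.h>
#include <stdio.h>

struct bkey { uint8_t u64s, type; uint64_t offset; };   /* simplified */
struct bch_val { uint64_t __nothing[0]; };
struct bkey_i { struct bkey k; struct bch_val v; };
struct bch_xattr { uint8_t x_name_len, x_val_len; };

struct bkey_i_xattr {
    union {
        struct bkey   k;
        struct bkey_i k_i;
    };
    struct bch_xattr v;
};

/* A generic interface that only cares about the key header: */
static void keylist_add(const struct bkey_i *k)
{
    printf("queued key of %u u64s, type %u\n", k->k.u64s, k->k.type);
}

int main(void)
{
    struct bkey_i_xattr x = {
        .k = { .u64s = 3, .type = 42 },     /* 42: hypothetical type tag */
        .v = { .x_name_len = 4 },
    };

    /* typed view of the value ... */
    printf("name len %u\n", x.v.x_name_len);
    /* ... and generic view of the same key, no cast required: */
    keylist_add(&x.k_i);
    return 0;
}
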
/*
* - DELETED keys are used internally to mark keys that should be ignored but
@@ -258,6 +324,7 @@ struct bch_inode {
BKEY_VAL_TYPE(inode, BCH_INODE_FS);
struct bch_inode_blockdev {
+ struct bch_val v;
struct bch_inode i_inode;
uuid_le i_uuid;
@@ -544,7 +611,7 @@ static inline __u64 bset_magic(struct cache_sb *sb)
DEF_BTREE_ID(INODES, 1, "inodes") \
DEF_BTREE_ID(DIRENTS, 2, "dirents") \
DEF_BTREE_ID(XATTRS, 3, "attributes") \
- DEF_BTREE_ID(SHARED, 4, "shared extents")
+ DEF_BTREE_ID(SHARED, 4, "shared_extents")
#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val,
@@ -562,7 +629,7 @@ struct jset_entry {
__u32 flags; /* designates what this jset holds */
union {
- struct bkey start[0];
+ struct bkey_i start[0];
__u64 _data[0];
};
};
@@ -641,8 +708,11 @@ struct bset {
__u32 pad;
__u32 u64s; /* count of d[] in u64s */
+ /* NOTE: all bsets in the same btree node must have the same format */
+ struct bkey_format format;
+
union {
- struct bkey start[0];
+ struct bkey_packed start[0];
__u64 _data[0];
};
};
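
With this change a bset carries the bkey_format shared by every key that follows it, and start[] holds variable-length bkey_packed entries instead of full bkeys. A minimal sketch of walking such a bset by each key's u64s count, with simplified stand-in types (the real iterator would also unpack each key through the format):

#include <stdint.h>
#include <stdio.h>

struct bkey_format {
    uint8_t  key_u64s;
    uint8_t  nr_fields;
    uint8_t  bits_per_field[6];
    uint64_t field_offset[6];
};

struct bkey_packed {
    uint64_t _data[0];      /* GNU zero-length array */
    uint8_t  u64s;          /* combined key + value size, in u64s */
    uint8_t  format;
    uint8_t  type;
};

struct bset {
    uint32_t u64s;          /* count of start[] in u64s */
    struct bkey_format format;
    uint64_t start[];       /* really packed keys, back to back */
};

int main(void)
{
    /* A toy bset holding two packed keys of 2 and 3 u64s: */
    static uint64_t storage[16];
    struct bset *i = (struct bset *) storage;
    struct bkey_packed *k, *end;

    i->u64s = 5;
    ((struct bkey_packed *) &i->start[0])->u64s = 2;
    ((struct bkey_packed *) &i->start[2])->u64s = 3;

    end = (struct bkey_packed *) (i->start + i->u64s);
    for (k = (struct bkey_packed *) i->start;
         k < end;
         k = (struct bkey_packed *) (k->_data + k->u64s))
        printf("packed key: %u u64s, format %u\n", k->u64s, k->format);

    return 0;
}
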