path: root/libbcache/extents.c
author    Kent Overstreet <kent.overstreet@gmail.com>    2016-10-03 19:22:17 -0800
committer Kent Overstreet <kent.overstreet@gmail.com>    2017-02-28 03:05:38 -0900
commit    a5b5eba7f788bb77cf57f9c94f3474a2d439ab0b (patch)
tree      278813d1b1a9024174531376d41a2ba04a3b27f6 /libbcache/extents.c
parent    e4d1c93d85a5b86c04599bfc9f658308d741fd41 (diff)
New on disk format - encryption
Diffstat (limited to 'libbcache/extents.c')
-rw-r--r--    libbcache/extents.c    443
1 files changed, 232 insertions, 211 deletions
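
The hunks below move extent checksums from a bare u64 to the wider struct bch_csum (a lo/hi pair with room for a 128-bit checksum or MAC), add a BCH_EXTENT_CRC128 entry type carrying a per-extent nonce for encryption, and start storing compressed/uncompressed sizes biased by one (note the repeated "e.k->size - 1" stores). A minimal sketch of that encoding, using simplified stand-in structs: the real struct bch_csum and struct bch_extent_crc32/64/128 live in the on-disk format header, which this diff does not touch, so the field widths here are illustrative only.

/*
 * Illustrative sketch, not code from this commit.  Field names mirror what
 * the hunks below reference; the stand-in structs are deliberately simplified.
 */
#include <stdint.h>
#include <stdio.h>

struct sketch_csum {
	uint64_t lo;
	uint64_t hi;		/* room for a 128-bit checksum or MAC */
};

struct sketch_crc128 {
	uint32_t _compressed_size;	/* stored biased: actual sectors - 1 */
	uint32_t _uncompressed_size;	/* stored biased: actual sectors - 1 */
	uint32_t offset;
	uint32_t nonce;			/* new in this format: per-extent nonce */
	uint32_t csum_type;
	uint32_t compression_type;
	struct sketch_csum csum;
};

/* Presumed decode, matching the "- 1" stores in the hunks below: */
static unsigned crc128_compressed_size(const struct sketch_crc128 *crc)
{
	return crc->_compressed_size + 1;
}

int main(void)
{
	struct sketch_crc128 crc = { ._compressed_size = 8 - 1 };	/* 8 sectors */
	printf("%u sectors\n", crc128_compressed_size(&crc));		/* prints "8 sectors" */
	return 0;
}

Biasing the stored size by one means a field value of 0 decodes to 1 sector, so the field's full range is usable; the crc_compressed_size()/crc_uncompressed_size() accessors used in the diff are presumably where the one gets added back.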
diff --git a/libbcache/extents.c b/libbcache/extents.c
index c026d59..4b8a266 100644
--- a/libbcache/extents.c
+++ b/libbcache/extents.c
@@ -9,19 +9,19 @@
#include "bkey_methods.h"
#include "btree_gc.h"
#include "btree_update.h"
+#include "checksum.h"
#include "debug.h"
#include "dirent.h"
#include "error.h"
#include "extents.h"
#include "inode.h"
#include "journal.h"
-#include "super.h"
+#include "super-io.h"
#include "writeback.h"
#include "xattr.h"
#include <trace/events/bcache.h>
-static bool __bch_extent_normalize(struct cache_set *, struct bkey_s, bool);
static enum merge_result bch_extent_merge(struct cache_set *, struct btree *,
struct bkey_i *, struct bkey_i *);
@@ -120,21 +120,38 @@ bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
return NULL;
}
-unsigned bch_extent_nr_ptrs_from(struct bkey_s_c_extent e,
- const struct bch_extent_ptr *start)
+unsigned bch_extent_nr_ptrs(struct bkey_s_c_extent e)
{
const struct bch_extent_ptr *ptr;
unsigned nr_ptrs = 0;
- extent_for_each_ptr_from(e, ptr, start)
+ extent_for_each_ptr(e, ptr)
nr_ptrs++;
return nr_ptrs;
}
-unsigned bch_extent_nr_ptrs(struct bkey_s_c_extent e)
+unsigned bch_extent_nr_dirty_ptrs(struct bkey_s_c k)
{
- return bch_extent_nr_ptrs_from(e, &e.v->start->ptr);
+ struct bkey_s_c_extent e;
+ const struct bch_extent_ptr *ptr;
+ unsigned nr_ptrs = 0;
+
+ switch (k.k->type) {
+ case BCH_EXTENT:
+ case BCH_EXTENT_CACHED:
+ e = bkey_s_c_to_extent(k);
+
+ extent_for_each_ptr(e, ptr)
+ nr_ptrs += !ptr->cached;
+ break;
+
+ case BCH_RESERVATION:
+ nr_ptrs = bkey_s_c_to_reservation(k).v->nr_replicas;
+ break;
+ }
+
+ return nr_ptrs;
}
/* returns true if equal */
@@ -177,16 +194,19 @@ void bch_extent_crc_narrow_pointers(struct bkey_s_extent e, union bch_extent_crc
*
* and then verify that crc_dead1 + crc_live + crc_dead2 == orig_crc, and then
* use crc_live here (that we verified was correct earlier)
+ *
+ * note: doesn't work with encryption
*/
void bch_extent_narrow_crcs(struct bkey_s_extent e)
{
union bch_extent_crc *crc;
bool have_wide = false, have_narrow = false;
- u64 csum = 0;
+ struct bch_csum csum = { 0 };
unsigned csum_type = 0;
extent_for_each_crc(e, crc) {
- if (crc_compression_type(crc))
+ if (crc_compression_type(crc) ||
+ bch_csum_type_is_encryption(crc_csum_type(crc)))
continue;
if (crc_uncompressed_size(e.k, crc) != e.k->size) {
@@ -210,26 +230,38 @@ void bch_extent_narrow_crcs(struct bkey_s_extent e)
case BCH_EXTENT_CRC_NONE:
BUG();
case BCH_EXTENT_CRC32:
- if (bch_crc_size[csum_type] > sizeof(crc->crc32.csum))
+ if (bch_crc_bytes[csum_type] > 4)
continue;
bch_extent_crc_narrow_pointers(e, crc);
- crc->crc32.compressed_size = e.k->size;
- crc->crc32.uncompressed_size = e.k->size;
+ crc->crc32._compressed_size = e.k->size - 1;
+ crc->crc32._uncompressed_size = e.k->size - 1;
crc->crc32.offset = 0;
crc->crc32.csum_type = csum_type;
- crc->crc32.csum = csum;
+ crc->crc32.csum = csum.lo;
break;
case BCH_EXTENT_CRC64:
- if (bch_crc_size[csum_type] > sizeof(crc->crc64.csum))
+ if (bch_crc_bytes[csum_type] > 10)
continue;
bch_extent_crc_narrow_pointers(e, crc);
- crc->crc64.compressed_size = e.k->size;
- crc->crc64.uncompressed_size = e.k->size;
+ crc->crc64._compressed_size = e.k->size - 1;
+ crc->crc64._uncompressed_size = e.k->size - 1;
crc->crc64.offset = 0;
crc->crc64.csum_type = csum_type;
- crc->crc64.csum = csum;
+ crc->crc64.csum_lo = csum.lo;
+ crc->crc64.csum_hi = csum.hi;
+ break;
+ case BCH_EXTENT_CRC128:
+ if (bch_crc_bytes[csum_type] > 16)
+ continue;
+
+ bch_extent_crc_narrow_pointers(e, crc);
+ crc->crc128._compressed_size = e.k->size - 1;
+ crc->crc128._uncompressed_size = e.k->size - 1;
+ crc->crc128.offset = 0;
+ crc->crc128.csum_type = csum_type;
+ crc->crc128.csum = csum;
break;
}
}
@@ -300,13 +332,8 @@ static void bch_extent_drop_stale(struct cache_set *c, struct bkey_s_extent e)
struct bch_extent_ptr *ptr = &e.v->start->ptr;
bool dropped = false;
- /*
- * We don't want to change which pointers are considered cached/dirty,
- * so don't remove pointers that are considered dirty:
- */
rcu_read_lock();
- while ((ptr = extent_ptr_next(e, ptr)) &&
- !bch_extent_ptr_is_dirty(c, e.c, ptr))
+ while ((ptr = extent_ptr_next(e, ptr)))
if (should_drop_ptr(c, e.c, ptr)) {
__bch_extent_drop_ptr(e, ptr);
dropped = true;
@@ -321,16 +348,43 @@ static void bch_extent_drop_stale(struct cache_set *c, struct bkey_s_extent e)
static bool bch_ptr_normalize(struct cache_set *c, struct btree *bk,
struct bkey_s k)
{
- return __bch_extent_normalize(c, k, false);
+ return bch_extent_normalize(c, k);
}
static void bch_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
{
- u64 *d = (u64 *) bkeyp_val(f, k);
- unsigned i;
+ switch (k->type) {
+ case BCH_EXTENT:
+ case BCH_EXTENT_CACHED: {
+ union bch_extent_entry *entry;
+ u64 *d = (u64 *) bkeyp_val(f, k);
+ unsigned i;
- for (i = 0; i < bkeyp_val_u64s(f, k); i++)
- d[i] = swab64(d[i]);
+ for (i = 0; i < bkeyp_val_u64s(f, k); i++)
+ d[i] = swab64(d[i]);
+
+ for (entry = (union bch_extent_entry *) d;
+ entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
+ entry = extent_entry_next(entry)) {
+ switch (extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_crc32:
+ entry->crc32.csum = swab32(entry->crc32.csum);
+ break;
+ case BCH_EXTENT_ENTRY_crc64:
+ entry->crc64.csum_hi = swab16(entry->crc64.csum_hi);
+ entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
+ break;
+ case BCH_EXTENT_ENTRY_crc128:
+ entry->crc128.csum.hi = swab64(entry->crc128.csum.hi);
+ entry->crc128.csum.lo = swab64(entry->crc128.csum.lo);
+ break;
+ case BCH_EXTENT_ENTRY_ptr:
+ break;
+ }
+ }
+ break;
+ }
+ }
}
static const char *extent_ptr_invalid(struct bkey_s_c_extent e,
@@ -341,7 +395,7 @@ static const char *extent_ptr_invalid(struct bkey_s_c_extent e,
const struct bch_extent_ptr *ptr2;
const struct cache_member_cpu *m = mi->m + ptr->dev;
- if (ptr->dev > mi->nr_in_set || !m->valid)
+ if (ptr->dev > mi->nr_devices || !m->valid)
return "pointer to invalid device";
extent_for_each_ptr(e, ptr2)
@@ -380,7 +434,9 @@ static size_t extent_print_ptrs(struct cache_set *c, char *buf,
switch (__extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
+ case BCH_EXTENT_ENTRY_crc128:
crc = entry_to_crc(entry);
+
p("crc: c_size %u size %u offset %u csum %u compress %u",
crc_compressed_size(e.k, crc),
crc_uncompressed_size(e.k, crc),
@@ -388,7 +444,8 @@ static size_t extent_print_ptrs(struct cache_set *c, char *buf,
crc_compression_type(crc));
break;
case BCH_EXTENT_ENTRY_ptr:
- ptr = &entry->ptr;
+ ptr = entry_to_ptr(entry);
+
p("ptr: %u:%llu gen %u%s", ptr->dev,
(u64) ptr->offset, ptr->gen,
(ca = PTR_CACHE(c, ptr)) && ptr_stale(ca, ptr)
@@ -621,6 +678,10 @@ static bool __bch_cut_front(struct bpos where, struct bkey_s k)
if (prev_crc != crc)
crc->crc64.offset += e.k->size - len;
break;
+ case BCH_EXTENT_CRC128:
+ if (prev_crc != crc)
+ crc->crc128.offset += e.k->size - len;
+ break;
}
prev_crc = crc;
}
@@ -948,7 +1009,7 @@ static bool bch_extent_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
BUG_ON(!l.k->size || !r.k->size);
if (l.k->type != r.k->type ||
- l.k->version != r.k->version)
+ bversion_cmp(l.k->version, r.k->version))
return false;
switch (l.k->type) {
@@ -985,7 +1046,7 @@ static bool bch_extent_cmpxchg_cmp(struct bkey_s_c l, struct bkey_s_c r)
extent_for_each_ptr(le, lp) {
const union bch_extent_entry *entry =
- bkey_idx(re.v, (u64 *) lp - le.v->_data);
+ vstruct_idx(re.v, (u64 *) lp - le.v->_data);
if (!extent_entry_is_ptr(entry))
return false;
@@ -1142,7 +1203,7 @@ static void extent_insert_committed(struct extent_insert_state *s)
if (!(s->trans->flags & BTREE_INSERT_JOURNAL_REPLAY) &&
bkey_cmp(s->committed, insert->k.p) &&
- bkey_extent_is_compressed(c, bkey_i_to_s_c(insert))) {
+ bkey_extent_is_compressed(bkey_i_to_s_c(insert))) {
/* XXX: possibly need to increase our reservation? */
bch_cut_subtract_back(s, s->committed,
bkey_i_to_s(&split.k));
@@ -1178,12 +1239,19 @@ __extent_insert_advance_pos(struct extent_insert_state *s,
{
struct extent_insert_hook *hook = s->trans->hook;
enum extent_insert_hook_ret ret;
-
+#if 0
+ /*
+ * Currently disabled for encryption - broken with fcollapse. Will have
+ * to reenable when versions are exposed for send/receive - versions
+ * will have to be monotonic then:
+ */
if (k.k && k.k->size &&
- s->insert->k->k.version &&
- k.k->version > s->insert->k->k.version)
+ !bversion_zero(s->insert->k->k.version) &&
+ bversion_cmp(k.k->version, s->insert->k->k.version) > 0) {
ret = BTREE_HOOK_NO_INSERT;
- else if (hook)
+ } else
+#endif
+ if (hook)
ret = hook->fn(hook, s->committed, next_pos, k, s->insert->k);
else
ret = BTREE_HOOK_DO_INSERT;
@@ -1257,7 +1325,7 @@ extent_insert_check_split_compressed(struct extent_insert_state *s,
unsigned sectors;
if (overlap == BCH_EXTENT_OVERLAP_MIDDLE &&
- (sectors = bkey_extent_is_compressed(c, k))) {
+ (sectors = bkey_extent_is_compressed(k))) {
int flags = BCH_DISK_RESERVATION_BTREE_LOCKS_HELD;
if (s->trans->flags & BTREE_INSERT_NOFAIL)
@@ -1680,6 +1748,7 @@ static const char *bch_extent_invalid(const struct cache_set *c,
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
const union bch_extent_crc *crc;
+ const struct bch_extent_ptr *ptr;
struct cache_member_rcu *mi = cache_member_info_get(c);
unsigned size_ondisk = e.k->size;
const char *reason;
@@ -1689,9 +1758,7 @@ static const char *bch_extent_invalid(const struct cache_set *c,
if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
goto invalid;
- switch (extent_entry_type(entry)) {
- case BCH_EXTENT_ENTRY_crc32:
- case BCH_EXTENT_ENTRY_crc64:
+ if (extent_entry_is_crc(entry)) {
crc = entry_to_crc(entry);
reason = "checksum offset + key size > uncompressed size";
@@ -1702,19 +1769,19 @@ static const char *bch_extent_invalid(const struct cache_set *c,
size_ondisk = crc_compressed_size(e.k, crc);
reason = "invalid checksum type";
- if (crc_csum_type(crc) >= BCH_CSUM_NR)
+ if (!bch_checksum_type_valid(c, crc_csum_type(crc)))
goto invalid;
reason = "invalid compression type";
if (crc_compression_type(crc) >= BCH_COMPRESSION_NR)
goto invalid;
- break;
- case BCH_EXTENT_ENTRY_ptr:
+ } else {
+ ptr = entry_to_ptr(entry);
+
reason = extent_ptr_invalid(e, mi,
&entry->ptr, size_ondisk);
if (reason)
goto invalid;
- break;
}
}
@@ -1725,8 +1792,17 @@ invalid:
return reason;
}
- case BCH_RESERVATION:
+ case BCH_RESERVATION: {
+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+
+ if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation))
+ return "incorrect value size";
+
+ if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX)
+ return "invalid nr_replicas";
+
return NULL;
+ }
default:
return "invalid value type";
@@ -1743,7 +1819,7 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
unsigned seq, stale;
char buf[160];
bool bad;
- unsigned ptrs_per_tier[CACHE_TIERS];
+ unsigned ptrs_per_tier[BCH_TIER_MAX];
unsigned tier, replicas = 0;
/*
@@ -1760,11 +1836,9 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
mi = cache_member_info_get(c);
extent_for_each_ptr(e, ptr) {
- bool dirty = bch_extent_ptr_is_dirty(c, e, ptr);
-
replicas++;
- if (ptr->dev >= mi->nr_in_set)
+ if (ptr->dev >= mi->nr_devices)
goto bad_device;
/*
@@ -1796,7 +1870,7 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
stale = ptr_stale(ca, ptr);
- cache_set_bug_on(stale && dirty, c,
+ cache_set_bug_on(stale && !ptr->cached, c,
"stale dirty pointer");
cache_set_bug_on(stale > 96, c,
@@ -1809,9 +1883,9 @@ static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
bad = (mark.is_metadata ||
(gc_pos_cmp(c->gc_pos, gc_pos_btree_node(b)) > 0 &&
!mark.owned_by_allocator &&
- !(dirty
- ? mark.dirty_sectors
- : mark.cached_sectors)));
+ !(ptr->cached
+ ? mark.cached_sectors
+ : mark.dirty_sectors)));
} while (read_seqcount_retry(&c->gc_pos_lock, seq));
if (bad)
@@ -1869,6 +1943,7 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
bch_extent_debugcheck_extent(c, b, bkey_s_c_to_extent(k));
+ break;
case BCH_RESERVATION:
break;
default:
@@ -1896,69 +1971,77 @@ static void bch_extent_to_text(struct cache_set *c, char *buf,
static unsigned PTR_TIER(struct cache_member_rcu *mi,
const struct bch_extent_ptr *ptr)
{
- return ptr->dev < mi->nr_in_set
+ return ptr->dev < mi->nr_devices
? mi->m[ptr->dev].tier
: UINT_MAX;
}
-void bch_extent_entry_append(struct bkey_i_extent *e,
- union bch_extent_entry *entry)
-{
- BUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) >
- BKEY_EXTENT_VAL_U64s_MAX);
-
- memcpy_u64s(extent_entry_last(extent_i_to_s(e)),
- entry,
- extent_entry_u64s(entry));
- e->k.u64s += extent_entry_u64s(entry);
-}
-
-const unsigned bch_crc_size[] = {
- [BCH_CSUM_NONE] = 0,
- [BCH_CSUM_CRC32C] = 4,
- [BCH_CSUM_CRC64] = 8,
-};
-
static void bch_extent_crc_init(union bch_extent_crc *crc,
unsigned compressed_size,
unsigned uncompressed_size,
unsigned compression_type,
- u64 csum, unsigned csum_type)
+ unsigned nonce,
+ struct bch_csum csum, unsigned csum_type)
{
- if (bch_crc_size[csum_type] <= 4 &&
- uncompressed_size <= CRC32_EXTENT_SIZE_MAX) {
+ if (bch_crc_bytes[csum_type] <= 4 &&
+ uncompressed_size <= CRC32_SIZE_MAX &&
+ nonce <= CRC32_NONCE_MAX) {
crc->crc32 = (struct bch_extent_crc32) {
.type = 1 << BCH_EXTENT_ENTRY_crc32,
- .compressed_size = compressed_size,
- .uncompressed_size = uncompressed_size,
+ ._compressed_size = compressed_size - 1,
+ ._uncompressed_size = uncompressed_size - 1,
.offset = 0,
.compression_type = compression_type,
.csum_type = csum_type,
- .csum = csum,
+ .csum = *((__le32 *) &csum.lo),
};
- } else {
- BUG_ON(uncompressed_size > CRC64_EXTENT_SIZE_MAX);
+ return;
+ }
+ if (bch_crc_bytes[csum_type] <= 10 &&
+ uncompressed_size <= CRC64_SIZE_MAX &&
+ nonce <= CRC64_NONCE_MAX) {
crc->crc64 = (struct bch_extent_crc64) {
.type = 1 << BCH_EXTENT_ENTRY_crc64,
- .compressed_size = compressed_size,
- .uncompressed_size = uncompressed_size,
+ ._compressed_size = compressed_size - 1,
+ ._uncompressed_size = uncompressed_size - 1,
+ .offset = 0,
+ .nonce = nonce,
+ .compression_type = compression_type,
+ .csum_type = csum_type,
+ .csum_lo = csum.lo,
+ .csum_hi = *((__le16 *) &csum.hi),
+ };
+ return;
+ }
+
+ if (bch_crc_bytes[csum_type] <= 16 &&
+ uncompressed_size <= CRC128_SIZE_MAX &&
+ nonce <= CRC128_NONCE_MAX) {
+ crc->crc128 = (struct bch_extent_crc128) {
+ .type = 1 << BCH_EXTENT_ENTRY_crc128,
+ ._compressed_size = compressed_size - 1,
+ ._uncompressed_size = uncompressed_size - 1,
.offset = 0,
+ .nonce = nonce,
.compression_type = compression_type,
.csum_type = csum_type,
.csum = csum,
};
+ return;
}
+
+ BUG();
}
void bch_extent_crc_append(struct bkey_i_extent *e,
unsigned compressed_size,
unsigned uncompressed_size,
unsigned compression_type,
- u64 csum, unsigned csum_type)
+ unsigned nonce,
+ struct bch_csum csum, unsigned csum_type)
{
union bch_extent_crc *crc;
- union bch_extent_crc new;
BUG_ON(compressed_size > uncompressed_size);
BUG_ON(uncompressed_size != e->k.size);
@@ -1971,123 +2054,26 @@ void bch_extent_crc_append(struct bkey_i_extent *e,
extent_for_each_crc(extent_i_to_s(e), crc)
;
- switch (extent_crc_type(crc)) {
- case BCH_EXTENT_CRC_NONE:
- if (!csum_type && !compression_type)
- return;
- break;
- case BCH_EXTENT_CRC32:
- case BCH_EXTENT_CRC64:
- if (crc_compressed_size(&e->k, crc) == compressed_size &&
- crc_uncompressed_size(&e->k, crc) == uncompressed_size &&
- crc_offset(crc) == 0 &&
- crc_compression_type(crc) == compression_type &&
- crc_csum_type(crc) == csum_type &&
- crc_csum(crc) == csum)
- return;
- break;
- }
+ if (!crc && !csum_type && !compression_type)
+ return;
+
+ if (crc &&
+ crc_compressed_size(&e->k, crc) == compressed_size &&
+ crc_uncompressed_size(&e->k, crc) == uncompressed_size &&
+ crc_offset(crc) == 0 &&
+ crc_nonce(crc) == nonce &&
+ crc_csum_type(crc) == csum_type &&
+ crc_compression_type(crc) == compression_type &&
+ crc_csum(crc).lo == csum.lo &&
+ crc_csum(crc).hi == csum.hi)
+ return;
- bch_extent_crc_init(&new,
+ bch_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)),
compressed_size,
uncompressed_size,
compression_type,
- csum, csum_type);
- bch_extent_entry_append(e, to_entry(&new));
-}
-
-static void __extent_sort_ptrs(struct cache_member_rcu *mi,
- struct bkey_s_extent src)
-{
- struct bch_extent_ptr *src_ptr, *dst_ptr;
- union bch_extent_crc *src_crc, *dst_crc;
- union bch_extent_crc _src;
- BKEY_PADDED(k) tmp;
- struct bkey_s_extent dst;
- size_t u64s, crc_u64s;
- u64 *p;
-
- /*
- * Insertion sort:
- *
- * Note: this sort needs to be stable, because pointer order determines
- * pointer dirtyness.
- */
-
- tmp.k.k = *src.k;
- dst = bkey_i_to_s_extent(&tmp.k);
- set_bkey_val_u64s(dst.k, 0);
-
- extent_for_each_ptr_crc(src, src_ptr, src_crc) {
- extent_for_each_ptr_crc(dst, dst_ptr, dst_crc)
- if (PTR_TIER(mi, src_ptr) < PTR_TIER(mi, dst_ptr))
- goto found;
-
- dst_ptr = &extent_entry_last(dst)->ptr;
- dst_crc = NULL;
-found:
- /* found insert position: */
-
- /*
- * we're making sure everything has a crc at this point, if
- * dst_ptr points to a pointer it better have a crc:
- */
- BUG_ON(dst_ptr != &extent_entry_last(dst)->ptr && !dst_crc);
- BUG_ON(dst_crc &&
- (extent_entry_next(to_entry(dst_crc)) !=
- to_entry(dst_ptr)));
-
- if (!src_crc) {
- bch_extent_crc_init(&_src, src.k->size,
- src.k->size, 0, 0, 0);
- src_crc = &_src;
- }
-
- p = dst_ptr != &extent_entry_last(dst)->ptr
- ? (void *) dst_crc
- : (void *) dst_ptr;
-
- crc_u64s = extent_entry_u64s(to_entry(src_crc));
- u64s = crc_u64s + sizeof(*dst_ptr) / sizeof(u64);
-
- memmove_u64s_up(p + u64s, p,
- (u64 *) extent_entry_last(dst) - (u64 *) p);
- set_bkey_val_u64s(dst.k, bkey_val_u64s(dst.k) + u64s);
-
- memcpy_u64s(p, src_crc, crc_u64s);
- memcpy_u64s(p + crc_u64s, src_ptr,
- sizeof(*src_ptr) / sizeof(u64));
- }
-
- /* Sort done - now drop redundant crc entries: */
- bch_extent_drop_redundant_crcs(dst);
-
- memcpy_u64s(src.v, dst.v, bkey_val_u64s(dst.k));
- set_bkey_val_u64s(src.k, bkey_val_u64s(dst.k));
-}
-
-static void extent_sort_ptrs(struct cache_set *c, struct bkey_s_extent e)
-{
- struct cache_member_rcu *mi;
- struct bch_extent_ptr *ptr, *prev = NULL;
- union bch_extent_crc *crc;
-
- /*
- * First check if any pointers are out of order before doing the actual
- * sort:
- */
- mi = cache_member_info_get(c);
-
- extent_for_each_ptr_crc(e, ptr, crc) {
- if (prev &&
- PTR_TIER(mi, ptr) < PTR_TIER(mi, prev)) {
- __extent_sort_ptrs(mi, e);
- break;
- }
- prev = ptr;
- }
-
- cache_member_info_put();
+ nonce, csum, csum_type);
+ __extent_entry_push(e);
}
/*
@@ -2098,8 +2084,7 @@ static void extent_sort_ptrs(struct cache_set *c, struct bkey_s_extent e)
* For existing keys, only called when btree nodes are being rewritten, not when
* they're merely being compacted/resorted in memory.
*/
-static bool __bch_extent_normalize(struct cache_set *c, struct bkey_s k,
- bool sort)
+bool bch_extent_normalize(struct cache_set *c, struct bkey_s k)
{
struct bkey_s_extent e;
@@ -2112,7 +2097,7 @@ static bool __bch_extent_normalize(struct cache_set *c, struct bkey_s k,
return true;
case KEY_TYPE_DISCARD:
- return !k.k->version;
+ return bversion_zero(k.k->version);
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
@@ -2120,13 +2105,10 @@ static bool __bch_extent_normalize(struct cache_set *c, struct bkey_s k,
bch_extent_drop_stale(c, e);
- if (sort)
- extent_sort_ptrs(c, e);
-
if (!bkey_val_u64s(e.k)) {
if (bkey_extent_is_cached(e.k)) {
k.k->type = KEY_TYPE_DISCARD;
- if (!k.k->version)
+ if (bversion_zero(k.k->version))
return true;
} else {
k.k->type = KEY_TYPE_ERROR;
@@ -2141,9 +2123,40 @@ static bool __bch_extent_normalize(struct cache_set *c, struct bkey_s k,
}
}
-bool bch_extent_normalize(struct cache_set *c, struct bkey_s k)
+void bch_extent_mark_replicas_cached(struct cache_set *c,
+ struct bkey_s_extent e,
+ unsigned nr_cached)
{
- return __bch_extent_normalize(c, k, true);
+ struct bch_extent_ptr *ptr;
+ struct cache_member_rcu *mi;
+ bool have_higher_tier;
+ unsigned tier = 0;
+
+ if (!nr_cached)
+ return;
+
+ mi = cache_member_info_get(c);
+
+ do {
+ have_higher_tier = false;
+
+ extent_for_each_ptr(e, ptr) {
+ if (!ptr->cached &&
+ PTR_TIER(mi, ptr) == tier) {
+ ptr->cached = true;
+ nr_cached--;
+ if (!nr_cached)
+ goto out;
+ }
+
+ if (PTR_TIER(mi, ptr) > tier)
+ have_higher_tier = true;
+ }
+
+ tier++;
+ } while (have_higher_tier);
+out:
+ cache_member_info_put();
}
/*
@@ -2183,7 +2196,7 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k,
extent_for_each_online_device_crc(c, e, crc, ptr, ca)
if (!ptr_stale(ca, ptr)) {
*ret = (struct extent_pick_ptr) {
- .crc = crc_to_64(e.k, crc),
+ .crc = crc_to_128(e.k, crc),
.ptr = *ptr,
.ca = ca,
};
@@ -2227,7 +2240,7 @@ static enum merge_result bch_extent_merge(struct cache_set *c,
if (l->k.u64s != r->k.u64s ||
l->k.type != r->k.type ||
- l->k.version != r->k.version ||
+ bversion_cmp(l->k.version, r->k.version) ||
bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
return BCH_MERGE_NOMERGE;
@@ -2235,7 +2248,6 @@ static enum merge_result bch_extent_merge(struct cache_set *c,
case KEY_TYPE_DELETED:
case KEY_TYPE_DISCARD:
case KEY_TYPE_ERROR:
- case BCH_RESERVATION:
/* These types are mergeable, and no val to check */
break;
@@ -2248,7 +2260,7 @@ static enum merge_result bch_extent_merge(struct cache_set *c,
struct bch_extent_ptr *lp, *rp;
struct cache_member_cpu *m;
- en_r = bkey_idx(er.v, (u64 *) en_l - el.v->_data);
+ en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
if ((extent_entry_type(en_l) !=
extent_entry_type(en_r)) ||
@@ -2276,6 +2288,15 @@ static enum merge_result bch_extent_merge(struct cache_set *c,
}
break;
+ case BCH_RESERVATION: {
+ struct bkey_i_reservation *li = bkey_i_to_reservation(l);
+ struct bkey_i_reservation *ri = bkey_i_to_reservation(r);
+
+ if (li->v.generation != ri->v.generation ||
+ li->v.nr_replicas != ri->v.nr_replicas)
+ return BCH_MERGE_NOMERGE;
+ break;
+ }
default:
return BCH_MERGE_NOMERGE;
}
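
The note added above bch_extent_narrow_crcs() ("doesn't work with encryption") refers to the checksum-splitting trick described in that comment: after an extent has been trimmed, the stored checksum still covers the original, wider region, and the narrower checksum over just the live range can only be trusted if checksums over the dead head and tail combine with it to reproduce the stored value (crc_dead1 + crc_live + crc_dead2 == orig_crc). A toy sketch of the idea, with a simple byte sum standing in for the real combinable checksum types; keyed MACs (the encrypted csum types the function now skips via bch_csum_type_is_encryption()) cannot be split this way, hence the new check.

/*
 * Toy illustration only, not code from this repository.  toy_csum() stands
 * in for a checksum that can be computed over sub-ranges and combined by
 * addition; the real csum types have their own combine rules.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t toy_csum(const uint8_t *p, size_t len)
{
	uint64_t sum = 0;
	while (len--)
		sum += *p++;
	return sum;
}

int main(void)
{
	uint8_t extent[16];
	size_t live_off = 4, live_len = 8;	/* the key now covers sectors [4, 12) */
	unsigned i;

	for (i = 0; i < sizeof(extent); i++)
		extent[i] = i;

	uint64_t orig  = toy_csum(extent, sizeof(extent));	/* stored wide checksum */
	uint64_t dead1 = toy_csum(extent, live_off);
	uint64_t live  = toy_csum(extent + live_off, live_len);
	uint64_t dead2 = toy_csum(extent + live_off + live_len,
				  sizeof(extent) - live_off - live_len);

	/* Only if this holds can 'live' safely replace the wide checksum: */
	printf("narrowing %s\n", dead1 + live + dead2 == orig ? "ok" : "failed");
	return 0;
}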