summary | refs | log | tree | commit | diff
path: root/libbcachefs/extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbcachefs/extents.c')
-rw-r--r-- libbcachefs/extents.c | 1176
1 file changed, 411 insertions, 765 deletions
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index ebaf390f..dc3fbfb6 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -27,209 +27,270 @@
#include <trace/events/bcachefs.h>
-static void sort_key_next(struct btree_node_iter_large *iter,
- struct btree *b,
- struct btree_node_iter_set *i)
+unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k)
{
- i->k += __btree_node_offset_to_key(b, i->k)->u64s;
+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
+ const struct bch_extent_ptr *ptr;
+ unsigned nr_ptrs = 0;
- if (i->k == i->end)
- *i = iter->data[--iter->used];
+ bkey_for_each_ptr(p, ptr)
+ nr_ptrs++;
+
+ return nr_ptrs;
}
-/*
- * Returns true if l > r - unless l == r, in which case returns true if l is
- * older than r.
- *
- * Necessary for btree_sort_fixup() - if there are multiple keys that compare
- * equal in different sets, we have to process them newest to oldest.
- */
-#define key_sort_cmp(h, l, r) \
-({ \
- bkey_cmp_packed(b, \
- __btree_node_offset_to_key(b, (l).k), \
- __btree_node_offset_to_key(b, (r).k)) \
- \
- ?: (l).k - (r).k; \
-})
-
-static inline bool should_drop_next_key(struct btree_node_iter_large *iter,
- struct btree *b)
+unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c k)
{
- struct btree_node_iter_set *l = iter->data, *r = iter->data + 1;
- struct bkey_packed *k = __btree_node_offset_to_key(b, l->k);
-
- if (bkey_whiteout(k))
- return true;
+ unsigned nr_ptrs = 0;
- if (iter->used < 2)
- return false;
+ switch (k.k->type) {
+ case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_extent: {
+ struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k);
+ const struct bch_extent_ptr *ptr;
- if (iter->used > 2 &&
- key_sort_cmp(iter, r[0], r[1]) >= 0)
- r++;
+ bkey_for_each_ptr(p, ptr)
+ nr_ptrs += !ptr->cached;
+ BUG_ON(!nr_ptrs);
+ break;
+ }
+ case KEY_TYPE_reservation:
+ nr_ptrs = bkey_s_c_to_reservation(k).v->nr_replicas;
+ break;
+ }
- /*
- * key_sort_cmp() ensures that when keys compare equal the older key
- * comes first; so if l->k compares equal to r->k then l->k is older and
- * should be dropped.
- */
- return !bkey_cmp_packed(b,
- __btree_node_offset_to_key(b, l->k),
- __btree_node_offset_to_key(b, r->k));
+ return nr_ptrs;
}
-struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
- struct btree *b,
- struct btree_node_iter_large *iter)
+static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
+ struct extent_ptr_decoded p)
{
- struct bkey_packed *out = dst->start;
- struct btree_nr_keys nr;
+ unsigned i, durability = 0;
+ struct bch_dev *ca;
- memset(&nr, 0, sizeof(nr));
+ if (p.ptr.cached)
+ return 0;
- heap_resort(iter, key_sort_cmp, NULL);
+ ca = bch_dev_bkey_exists(c, p.ptr.dev);
- while (!bch2_btree_node_iter_large_end(iter)) {
- if (!should_drop_next_key(iter, b)) {
- struct bkey_packed *k =
- __btree_node_offset_to_key(b, iter->data->k);
+ if (ca->mi.state != BCH_MEMBER_STATE_FAILED)
+ durability = max_t(unsigned, durability, ca->mi.durability);
- bkey_copy(out, k);
- btree_keys_account_key_add(&nr, 0, out);
- out = bkey_next(out);
- }
+ for (i = 0; i < p.ec_nr; i++) {
+ struct stripe *s =
+ genradix_ptr(&c->stripes[0], p.idx);
- sort_key_next(iter, b, iter->data);
- heap_sift_down(iter, 0, key_sort_cmp, NULL);
+ if (WARN_ON(!s))
+ continue;
+
+ durability = max_t(unsigned, durability, s->nr_redundant);
}
- dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
- return nr;
+ return durability;
}
-/* Common among btree and extent ptrs */
+unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned durability = 0;
-const struct bch_extent_ptr *
-bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ durability += bch2_extent_ptr_durability(c, p);
+
+ return durability;
+}
+
+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
+ unsigned dev)
{
- const struct bch_extent_ptr *ptr;
+ struct bch_dev_io_failures *i;
- extent_for_each_ptr(e, ptr)
- if (ptr->dev == dev)
- return ptr;
+ for (i = f->devs; i < f->devs + f->nr; i++)
+ if (i->dev == dev)
+ return i;
return NULL;
}
-void bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
+void bch2_mark_io_failure(struct bch_io_failures *failed,
+ struct extent_ptr_decoded *p)
{
- struct bch_extent_ptr *ptr;
+ struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+
+ if (!f) {
+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
- bch2_extent_drop_ptrs(e, ptr, ptr->dev == dev);
+ f = &failed->devs[failed->nr++];
+ f->dev = p->ptr.dev;
+ f->idx = p->idx;
+ f->nr_failed = 1;
+ f->nr_retries = 0;
+ } else if (p->idx != f->idx) {
+ f->idx = p->idx;
+ f->nr_failed = 1;
+ f->nr_retries = 0;
+ } else {
+ f->nr_failed++;
+ }
}
-const struct bch_extent_ptr *
-bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group)
+/*
+ * returns true if p1 is better than p2:
+ */
+static inline bool ptr_better(struct bch_fs *c,
+ const struct extent_ptr_decoded p1,
+ const struct extent_ptr_decoded p2)
{
- const struct bch_extent_ptr *ptr;
+ if (likely(!p1.idx && !p2.idx)) {
+ struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
+ struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
- extent_for_each_ptr(e, ptr) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
+ u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
- if (ca->mi.group &&
- ca->mi.group - 1 == group)
- return ptr;
+ /* Pick at random, biased in favor of the faster device: */
+
+ return bch2_rand_range(l1 + l2) > l1;
}
- return NULL;
+ if (force_reconstruct_read(c))
+ return p1.idx > p2.idx;
+
+ return p1.idx < p2.idx;
}
-const struct bch_extent_ptr *
-bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target)
+/*
+ * This picks a non-stale pointer, preferably from a device other than @avoid.
+ * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to
+ * other devices, it will still pick a pointer from avoid.
+ */
+int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
+ struct bch_io_failures *failed,
+ struct extent_ptr_decoded *pick)
{
- const struct bch_extent_ptr *ptr;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct bch_dev_io_failures *f;
+ struct bch_dev *ca;
+ int ret = 0;
- extent_for_each_ptr(e, ptr)
- if (bch2_dev_in_target(c, ptr->dev, target) &&
- (!ptr->cached ||
- !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)))
- return ptr;
+ if (k.k->type == KEY_TYPE_error)
+ return -EIO;
- return NULL;
-}
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ ca = bch_dev_bkey_exists(c, p.ptr.dev);
-unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent e)
-{
- const struct bch_extent_ptr *ptr;
- unsigned nr_ptrs = 0;
+ /*
+ * If there are any dirty pointers it's an error if we can't
+ * read:
+ */
+ if (!ret && !p.ptr.cached)
+ ret = -EIO;
- extent_for_each_ptr(e, ptr)
- nr_ptrs++;
+ if (p.ptr.cached && ptr_stale(ca, &p.ptr))
+ continue;
- return nr_ptrs;
+ f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+ if (f)
+ p.idx = f->nr_failed < f->nr_retries
+ ? f->idx
+ : f->idx + 1;
+
+ if (!p.idx &&
+ !bch2_dev_is_readable(ca))
+ p.idx++;
+
+ if (force_reconstruct_read(c) &&
+ !p.idx && p.ec_nr)
+ p.idx++;
+
+ if (p.idx >= p.ec_nr + 1)
+ continue;
+
+ if (ret > 0 && !ptr_better(c, p, *pick))
+ continue;
+
+ *pick = p;
+ ret = 1;
+ }
+
+ return ret;
}
-unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k)
+void bch2_bkey_append_ptr(struct bkey_i *k,
+ struct bch_extent_ptr ptr)
{
- struct bkey_s_c_extent e;
- const struct bch_extent_ptr *ptr;
- unsigned nr_ptrs = 0;
+ EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev));
- switch (k.k->type) {
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED:
- e = bkey_s_c_to_extent(k);
+ switch (k->k.type) {
+ case KEY_TYPE_btree_ptr:
+ case KEY_TYPE_extent:
+ EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX);
- extent_for_each_ptr(e, ptr)
- nr_ptrs += !ptr->cached;
- break;
+ ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
- case BCH_RESERVATION:
- nr_ptrs = bkey_s_c_to_reservation(k).v->nr_replicas;
+ memcpy((void *) &k->v + bkey_val_bytes(&k->k),
+ &ptr,
+ sizeof(ptr));
+ k->u64s++;
break;
+ default:
+ BUG();
}
-
- return nr_ptrs;
}
-static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
- struct extent_ptr_decoded p)
+void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
{
- unsigned i, durability = 0;
- struct bch_dev *ca;
+ struct bch_extent_ptr *ptr;
- if (p.ptr.cached)
- return 0;
+ bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev);
+}
- ca = bch_dev_bkey_exists(c, p.ptr.dev);
+/* extent specific utility code */
- if (ca->mi.state != BCH_MEMBER_STATE_FAILED)
- durability = max_t(unsigned, durability, ca->mi.durability);
+const struct bch_extent_ptr *
+bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
+{
+ const struct bch_extent_ptr *ptr;
- for (i = 0; i < p.ec_nr; i++) {
- struct ec_stripe *s =
- genradix_ptr(&c->ec_stripes, p.idx);
+ extent_for_each_ptr(e, ptr)
+ if (ptr->dev == dev)
+ return ptr;
- if (WARN_ON(!s))
- continue;
+ return NULL;
+}
- durability = max_t(unsigned, durability, s->nr_redundant);
+const struct bch_extent_ptr *
+bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group)
+{
+ const struct bch_extent_ptr *ptr;
+
+ extent_for_each_ptr(e, ptr) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+
+ if (ca->mi.group &&
+ ca->mi.group - 1 == group)
+ return ptr;
}
- return durability;
+ return NULL;
}
-unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e)
+const struct bch_extent_ptr *
+bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target)
{
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- unsigned durability = 0;
+ const struct bch_extent_ptr *ptr;
- extent_for_each_ptr_decode(e, p, entry)
- durability += bch2_extent_ptr_durability(c, p);
+ extent_for_each_ptr(e, ptr)
+ if (bch2_dev_in_target(c, ptr->dev, target) &&
+ (!ptr->cached ||
+ !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)))
+ return ptr;
- return durability;
+ return NULL;
}
unsigned bch2_extent_is_compressed(struct bkey_s_c k)
@@ -237,8 +298,7 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c k)
unsigned ret = 0;
switch (k.k->type) {
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED: {
+ case KEY_TYPE_extent: {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
@@ -270,10 +330,10 @@ bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e,
return false;
}
-static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e,
+static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
union bch_extent_entry *entry)
{
- union bch_extent_entry *i = e.v->start;
+ union bch_extent_entry *i = ptrs.start;
if (i == entry)
return NULL;
@@ -283,23 +343,24 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e,
return i;
}
-union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e,
- struct bch_extent_ptr *ptr)
+union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
+ struct bch_extent_ptr *ptr)
{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *dst, *src, *prev;
bool drop_crc = true;
- EBUG_ON(ptr < &e.v->start->ptr ||
- ptr >= &extent_entry_last(e)->ptr);
+ EBUG_ON(ptr < &ptrs.start->ptr ||
+ ptr >= &ptrs.end->ptr);
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
src = extent_entry_next(to_entry(ptr));
- if (src != extent_entry_last(e) &&
+ if (src != ptrs.end &&
!extent_entry_is_crc(src))
drop_crc = false;
dst = to_entry(ptr);
- while ((prev = extent_entry_prev(e, dst))) {
+ while ((prev = extent_entry_prev(ptrs, dst))) {
if (extent_entry_is_ptr(prev))
break;
@@ -313,8 +374,8 @@ union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e,
}
memmove_u64s_down(dst, src,
- (u64 *) extent_entry_last(e) - (u64 *) src);
- e.k->u64s -= (u64 *) src - (u64 *) dst;
+ (u64 *) ptrs.end - (u64 *) src);
+ k.k->u64s -= (u64 *) src - (u64 *) dst;
return dst;
}
@@ -381,7 +442,7 @@ found:
restart_narrow_pointers:
extent_for_each_ptr_decode(extent_i_to_s(e), p, i)
if (can_narrow_crc(p.crc, n)) {
- bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr);
+ bch2_bkey_drop_ptr(extent_i_to_s(e).s, &i->ptr);
p.ptr.offset += p.crc.offset;
p.crc = n;
bch2_extent_ptr_decoded_append(e, &p);
@@ -406,66 +467,47 @@ static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
bch2_crc_cmp(l.csum, r.csum));
}
-static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
-{
- struct bch_extent_ptr *ptr;
-
- bch2_extent_drop_ptrs(e, ptr,
- ptr->cached &&
- ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
-}
-
-bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k)
-{
- return bch2_extent_normalize(c, k);
-}
-
void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
{
- switch (k->type) {
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED: {
- union bch_extent_entry *entry;
- u64 *d = (u64 *) bkeyp_val(f, k);
- unsigned i;
+ union bch_extent_entry *entry;
+ u64 *d = (u64 *) bkeyp_val(f, k);
+ unsigned i;
- for (i = 0; i < bkeyp_val_u64s(f, k); i++)
- d[i] = swab64(d[i]);
+ for (i = 0; i < bkeyp_val_u64s(f, k); i++)
+ d[i] = swab64(d[i]);
- for (entry = (union bch_extent_entry *) d;
- entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
- entry = extent_entry_next(entry)) {
- switch (extent_entry_type(entry)) {
- case BCH_EXTENT_ENTRY_ptr:
- break;
- case BCH_EXTENT_ENTRY_crc32:
- entry->crc32.csum = swab32(entry->crc32.csum);
- break;
- case BCH_EXTENT_ENTRY_crc64:
- entry->crc64.csum_hi = swab16(entry->crc64.csum_hi);
- entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
- break;
- case BCH_EXTENT_ENTRY_crc128:
- entry->crc128.csum.hi = (__force __le64)
- swab64((__force u64) entry->crc128.csum.hi);
- entry->crc128.csum.lo = (__force __le64)
- swab64((__force u64) entry->crc128.csum.lo);
- break;
- case BCH_EXTENT_ENTRY_stripe_ptr:
- break;
- }
+ for (entry = (union bch_extent_entry *) d;
+ entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
+ entry = extent_entry_next(entry)) {
+ switch (extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ break;
+ case BCH_EXTENT_ENTRY_crc32:
+ entry->crc32.csum = swab32(entry->crc32.csum);
+ break;
+ case BCH_EXTENT_ENTRY_crc64:
+ entry->crc64.csum_hi = swab16(entry->crc64.csum_hi);
+ entry->crc64.csum_lo = swab64(entry->crc64.csum_lo);
+ break;
+ case BCH_EXTENT_ENTRY_crc128:
+ entry->crc128.csum.hi = (__force __le64)
+ swab64((__force u64) entry->crc128.csum.hi);
+ entry->crc128.csum.lo = (__force __le64)
+ swab64((__force u64) entry->crc128.csum.lo);
+ break;
+ case BCH_EXTENT_ENTRY_stripe_ptr:
+ break;
}
- break;
- }
}
}
static const char *extent_ptr_invalid(const struct bch_fs *c,
- struct bkey_s_c_extent e,
+ struct bkey_s_c k,
const struct bch_extent_ptr *ptr,
unsigned size_ondisk,
bool metadata)
{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr2;
struct bch_dev *ca;
@@ -477,7 +519,7 @@ static const char *extent_ptr_invalid(const struct bch_fs *c,
if (!ca)
return "pointer to invalid device";
- extent_for_each_ptr(e, ptr2)
+ bkey_for_each_ptr(ptrs, ptr2)
if (ptr != ptr2 && ptr->dev == ptr2->dev)
return "multiple pointers to same device";
@@ -494,9 +536,10 @@ static const char *extent_ptr_invalid(const struct bch_fs *c,
return NULL;
}
-static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c,
- struct bkey_s_c_extent e)
+static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
struct bch_extent_crc_unpacked crc;
const struct bch_extent_ptr *ptr;
@@ -504,7 +547,7 @@ static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca;
bool first = true;
- extent_for_each_entry(e, entry) {
+ bkey_extent_entry_for_each(ptrs, entry) {
if (!first)
pr_buf(out, " ");
@@ -524,7 +567,7 @@ static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c,
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128:
- crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));
+ crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u",
crc.compressed_size,
@@ -541,167 +584,48 @@ static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c,
break;
default:
pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
- goto out;
+ return;
}
first = false;
}
-out:
- if (bkey_extent_is_cached(e.k))
- pr_buf(out, " cached");
-}
-
-static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
- unsigned dev)
-{
- struct bch_dev_io_failures *i;
-
- for (i = f->devs; i < f->devs + f->nr; i++)
- if (i->dev == dev)
- return i;
-
- return NULL;
-}
-
-void bch2_mark_io_failure(struct bch_io_failures *failed,
- struct extent_ptr_decoded *p)
-{
- struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
-
- if (!f) {
- BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
-
- f = &failed->devs[failed->nr++];
- f->dev = p->ptr.dev;
- f->idx = p->idx;
- f->nr_failed = 1;
- f->nr_retries = 0;
- } else if (p->idx != f->idx) {
- f->idx = p->idx;
- f->nr_failed = 1;
- f->nr_retries = 0;
- } else {
- f->nr_failed++;
- }
-}
-
-/*
- * returns true if p1 is better than p2:
- */
-static inline bool ptr_better(struct bch_fs *c,
- const struct extent_ptr_decoded p1,
- const struct extent_ptr_decoded p2)
-{
- if (likely(!p1.idx && !p2.idx)) {
- struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
- struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
-
- u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
- u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
-
- /* Pick at random, biased in favor of the faster device: */
-
- return bch2_rand_range(l1 + l2) > l1;
- }
-
- if (force_reconstruct_read(c))
- return p1.idx > p2.idx;
-
- return p1.idx < p2.idx;
-}
-
-static int extent_pick_read_device(struct bch_fs *c,
- struct bkey_s_c_extent e,
- struct bch_io_failures *failed,
- struct extent_ptr_decoded *pick)
-{
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- struct bch_dev_io_failures *f;
- struct bch_dev *ca;
- int ret = 0;
-
- extent_for_each_ptr_decode(e, p, entry) {
- ca = bch_dev_bkey_exists(c, p.ptr.dev);
-
- if (p.ptr.cached && ptr_stale(ca, &p.ptr))
- continue;
-
- f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
- if (f)
- p.idx = f->nr_failed < f->nr_retries
- ? f->idx
- : f->idx + 1;
-
- if (!p.idx &&
- !bch2_dev_is_readable(ca))
- p.idx++;
-
- if (force_reconstruct_read(c) &&
- !p.idx && p.ec_nr)
- p.idx++;
-
- if (p.idx >= p.ec_nr + 1)
- continue;
-
- if (ret && !ptr_better(c, p, *pick))
- continue;
-
- *pick = p;
- ret = 1;
- }
-
- return ret;
}
/* Btree ptrs */
const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
{
- if (bkey_extent_is_cached(k.k))
- return "cached";
-
- if (k.k->size)
- return "nonzero key size";
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ const struct bch_extent_ptr *ptr;
+ const char *reason;
if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
return "value too big";
- switch (k.k->type) {
- case BCH_EXTENT: {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const union bch_extent_entry *entry;
- const struct bch_extent_ptr *ptr;
- const char *reason;
-
- extent_for_each_entry(e, entry) {
- if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
- return "invalid extent entry type";
-
- if (!extent_entry_is_ptr(entry))
- return "has non ptr field";
- }
-
- extent_for_each_ptr(e, ptr) {
- reason = extent_ptr_invalid(c, e, ptr,
- c->opts.btree_node_size,
- true);
- if (reason)
- return reason;
- }
+ bkey_extent_entry_for_each(ptrs, entry) {
+ if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
+ return "invalid extent entry type";
- return NULL;
+ if (!extent_entry_is_ptr(entry))
+ return "has non ptr field";
}
- default:
- return "invalid value type";
+ bkey_for_each_ptr(ptrs, ptr) {
+ reason = extent_ptr_invalid(c, k, ptr,
+ c->opts.btree_node_size,
+ true);
+ if (reason)
+ return reason;
}
+
+ return NULL;
}
void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
struct bkey_s_c k)
{
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
unsigned seq;
const char *err;
@@ -711,7 +635,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
unsigned replicas = 0;
bool bad;
- extent_for_each_ptr(e, ptr) {
+ bkey_for_each_ptr(ptrs, ptr) {
ca = bch_dev_bkey_exists(c, ptr->dev);
replicas++;
@@ -737,9 +661,8 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
}
if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
- !bch2_bkey_replicas_marked(c, btree_node_type(b),
- e.s_c, false)) {
- bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k);
+ !bch2_bkey_replicas_marked(c, k, false)) {
+ bch2_bkey_val_to_text(&PBUF(buf), c, k);
bch2_fs_bug(c,
"btree key bad (replicas not marked in superblock):\n%s",
buf);
@@ -748,7 +671,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
return;
err:
- bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k);
+ bch2_bkey_val_to_text(&PBUF(buf), c, k);
bch2_fs_bug(c, "%s btree pointer %s: bucket %zi gen %i mark %08x",
err, buf, PTR_BUCKET_NR(ca, ptr),
mark.gen, (unsigned) mark.v.counter);
@@ -759,25 +682,16 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
{
const char *invalid;
- if (bkey_extent_is_data(k.k))
- extent_print_ptrs(out, c, bkey_s_c_to_extent(k));
+ bkey_ptrs_to_text(out, c, k);
invalid = bch2_btree_ptr_invalid(c, k);
if (invalid)
pr_buf(out, " invalid: %s", invalid);
}
-int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
- struct bch_io_failures *failed,
- struct extent_ptr_decoded *pick)
-{
- return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
- failed, pick);
-}
-
/* Extents */
-static bool __bch2_cut_front(struct bpos where, struct bkey_s k)
+bool __bch2_cut_front(struct bpos where, struct bkey_s k)
{
u64 len = 0;
@@ -795,7 +709,7 @@ static bool __bch2_cut_front(struct bpos where, struct bkey_s k)
* cause offset to point to the next bucket:
*/
if (!len)
- k.k->type = KEY_TYPE_DELETED;
+ k.k->type = KEY_TYPE_deleted;
else if (bkey_extent_is_data(k.k)) {
struct bkey_s_extent e = bkey_s_to_extent(k);
union bch_extent_entry *entry;
@@ -830,11 +744,6 @@ static bool __bch2_cut_front(struct bpos where, struct bkey_s k)
return true;
}
-bool bch2_cut_front(struct bpos where, struct bkey_i *k)
-{
- return __bch2_cut_front(where, bkey_i_to_s(k));
-}
-
bool bch2_cut_back(struct bpos where, struct bkey *k)
{
u64 len = 0;
@@ -852,7 +761,7 @@ bool bch2_cut_back(struct bpos where, struct bkey *k)
k->size = len;
if (!len)
- k->type = KEY_TYPE_DELETED;
+ k->type = KEY_TYPE_deleted;
return true;
}
@@ -870,24 +779,6 @@ void bch2_key_resize(struct bkey *k,
k->size = new_size;
}
-/*
- * In extent_sort_fix_overlapping(), insert_fixup_extent(),
- * extent_merge_inline() - we're modifying keys in place that are packed. To do
- * that we have to unpack the key, modify the unpacked key - then this
- * copies/repacks the unpacked to the original as necessary.
- */
-static void extent_save(struct btree *b, struct bkey_packed *dst,
- struct bkey *src)
-{
- struct bkey_format *f = &b->format;
- struct bkey_i *dst_unpacked;
-
- if ((dst_unpacked = packed_to_bkey(dst)))
- dst_unpacked->k = *src;
- else
- BUG_ON(!bch2_bkey_pack_key(dst, src, f));
-}
-
static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
struct bkey_i *src)
{
@@ -906,170 +797,6 @@ static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
return true;
}
-/*
- * If keys compare equal, compare by pointer order:
- *
- * Necessary for sort_fix_overlapping() - if there are multiple keys that
- * compare equal in different sets, we have to process them newest to oldest.
- */
-#define extent_sort_cmp(h, l, r) \
-({ \
- struct bkey _ul = bkey_unpack_key(b, \
- __btree_node_offset_to_key(b, (l).k)); \
- struct bkey _ur = bkey_unpack_key(b, \
- __btree_node_offset_to_key(b, (r).k)); \
- \
- bkey_cmp(bkey_start_pos(&_ul), \
- bkey_start_pos(&_ur)) ?: (r).k - (l).k; \
-})
-
-static inline void extent_sort_sift(struct btree_node_iter_large *iter,
- struct btree *b, size_t i)
-{
- heap_sift_down(iter, i, extent_sort_cmp, NULL);
-}
-
-static inline void extent_sort_next(struct btree_node_iter_large *iter,
- struct btree *b,
- struct btree_node_iter_set *i)
-{
- sort_key_next(iter, b, i);
- heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
-}
-
-static void extent_sort_append(struct bch_fs *c,
- struct btree *b,
- struct btree_nr_keys *nr,
- struct bkey_packed *start,
- struct bkey_packed **prev,
- struct bkey_packed *k)
-{
- struct bkey_format *f = &b->format;
- BKEY_PADDED(k) tmp;
-
- if (bkey_whiteout(k))
- return;
-
- bch2_bkey_unpack(b, &tmp.k, k);
-
- if (*prev &&
- bch2_extent_merge(c, b, (void *) *prev, &tmp.k))
- return;
-
- if (*prev) {
- bch2_bkey_pack(*prev, (void *) *prev, f);
-
- btree_keys_account_key_add(nr, 0, *prev);
- *prev = bkey_next(*prev);
- } else {
- *prev = start;
- }
-
- bkey_copy(*prev, &tmp.k);
-}
-
-struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
- struct bset *dst,
- struct btree *b,
- struct btree_node_iter_large *iter)
-{
- struct bkey_format *f = &b->format;
- struct btree_node_iter_set *_l = iter->data, *_r;
- struct bkey_packed *prev = NULL, *out, *lk, *rk;
- struct bkey l_unpacked, r_unpacked;
- struct bkey_s l, r;
- struct btree_nr_keys nr;
-
- memset(&nr, 0, sizeof(nr));
-
- heap_resort(iter, extent_sort_cmp, NULL);
-
- while (!bch2_btree_node_iter_large_end(iter)) {
- lk = __btree_node_offset_to_key(b, _l->k);
-
- if (iter->used == 1) {
- extent_sort_append(c, b, &nr, dst->start, &prev, lk);
- extent_sort_next(iter, b, _l);
- continue;
- }
-
- _r = iter->data + 1;
- if (iter->used > 2 &&
- extent_sort_cmp(iter, _r[0], _r[1]) >= 0)
- _r++;
-
- rk = __btree_node_offset_to_key(b, _r->k);
-
- l = __bkey_disassemble(b, lk, &l_unpacked);
- r = __bkey_disassemble(b, rk, &r_unpacked);
-
- /* If current key and next key don't overlap, just append */
- if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
- extent_sort_append(c, b, &nr, dst->start, &prev, lk);
- extent_sort_next(iter, b, _l);
- continue;
- }
-
- /* Skip 0 size keys */
- if (!r.k->size) {
- extent_sort_next(iter, b, _r);
- continue;
- }
-
- /*
- * overlap: keep the newer key and trim the older key so they
- * don't overlap. comparing pointers tells us which one is
- * newer, since the bsets are appended one after the other.
- */
-
- /* can't happen because of comparison func */
- BUG_ON(_l->k < _r->k &&
- !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));
-
- if (_l->k > _r->k) {
- /* l wins, trim r */
- if (bkey_cmp(l.k->p, r.k->p) >= 0) {
- sort_key_next(iter, b, _r);
- } else {
- __bch2_cut_front(l.k->p, r);
- extent_save(b, rk, r.k);
- }
-
- extent_sort_sift(iter, b, _r - iter->data);
- } else if (bkey_cmp(l.k->p, r.k->p) > 0) {
- BKEY_PADDED(k) tmp;
-
- /*
- * r wins, but it overlaps in the middle of l - split l:
- */
- bkey_reassemble(&tmp.k, l.s_c);
- bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k);
-
- __bch2_cut_front(r.k->p, l);
- extent_save(b, lk, l.k);
-
- extent_sort_sift(iter, b, 0);
-
- extent_sort_append(c, b, &nr, dst->start, &prev,
- bkey_to_packed(&tmp.k));
- } else {
- bch2_cut_back(bkey_start_pos(r.k), l.k);
- extent_save(b, lk, l.k);
- }
- }
-
- if (prev) {
- bch2_bkey_pack(prev, (void *) prev, f);
- btree_keys_account_key_add(&nr, 0, prev);
- out = bkey_next(prev);
- } else {
- out = dst->start;
- }
-
- dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
- return nr;
-}
-
struct extent_insert_state {
struct btree_insert *trans;
struct btree_insert_entry *insert;
@@ -1098,13 +825,13 @@ static void verify_extent_nonoverlapping(struct btree *b,
struct bkey uk;
iter = *_iter;
- k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_DISCARD);
+ k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard);
BUG_ON(k &&
(uk = bkey_unpack_key(b, k),
bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0));
iter = *_iter;
- k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_DISCARD);
+ k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard);
#if 0
BUG_ON(k &&
(uk = bkey_unpack_key(b, k),
@@ -1150,13 +877,13 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
verify_extent_nonoverlapping(l->b, &l->iter, insert);
node_iter = l->iter;
- k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_DISCARD);
+ k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard);
if (k && !bkey_written(l->b, k) &&
bch2_extent_merge_inline(c, iter, k, bkey_to_packed(insert), true))
return;
node_iter = l->iter;
- k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_DISCARD);
+ k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_discard);
if (k && !bkey_written(l->b, k) &&
bch2_extent_merge_inline(c, iter, bkey_to_packed(insert), k, false))
return;
@@ -1180,7 +907,7 @@ static void extent_insert_committed(struct extent_insert_state *s)
bkey_copy(&split.k, insert);
if (s->deleting)
- split.k.k.type = KEY_TYPE_DISCARD;
+ split.k.k.type = KEY_TYPE_discard;
bch2_cut_back(s->committed, &split.k.k);
@@ -1202,7 +929,7 @@ static void extent_insert_committed(struct extent_insert_state *s)
if (s->update_journal) {
bkey_copy(&split.k, !s->deleting ? insert : &s->whiteout);
if (s->deleting)
- split.k.k.type = KEY_TYPE_DISCARD;
+ split.k.k.type = KEY_TYPE_discard;
bch2_cut_back(s->committed, &split.k.k);
@@ -1214,7 +941,6 @@ static void extent_insert_committed(struct extent_insert_state *s)
bch2_cut_front(s->committed, insert);
insert->k.needs_whiteout = false;
- s->trans->did_work = true;
}
void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
@@ -1254,7 +980,7 @@ bch2_extent_can_insert(struct btree_insert *trans,
*u64s += BKEY_U64s;
_k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
- KEY_TYPE_DISCARD);
+ KEY_TYPE_discard);
if (!_k)
return BTREE_INSERT_OK;
@@ -1331,7 +1057,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
btree_account_key_drop(l->b, _k);
k.k->size = 0;
- k.k->type = KEY_TYPE_DELETED;
+ k.k->type = KEY_TYPE_deleted;
if (_k >= btree_bset_last(l->b)->start) {
unsigned u64s = _k->u64s;
@@ -1392,7 +1118,7 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
while (bkey_cmp(s->committed, insert->k.p) < 0 &&
(_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
- KEY_TYPE_DISCARD))) {
+ KEY_TYPE_discard))) {
struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
enum bch_extent_overlap overlap = bch2_extent_overlap(&insert->k, k.k);
@@ -1424,7 +1150,7 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
!bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
if (!bkey_whiteout(k.k)) {
btree_account_key_drop(l->b, _k);
- _k->type = KEY_TYPE_DISCARD;
+ _k->type = KEY_TYPE_discard;
reserve_whiteout(l->b, _k);
}
break;
@@ -1555,88 +1281,66 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
{ /* Returns NULL when every check below passes, otherwise a string describing the first failed check. */
- if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX)
- return "value too big";
-
- if (!k.k->size)
- return "zero key size";
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ const union bch_extent_entry *entry;
+ struct bch_extent_crc_unpacked crc;
+ const struct bch_extent_ptr *ptr;
+ unsigned size_ondisk = e.k->size; /* starts as the key size; replaced by crc.compressed_size once a crc entry is seen */
+ const char *reason;
+ unsigned nonce = UINT_MAX; /* UINT_MAX means no encrypted crc entry has been seen yet */
- switch (k.k->type) {
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED: {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const union bch_extent_entry *entry;
- struct bch_extent_crc_unpacked crc;
- const struct bch_extent_ptr *ptr;
- unsigned size_ondisk = e.k->size;
- const char *reason;
- unsigned nonce = UINT_MAX;
+ if (bkey_val_u64s(e.k) > BKEY_EXTENT_VAL_U64s_MAX)
+ return "value too big";
- extent_for_each_entry(e, entry) {
- if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
- return "invalid extent entry type";
+ extent_for_each_entry(e, entry) {
+ if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX)
+ return "invalid extent entry type";
- switch (extent_entry_type(entry)) {
- case BCH_EXTENT_ENTRY_ptr:
- ptr = entry_to_ptr(entry);
+ switch (extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ ptr = entry_to_ptr(entry);
- reason = extent_ptr_invalid(c, e, &entry->ptr,
- size_ondisk, false);
- if (reason)
- return reason;
- break;
- case BCH_EXTENT_ENTRY_crc32:
- case BCH_EXTENT_ENTRY_crc64:
- case BCH_EXTENT_ENTRY_crc128:
- crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));
+ reason = extent_ptr_invalid(c, e.s_c, &entry->ptr,
+ size_ondisk, false);
+ if (reason)
+ return reason;
+ break;
+ case BCH_EXTENT_ENTRY_crc32:
+ case BCH_EXTENT_ENTRY_crc64:
+ case BCH_EXTENT_ENTRY_crc128:
+ crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry));
- if (crc.offset + e.k->size >
- crc.uncompressed_size)
- return "checksum offset + key size > uncompressed size";
+ if (crc.offset + e.k->size >
+ crc.uncompressed_size)
+ return "checksum offset + key size > uncompressed size";
- size_ondisk = crc.compressed_size;
+ size_ondisk = crc.compressed_size; /* pointer entries visited after this crc are checked against the compressed size */
- if (!bch2_checksum_type_valid(c, crc.csum_type))
- return "invalid checksum type";
+ if (!bch2_checksum_type_valid(c, crc.csum_type))
+ return "invalid checksum type";
- if (crc.compression_type >= BCH_COMPRESSION_NR)
- return "invalid compression type";
+ if (crc.compression_type >= BCH_COMPRESSION_NR)
+ return "invalid compression type";
- if (bch2_csum_type_is_encryption(crc.csum_type)) {
- if (nonce == UINT_MAX)
- nonce = crc.offset + crc.nonce;
- else if (nonce != crc.offset + crc.nonce)
- return "incorrect nonce";
- }
- break;
- case BCH_EXTENT_ENTRY_stripe_ptr:
- break;
+ if (bch2_csum_type_is_encryption(crc.csum_type)) { /* all encrypted crc entries must agree on offset + nonce */
+ if (nonce == UINT_MAX)
+ nonce = crc.offset + crc.nonce;
+ else if (nonce != crc.offset + crc.nonce)
+ return "incorrect nonce";
}
+ break;
+ case BCH_EXTENT_ENTRY_stripe_ptr: /* no checks are applied to stripe pointer entries */
+ break;
}
-
- return NULL;
}
- case BCH_RESERVATION: {
- struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
-
- if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation))
- return "incorrect value size";
-
- if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX)
- return "invalid nr_replicas";
-
- return NULL;
- }
-
- default:
- return "invalid value type";
- }
+ return NULL;
}
-static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
- struct bkey_s_c_extent e)
+void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
+ struct bkey_s_c k)
{
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
struct bch_dev *ca;
struct bucket_mark mark;
@@ -1698,8 +1402,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
}
if (replicas > BCH_REPLICAS_MAX) {
- bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
- e.s_c);
+ bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c);
bch2_fs_bug(c,
"extent key bad (too many replicas: %u): %s",
replicas, buf);
@@ -1707,10 +1410,8 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
}
if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
- !bch2_bkey_replicas_marked(c, btree_node_type(b),
- e.s_c, false)) {
- bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
- e.s_c);
+ !bch2_bkey_replicas_marked(c, e.s_c, false)) {
+ bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c);
bch2_fs_bug(c,
"extent key bad (replicas not marked in superblock):\n%s",
buf);
@@ -1720,34 +1421,18 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
return;
bad_ptr:
- bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
- e.s_c);
+ bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c);
bch2_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu "
"gen %i type %u", buf,
PTR_BUCKET_NR(ca, ptr), mark.gen, mark.data_type);
}
-void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
-{
- switch (k.k->type) {
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED:
- bch2_extent_debugcheck_extent(c, b, bkey_s_c_to_extent(k));
- break;
- case BCH_RESERVATION:
- break;
- default:
- BUG();
- }
-}
-
void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c,
struct bkey_s_c k)
{
const char *invalid;
- if (bkey_extent_is_data(k.k))
- extent_print_ptrs(out, c, bkey_s_c_to_extent(k));
+ bkey_ptrs_to_text(out, c, k);
invalid = bch2_extent_invalid(c, k);
if (invalid)
@@ -1843,41 +1528,17 @@ found:
*/
bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
{
- struct bkey_s_extent e;
-
- switch (k.k->type) {
- case KEY_TYPE_ERROR:
- return false;
-
- case KEY_TYPE_DELETED:
- return true;
- case KEY_TYPE_DISCARD:
- return bversion_zero(k.k->version);
- case KEY_TYPE_COOKIE:
- return false;
-
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED:
- e = bkey_s_to_extent(k);
+ struct bch_extent_ptr *ptr;
- bch2_extent_drop_stale(c, e);
+ bch2_bkey_drop_ptrs(k, ptr, /* presumably drops each pointer for which the condition below holds (cached and stale) - confirm against the macro */
+ ptr->cached &&
+ ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
- if (!bkey_val_u64s(e.k)) {
- if (bkey_extent_is_cached(e.k)) {
- k.k->type = KEY_TYPE_DISCARD;
- if (bversion_zero(k.k->version))
- return true;
- } else {
- k.k->type = KEY_TYPE_ERROR;
- }
- }
+ /* will only happen if all pointers were cached: */
+ if (!bkey_val_u64s(k.k))
+ k.k->type = KEY_TYPE_deleted; /* a key left with an empty value is retyped as deleted */
- return false;
- case BCH_RESERVATION:
- return false;
- default:
- BUG();
- }
+ return false; /* the added version returns false on every path, including after retyping the key */
}
void bch2_extent_mark_replicas_cached(struct bch_fs *c,
@@ -1887,7 +1548,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
{
union bch_extent_entry *entry;
struct extent_ptr_decoded p;
- int extra = bch2_extent_durability(c, e.c) - nr_desired_replicas;
+ int extra = bch2_bkey_durability(c, e.s_c) - nr_desired_replicas;
if (target && extra > 0)
extent_for_each_ptr_decode(e, p, entry) {
@@ -1911,106 +1572,40 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
}
}
-/*
- * This picks a non-stale pointer, preferably from a device other than @avoid.
- * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to
- * other devices, it will still pick a pointer from avoid.
- */
-int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
- struct bch_io_failures *failed,
- struct extent_ptr_decoded *pick)
-{
- int ret;
-
- switch (k.k->type) {
- case KEY_TYPE_ERROR:
- return -EIO;
-
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED:
- ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
- failed, pick);
-
- if (!ret && !bkey_extent_is_cached(k.k))
- ret = -EIO;
-
- return ret;
-
- default:
- return 0;
- }
-}
-
-enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b,
+enum merge_result bch2_extent_merge(struct bch_fs *c,
struct bkey_i *l, struct bkey_i *r)
{
- struct bkey_s_extent el, er;
+ struct bkey_s_extent el = bkey_i_to_s_extent(l);
+ struct bkey_s_extent er = bkey_i_to_s_extent(r);
union bch_extent_entry *en_l, *en_r;
- if (key_merging_disabled(c))
- return BCH_MERGE_NOMERGE;
-
- /*
- * Generic header checks
- * Assumes left and right are in order
- * Left and right must be exactly aligned
- */
-
- if (l->k.u64s != r->k.u64s ||
- l->k.type != r->k.type ||
- bversion_cmp(l->k.version, r->k.version) ||
- bkey_cmp(l->k.p, bkey_start_pos(&r->k)))
+ if (bkey_val_u64s(&l->k) != bkey_val_u64s(&r->k))
return BCH_MERGE_NOMERGE;
- switch (l->k.type) {
- case KEY_TYPE_DISCARD:
- case KEY_TYPE_ERROR:
- /* These types are mergeable, and no val to check */
- break;
-
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED:
- el = bkey_i_to_s_extent(l);
- er = bkey_i_to_s_extent(r);
-
- extent_for_each_entry(el, en_l) {
- struct bch_extent_ptr *lp, *rp;
- struct bch_dev *ca;
-
- en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
+ extent_for_each_entry(el, en_l) {
+ struct bch_extent_ptr *lp, *rp;
+ struct bch_dev *ca;
- if ((extent_entry_type(en_l) !=
- extent_entry_type(en_r)) ||
- !extent_entry_is_ptr(en_l))
- return BCH_MERGE_NOMERGE;
+ en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data);
- lp = &en_l->ptr;
- rp = &en_r->ptr;
-
- if (lp->offset + el.k->size != rp->offset ||
- lp->dev != rp->dev ||
- lp->gen != rp->gen)
- return BCH_MERGE_NOMERGE;
+ if ((extent_entry_type(en_l) !=
+ extent_entry_type(en_r)) ||
+ !extent_entry_is_ptr(en_l))
+ return BCH_MERGE_NOMERGE;
- /* We don't allow extents to straddle buckets: */
- ca = bch_dev_bkey_exists(c, lp->dev);
+ lp = &en_l->ptr;
+ rp = &en_r->ptr;
- if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
- return BCH_MERGE_NOMERGE;
- }
+ if (lp->offset + el.k->size != rp->offset ||
+ lp->dev != rp->dev ||
+ lp->gen != rp->gen)
+ return BCH_MERGE_NOMERGE;
- break;
- case BCH_RESERVATION: {
- struct bkey_i_reservation *li = bkey_i_to_reservation(l);
- struct bkey_i_reservation *ri = bkey_i_to_reservation(r);
+ /* We don't allow extents to straddle buckets: */
+ ca = bch_dev_bkey_exists(c, lp->dev);
- if (li->v.generation != ri->v.generation ||
- li->v.nr_replicas != ri->v.nr_replicas)
+ if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
return BCH_MERGE_NOMERGE;
- break;
- }
- default:
- return BCH_MERGE_NOMERGE;
}
l->k.needs_whiteout |= r->k.needs_whiteout;
@@ -2060,7 +1655,7 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
bch2_bkey_unpack(b, &li.k, l);
bch2_bkey_unpack(b, &ri.k, r);
- ret = bch2_extent_merge(c, b, &li.k, &ri.k);
+ ret = bch2_bkey_merge(c, &li.k, &ri.k);
if (ret == BCH_MERGE_NOMERGE)
return false;
@@ -2128,3 +1723,54 @@ int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size)
return ret;
}
+
+/* KEY_TYPE_reservation: */
+/*
+ * Validity check for a reservation key's value.
+ *
+ * Returns NULL when the value is exactly sizeof(struct bch_reservation)
+ * bytes and nr_replicas lies in 1..BCH_REPLICAS_MAX; otherwise returns a
+ * string naming the failed check. @c is not used by this function.
+ */
+const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); /* r.v is only read after the size check below has passed */
+
+ if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation))
+ return "incorrect value size";
+
+ if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX)
+ return "invalid nr_replicas";
+
+ return NULL;
+}
+/*
+ * Print a reservation value into @out in the form
+ * "generation <n> replicas <n>". @c is not used by this function.
+ */
+void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+
+ pr_buf(out, "generation %u replicas %u",
+ le32_to_cpu(r.v->generation), /* generation goes through le32_to_cpu() before printing; nr_replicas is printed as stored */
+ r.v->nr_replicas);
+}
+/*
+ * Try to merge reservation @r into @l by growing @l.
+ *
+ * Returns:
+ *   BCH_MERGE_NOMERGE - generation or nr_replicas differ; neither key has
+ *                       been modified at that point.
+ *   BCH_MERGE_PARTIAL - the combined size would exceed KEY_SIZE_MAX; @l is
+ *                       resized to KEY_SIZE_MAX and @r is trimmed with
+ *                       bch2_cut_front() at l->k.p.
+ *   BCH_MERGE_MERGE   - @l has been resized to l->k.size + r->k.size.
+ *
+ * No position/adjacency check is made here; presumably the caller has
+ * already established that @r directly follows @l - confirm at call sites.
+ * @c is not used by this function.
+ */
+enum merge_result bch2_reservation_merge(struct bch_fs *c,
+ struct bkey_i *l, struct bkey_i *r)
+{
+ struct bkey_i_reservation *li = bkey_i_to_reservation(l);
+ struct bkey_i_reservation *ri = bkey_i_to_reservation(r);
+
+ if (li->v.generation != ri->v.generation ||
+ li->v.nr_replicas != ri->v.nr_replicas)
+ return BCH_MERGE_NOMERGE;
+
+ l->k.needs_whiteout |= r->k.needs_whiteout; /* applied on both the partial and the full merge path */
+
+ /* Keys with no pointers aren't restricted to one bucket and could
+ * overflow KEY_SIZE
+ */
+ if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) { /* the sum is computed in u64 before comparing against the limit */
+ bch2_key_resize(&l->k, KEY_SIZE_MAX);
+ bch2_cut_front(l->k.p, r);
+ return BCH_MERGE_PARTIAL;
+ }
+
+ bch2_key_resize(&l->k, l->k.size + r->k.size);
+
+ return BCH_MERGE_MERGE;
+}