author     Kent Overstreet <kent.overstreet@gmail.com>  2018-11-03 20:11:29 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>  2018-11-03 20:19:25 -0400
commit     2ab2ab0f781ae750473763e8a042c900a982d399 (patch)
tree       2d76ee40a4cf6b6a54c180a9100216cc45c26f91 /libbcachefs
parent     e288c9f1de8a1b21a77cbfb80dfec37f2515e33c (diff)
Update bcachefs sources to b12d1535f3 bcachefs: fix bounds checks in bch2_bio_map()
Diffstat (limited to 'libbcachefs')
-rw-r--r--  libbcachefs/alloc_background.c | 16
-rw-r--r--  libbcachefs/bcachefs.h | 1
-rw-r--r--  libbcachefs/bcachefs_format.h | 30
-rw-r--r--  libbcachefs/bkey_methods.h | 11
-rw-r--r--  libbcachefs/bset.c | 102
-rw-r--r--  libbcachefs/btree_gc.c | 303
-rw-r--r--  libbcachefs/btree_gc.h | 2
-rw-r--r--  libbcachefs/btree_io.c | 31
-rw-r--r--  libbcachefs/btree_io.h | 2
-rw-r--r--  libbcachefs/btree_types.h | 5
-rw-r--r--  libbcachefs/btree_update_interior.c | 41
-rw-r--r--  libbcachefs/buckets.c | 152
-rw-r--r--  libbcachefs/buckets.h | 5
-rw-r--r--  libbcachefs/clock.c | 6
-rw-r--r--  libbcachefs/debug.c | 6
-rw-r--r--  libbcachefs/extents.c | 332
-rw-r--r--  libbcachefs/extents.h | 232
-rw-r--r--  libbcachefs/extents_types.h | 13
-rw-r--r--  libbcachefs/fs-io.c | 8
-rw-r--r--  libbcachefs/fs.c | 18
-rw-r--r--  libbcachefs/io.c | 42
-rw-r--r--  libbcachefs/io.h | 4
-rw-r--r--  libbcachefs/io_types.h | 2
-rw-r--r--  libbcachefs/journal.h | 4
-rw-r--r--  libbcachefs/journal_io.c | 51
-rw-r--r--  libbcachefs/journal_io.h | 2
-rw-r--r--  libbcachefs/migrate.c | 6
-rw-r--r--  libbcachefs/move.c | 34
-rw-r--r--  libbcachefs/movinggc.c | 4
-rw-r--r--  libbcachefs/rebalance.c | 31
-rw-r--r--  libbcachefs/replicas.c | 429
-rw-r--r--  libbcachefs/replicas.h | 16
-rw-r--r--  libbcachefs/replicas_types.h | 11
-rw-r--r--  libbcachefs/super_types.h | 12
-rw-r--r--  libbcachefs/sysfs.c | 12
-rw-r--r--  libbcachefs/util.c | 4
-rw-r--r--  libbcachefs/util.h | 67
37 files changed, 1027 insertions, 1020 deletions
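
The largest interface change in this update is the reworked bch2_mark_key(): callers now pass the bkey_type plus an "inserting" flag rather than a bch_data_type and a precomputed sector delta. The buckets.h hunk further down makes the change visible; the declarations are reproduced here for quick reference, exactly as they appear in the diff:

    /* buckets.h before this update: */
    void bch2_mark_key(struct bch_fs *, struct bkey_s_c, s64, enum bch_data_type,
                       struct gc_pos, struct bch_fs_usage *, u64, unsigned);

    /* buckets.h after this update: the key type and an "inserting" bool are
     * passed in; for btree keys the sector count is derived from
     * c->opts.btree_node_size and the flag (see the buckets.c hunk): */
    void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
                       bool, s64, struct gc_pos,
                       struct bch_fs_usage *, u64, unsigned);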
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 7ba20c87..c3efb435 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -582,7 +582,8 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
e.nr++;
} else {
if (e.nr)
- heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
+ heap_add_or_replace(&ca->alloc_heap, e,
+ -bucket_alloc_cmp, NULL);
e = (struct alloc_heap_entry) {
.bucket = b,
@@ -595,14 +596,15 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
}
if (e.nr)
- heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
+ heap_add_or_replace(&ca->alloc_heap, e,
+ -bucket_alloc_cmp, NULL);
for (i = 0; i < ca->alloc_heap.used; i++)
nr += ca->alloc_heap.data[i].nr;
while (nr - ca->alloc_heap.data[0].nr >= ALLOC_SCAN_BATCH(ca)) {
nr -= ca->alloc_heap.data[0].nr;
- heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp);
+ heap_pop(&ca->alloc_heap, e, -bucket_alloc_cmp, NULL);
}
up_read(&ca->bucket_lock);
@@ -632,7 +634,7 @@ static void find_reclaimable_buckets_fifo(struct bch_fs *c, struct bch_dev *ca)
if (bch2_can_invalidate_bucket(ca, b, m)) {
struct alloc_heap_entry e = { .bucket = b, .nr = 1, };
- heap_add(&ca->alloc_heap, e, bucket_alloc_cmp);
+ heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
if (heap_full(&ca->alloc_heap))
break;
}
@@ -659,7 +661,7 @@ static void find_reclaimable_buckets_random(struct bch_fs *c, struct bch_dev *ca
if (bch2_can_invalidate_bucket(ca, b, m)) {
struct alloc_heap_entry e = { .bucket = b, .nr = 1, };
- heap_add(&ca->alloc_heap, e, bucket_alloc_cmp);
+ heap_add(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
if (heap_full(&ca->alloc_heap))
break;
}
@@ -697,7 +699,7 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
break;
}
- heap_resort(&ca->alloc_heap, bucket_alloc_cmp);
+ heap_resort(&ca->alloc_heap, bucket_alloc_cmp, NULL);
for (i = 0; i < ca->alloc_heap.used; i++)
nr += ca->alloc_heap.data[i].nr;
@@ -718,7 +720,7 @@ static inline long next_alloc_bucket(struct bch_dev *ca)
return b;
}
- heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp);
+ heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
}
return -1;
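
Note on the hunks above: every heap helper (heap_add(), heap_add_or_replace(), heap_pop(), heap_resort()) gains a trailing argument, passed as NULL throughout this commit; the same pattern recurs in btree_io.c, clock.c and extents.c below. The macros live in util.h, which this excerpt only touches briefly, so the following reading is an assumption: the new parameter is a callback invoked whenever an element moves within the heap (e.g. to maintain back-pointers), and passing NULL keeps the old behaviour. A minimal sketch of that shape, not the util.h implementation:

    /* Hypothetical sift-down with an optional "element moved" hook; NULL-hook
     * callers, like everything in this commit, just get a plain binary heap. */
    static void sift_down(int *heap, size_t used, size_t i,
                          void (*set_pos)(int *elem, size_t pos))
    {
            for (;;) {
                    size_t l = 2 * i + 1, r = 2 * i + 2, m = i;

                    if (l < used && heap[l] < heap[m])
                            m = l;
                    if (r < used && heap[r] < heap[m])
                            m = r;
                    if (m == i)
                            break;

                    int tmp = heap[i];
                    heap[i] = heap[m];
                    heap[m] = tmp;

                    if (set_pos) {
                            set_pos(&heap[i], i);
                            set_pos(&heap[m], m);
                    }
                    i = m;
            }
    }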
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 92727cca..6d5c7d6b 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -312,6 +312,7 @@ enum bch_time_stats {
#include "keylist_types.h"
#include "quota_types.h"
#include "rebalance_types.h"
+#include "replicas_types.h"
#include "super_types.h"
/* Number of nodes btree coalesce will try to coalesce at once */
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index f1814f4c..cdf392b3 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -456,15 +456,19 @@ enum bch_compression_type {
BCH_COMPRESSION_NR = 5,
};
+#define BCH_EXTENT_ENTRY_TYPES() \
+ x(ptr, 0) \
+ x(crc32, 1) \
+ x(crc64, 2) \
+ x(crc128, 3)
+#define BCH_EXTENT_ENTRY_MAX 4
+
enum bch_extent_entry_type {
- BCH_EXTENT_ENTRY_ptr = 0,
- BCH_EXTENT_ENTRY_crc32 = 1,
- BCH_EXTENT_ENTRY_crc64 = 2,
- BCH_EXTENT_ENTRY_crc128 = 3,
+#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
+ BCH_EXTENT_ENTRY_TYPES()
+#undef x
};
-#define BCH_EXTENT_ENTRY_MAX 4
-
/* Compressed/uncompressed size are stored biased by 1: */
struct bch_extent_crc32 {
#if defined(__LITTLE_ENDIAN_BITFIELD)
@@ -589,10 +593,10 @@ union bch_extent_entry {
#else
#error edit for your odd byteorder.
#endif
- struct bch_extent_crc32 crc32;
- struct bch_extent_crc64 crc64;
- struct bch_extent_crc128 crc128;
- struct bch_extent_ptr ptr;
+
+#define x(f, n) struct bch_extent_##f f;
+ BCH_EXTENT_ENTRY_TYPES()
+#undef x
};
enum {
@@ -1007,9 +1011,9 @@ enum bch_data_type {
};
struct bch_replicas_entry {
- u8 data_type;
- u8 nr;
- u8 devs[0];
+ __u8 data_type;
+ __u8 nr_devs;
+ __u8 devs[0];
};
struct bch_sb_field_replicas {
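
The extent entry types and the members of union bch_extent_entry are now generated from a single x-macro list, BCH_EXTENT_ENTRY_TYPES(), so the enum and the union can no longer drift apart; extents.h below reuses the same list in extent_entry_bytes(). For readers unfamiliar with the idiom, a stripped-down illustration (shortened names, not the real definitions):

    #define ENTRY_TYPES()           \
            x(ptr,   0)             \
            x(crc32, 1)

    struct entry_ptr   { unsigned dev; };
    struct entry_crc32 { unsigned csum; };

    enum entry_type {
    #define x(f, n) ENTRY_##f = n,          /* ENTRY_ptr = 0, ENTRY_crc32 = 1, */
            ENTRY_TYPES()
    #undef x
    };

    union entry {
    #define x(f, n) struct entry_##f f;     /* one member per list entry */
            ENTRY_TYPES()
    #undef x
    };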
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index c708f8c0..cf7a5563 100644
--- a/libbcachefs/bkey_methods.h
+++ b/libbcachefs/bkey_methods.h
@@ -18,17 +18,6 @@ static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
}
-static inline bool btree_type_has_ptrs(enum bkey_type type)
-{
- switch (type) {
- case BKEY_TYPE_BTREE:
- case BKEY_TYPE_EXTENTS:
- return true;
- default:
- return false;
- }
-}
-
struct bch_fs;
struct btree;
struct bkey;
diff --git a/libbcachefs/bset.c b/libbcachefs/bset.c
index c8e16dea..c631e30a 100644
--- a/libbcachefs/bset.c
+++ b/libbcachefs/bset.c
@@ -1689,7 +1689,7 @@ struct bkey_packed *bch2_btree_node_iter_prev_filter(struct btree_node_iter *ite
struct bkey_packed *orig_pos = bch2_btree_node_iter_peek_all(iter, b);
struct btree_node_iter_set *set;
struct bset_tree *t;
- unsigned end;
+ unsigned end = 0;
bch2_btree_node_iter_verify(iter, b);
@@ -1791,7 +1791,7 @@ int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k,
struct bkey_packed *l, *r, *p;
struct bkey uk, up;
char buf1[200], buf2[200];
- unsigned j;
+ unsigned j, inorder;
if (!size)
return 0;
@@ -1799,53 +1799,57 @@ int bch2_bkey_print_bfloat(struct btree *b, struct bkey_packed *k,
if (!bset_has_ro_aux_tree(t))
goto out;
- j = __inorder_to_eytzinger1(bkey_to_cacheline(b, t, k), t->size, t->extra);
- if (j &&
- j < t->size &&
- k == tree_to_bkey(b, t, j))
- switch (bkey_float(b, t, j)->exponent) {
- case BFLOAT_FAILED_UNPACKED:
- uk = bkey_unpack_key(b, k);
- return scnprintf(buf, size,
- " failed unpacked at depth %u\n"
- "\t%llu:%llu\n",
- ilog2(j),
- uk.p.inode, uk.p.offset);
- case BFLOAT_FAILED_PREV:
- p = tree_to_prev_bkey(b, t, j);
- l = is_power_of_2(j)
- ? btree_bkey_first(b, t)
- : tree_to_prev_bkey(b, t, j >> ffs(j));
- r = is_power_of_2(j + 1)
- ? bch2_bkey_prev_all(b, t, btree_bkey_last(b, t))
- : tree_to_bkey(b, t, j >> (ffz(j) + 1));
-
- up = bkey_unpack_key(b, p);
- uk = bkey_unpack_key(b, k);
- bch2_to_binary(buf1, high_word(&b->format, p), b->nr_key_bits);
- bch2_to_binary(buf2, high_word(&b->format, k), b->nr_key_bits);
-
- return scnprintf(buf, size,
- " failed prev at depth %u\n"
- "\tkey starts at bit %u but first differing bit at %u\n"
- "\t%llu:%llu\n"
- "\t%llu:%llu\n"
- "\t%s\n"
- "\t%s\n",
- ilog2(j),
- bch2_bkey_greatest_differing_bit(b, l, r),
- bch2_bkey_greatest_differing_bit(b, p, k),
- uk.p.inode, uk.p.offset,
- up.p.inode, up.p.offset,
- buf1, buf2);
- case BFLOAT_FAILED_OVERFLOW:
- uk = bkey_unpack_key(b, k);
- return scnprintf(buf, size,
- " failed overflow at depth %u\n"
- "\t%llu:%llu\n",
- ilog2(j),
- uk.p.inode, uk.p.offset);
- }
+ inorder = bkey_to_cacheline(b, t, k);
+ if (!inorder || inorder >= t->size)
+ goto out;
+
+ j = __inorder_to_eytzinger1(inorder, t->size, t->extra);
+ if (k != tree_to_bkey(b, t, j))
+ goto out;
+
+ switch (bkey_float(b, t, j)->exponent) {
+ case BFLOAT_FAILED_UNPACKED:
+ uk = bkey_unpack_key(b, k);
+ return scnprintf(buf, size,
+ " failed unpacked at depth %u\n"
+ "\t%llu:%llu\n",
+ ilog2(j),
+ uk.p.inode, uk.p.offset);
+ case BFLOAT_FAILED_PREV:
+ p = tree_to_prev_bkey(b, t, j);
+ l = is_power_of_2(j)
+ ? btree_bkey_first(b, t)
+ : tree_to_prev_bkey(b, t, j >> ffs(j));
+ r = is_power_of_2(j + 1)
+ ? bch2_bkey_prev_all(b, t, btree_bkey_last(b, t))
+ : tree_to_bkey(b, t, j >> (ffz(j) + 1));
+
+ up = bkey_unpack_key(b, p);
+ uk = bkey_unpack_key(b, k);
+ bch2_to_binary(buf1, high_word(&b->format, p), b->nr_key_bits);
+ bch2_to_binary(buf2, high_word(&b->format, k), b->nr_key_bits);
+
+ return scnprintf(buf, size,
+ " failed prev at depth %u\n"
+ "\tkey starts at bit %u but first differing bit at %u\n"
+ "\t%llu:%llu\n"
+ "\t%llu:%llu\n"
+ "\t%s\n"
+ "\t%s\n",
+ ilog2(j),
+ bch2_bkey_greatest_differing_bit(b, l, r),
+ bch2_bkey_greatest_differing_bit(b, p, k),
+ uk.p.inode, uk.p.offset,
+ up.p.inode, up.p.offset,
+ buf1, buf2);
+ case BFLOAT_FAILED_OVERFLOW:
+ uk = bkey_unpack_key(b, k);
+ return scnprintf(buf, size,
+ " failed overflow at depth %u\n"
+ "\t%llu:%llu\n",
+ ilog2(j),
+ uk.p.inode, uk.p.offset);
+ }
out:
*buf = '\0';
return 0;
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index b0f9bd75..b3c69da9 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -17,6 +17,7 @@
#include "error.h"
#include "extents.h"
#include "journal.h"
+#include "journal_io.h"
#include "keylist.h"
#include "move.h"
#include "replicas.h"
@@ -31,6 +32,21 @@
#include <linux/sched/task.h>
#include <trace/events/bcachefs.h>
+static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
+{
+ write_seqcount_begin(&c->gc_pos_lock);
+ c->gc_pos = new_pos;
+ write_seqcount_end(&c->gc_pos_lock);
+}
+
+static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
+{
+ BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
+ __gc_pos_set(c, new_pos);
+}
+
+/* range_checks - for validating min/max pos of each btree node: */
+
struct range_checks {
struct range_level {
struct bpos min;
@@ -90,6 +106,19 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
}
}
+/* marking of btree keys/nodes: */
+
+static bool bkey_type_needs_gc(enum bkey_type type)
+{
+ switch (type) {
+ case BKEY_TYPE_BTREE:
+ case BKEY_TYPE_EXTENTS:
+ return true;
+ default:
+ return false;
+ }
+}
+
u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
{
const struct bch_extent_ptr *ptr;
@@ -112,39 +141,8 @@ u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
return max_stale;
}
-/*
- * For runtime mark and sweep:
- */
-static u8 bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
- struct bkey_s_c k, unsigned flags)
-{
- struct gc_pos pos = { 0 };
- u8 ret = 0;
-
- switch (type) {
- case BKEY_TYPE_BTREE:
- bch2_mark_key(c, k, c->opts.btree_node_size,
- BCH_DATA_BTREE, pos, NULL,
- 0, flags|
- BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
- BCH_BUCKET_MARK_GC_LOCK_HELD);
- break;
- case BKEY_TYPE_EXTENTS:
- bch2_mark_key(c, k, k.k->size, BCH_DATA_USER, pos, NULL,
- 0, flags|
- BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
- BCH_BUCKET_MARK_GC_LOCK_HELD);
- ret = bch2_btree_key_recalc_oldest_gen(c, k);
- break;
- default:
- BUG();
- }
-
- return ret;
-}
-
-int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
- struct bkey_s_c k)
+static int bch2_btree_mark_ptrs_initial(struct bch_fs *c, enum bkey_type type,
+ struct bkey_s_c k)
{
enum bch_data_type data_type = type == BKEY_TYPE_BTREE
? BCH_DATA_BTREE : BCH_DATA_USER;
@@ -154,10 +152,10 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
k.k->version.lo > journal_cur_seq(&c->journal));
if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
- fsck_err_on(!bch2_bkey_replicas_marked(c, data_type, k), c,
+ fsck_err_on(!bch2_bkey_replicas_marked(c, type, k), c,
"superblock not marked as containing replicas (type %u)",
data_type)) {
- ret = bch2_mark_bkey_replicas(c, data_type, k);
+ ret = bch2_mark_bkey_replicas(c, type, k);
if (ret)
return ret;
}
@@ -198,52 +196,87 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
}
}
- atomic64_set(&c->key_version,
- max_t(u64, k.k->version.lo,
- atomic64_read(&c->key_version)));
-
- bch2_gc_mark_key(c, type, k, BCH_BUCKET_MARK_NOATOMIC);
+ if (k.k->version.lo > atomic64_read(&c->key_version))
+ atomic64_set(&c->key_version, k.k->version.lo);
fsck_err:
return ret;
}
-static unsigned btree_gc_mark_node(struct bch_fs *c, struct btree *b)
+/*
+ * For runtime mark and sweep:
+ */
+static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
+ struct bkey_s_c k, bool initial)
+{
+ struct gc_pos pos = { 0 };
+ unsigned flags =
+ BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
+ BCH_BUCKET_MARK_GC_LOCK_HELD|
+ (initial ? BCH_BUCKET_MARK_NOATOMIC : 0);
+ int ret = 0;
+
+ switch (type) {
+ case BKEY_TYPE_BTREE:
+ case BKEY_TYPE_EXTENTS:
+ if (initial) {
+ ret = bch2_btree_mark_ptrs_initial(c, type, k);
+ if (ret < 0)
+ return ret;
+ }
+ break;
+ default:
+ break;
+ }
+
+ bch2_mark_key(c, type, k, true, k.k->size,
+ pos, NULL, 0, flags);
+
+ switch (type) {
+ case BKEY_TYPE_BTREE:
+ case BKEY_TYPE_EXTENTS:
+ ret = bch2_btree_key_recalc_oldest_gen(c, k);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
+ bool initial)
{
enum bkey_type type = btree_node_type(b);
struct btree_node_iter iter;
struct bkey unpacked;
struct bkey_s_c k;
u8 stale = 0;
+ int ret;
- if (btree_node_has_ptrs(b))
- for_each_btree_node_key_unpack(b, k, &iter,
- &unpacked) {
- bch2_bkey_debugcheck(c, b, k);
- stale = max(stale, bch2_gc_mark_key(c, type, k, 0));
- }
+ if (!bkey_type_needs_gc(type))
+ return 0;
- return stale;
-}
+ for_each_btree_node_key_unpack(b, k, &iter,
+ &unpacked) {
+ bch2_bkey_debugcheck(c, b, k);
-static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
-{
- write_seqcount_begin(&c->gc_pos_lock);
- c->gc_pos = new_pos;
- write_seqcount_end(&c->gc_pos_lock);
-}
+ ret = bch2_gc_mark_key(c, type, k, initial);
+ if (ret < 0)
+ return ret;
-static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
-{
- BUG_ON(gc_pos_cmp(new_pos, c->gc_pos) <= 0);
- __gc_pos_set(c, new_pos);
+ stale = max_t(u8, stale, ret);
+ }
+
+ return stale;
}
-static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
+static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
+ bool initial)
{
struct btree_iter iter;
struct btree *b;
struct range_checks r;
- unsigned depth = btree_id == BTREE_ID_EXTENTS ? 0 : 1;
+ unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1;
unsigned max_stale;
int ret = 0;
@@ -254,8 +287,11 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
/*
* if expensive_debug_checks is on, run range_checks on all leaf nodes:
+ *
+ * and on startup, we have to read every btree node (XXX: only if it was
+ * an unclean shutdown)
*/
- if (expensive_debug_checks(c))
+ if (initial || expensive_debug_checks(c))
depth = 0;
btree_node_range_checks_init(&r, depth);
@@ -266,22 +302,24 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
bch2_verify_btree_nr_keys(b);
- max_stale = btree_gc_mark_node(c, b);
+ max_stale = btree_gc_mark_node(c, b, initial);
gc_pos_set(c, gc_pos_btree_node(b));
- if (max_stale > 64)
- bch2_btree_node_rewrite(c, &iter,
- b->data->keys.seq,
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_NOWAIT|
- BTREE_INSERT_GC_LOCK_HELD);
- else if (!btree_gc_rewrite_disabled(c) &&
- (btree_gc_always_rewrite(c) || max_stale > 16))
- bch2_btree_node_rewrite(c, &iter,
- b->data->keys.seq,
- BTREE_INSERT_NOWAIT|
- BTREE_INSERT_GC_LOCK_HELD);
+ if (!initial) {
+ if (max_stale > 64)
+ bch2_btree_node_rewrite(c, &iter,
+ b->data->keys.seq,
+ BTREE_INSERT_USE_RESERVE|
+ BTREE_INSERT_NOWAIT|
+ BTREE_INSERT_GC_LOCK_HELD);
+ else if (!btree_gc_rewrite_disabled(c) &&
+ (btree_gc_always_rewrite(c) || max_stale > 16))
+ bch2_btree_node_rewrite(c, &iter,
+ b->data->keys.seq,
+ BTREE_INSERT_NOWAIT|
+ BTREE_INSERT_GC_LOCK_HELD);
+ }
bch2_btree_iter_cond_resched(&iter);
}
@@ -293,13 +331,47 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id)
b = c->btree_roots[btree_id].b;
if (!btree_node_fake(b))
- bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), 0);
+ bch2_gc_mark_key(c, BKEY_TYPE_BTREE,
+ bkey_i_to_s_c(&b->key), initial);
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
mutex_unlock(&c->btree_root_lock);
return 0;
}
+static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
+ bool initial)
+{
+ unsigned i;
+
+ for (i = 0; i < BTREE_ID_NR; i++) {
+ enum bkey_type type = bkey_type(0, i);
+
+ int ret = bch2_gc_btree(c, i, initial);
+ if (ret)
+ return ret;
+
+ if (journal && bkey_type_needs_gc(type)) {
+ struct bkey_i *k, *n;
+ struct jset_entry *j;
+ struct journal_replay *r;
+ int ret;
+
+ list_for_each_entry(r, journal, list)
+ for_each_jset_key(k, n, j, &r->j) {
+ if (type == bkey_type(j->level, j->btree_id)) {
+ ret = bch2_gc_mark_key(c, type,
+ bkey_i_to_s_c(k), initial);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
static void mark_metadata_sectors(struct bch_fs *c, struct bch_dev *ca,
u64 start, u64 end,
enum bch_data_type type,
@@ -395,10 +467,10 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
for_each_pending_btree_node_free(c, as, d)
if (d->index_update_done)
- bch2_mark_key(c, bkey_i_to_s_c(&d->key),
- c->opts.btree_node_size,
- BCH_DATA_BTREE, pos,
- &stats, 0,
+ bch2_mark_key(c, BKEY_TYPE_BTREE,
+ bkey_i_to_s_c(&d->key),
+ true, 0,
+ pos, &stats, 0,
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
BCH_BUCKET_MARK_GC_LOCK_HELD);
/*
@@ -522,6 +594,7 @@ void bch2_gc(struct bch_fs *c)
struct bch_dev *ca;
u64 start_time = local_clock();
unsigned i;
+ int ret;
/*
* Walk _all_ references to buckets, and recompute them:
@@ -557,14 +630,11 @@ void bch2_gc(struct bch_fs *c)
bch2_mark_superblocks(c);
- /* Walk btree: */
- for (i = 0; i < BTREE_ID_NR; i++) {
- int ret = bch2_gc_btree(c, i);
- if (ret) {
- bch_err(c, "btree gc failed: %d", ret);
- set_bit(BCH_FS_GC_FAILURE, &c->flags);
- goto out;
- }
+ ret = bch2_gc_btrees(c, NULL, false);
+ if (ret) {
+ bch_err(c, "btree gc failed: %d", ret);
+ set_bit(BCH_FS_GC_FAILURE, &c->flags);
+ goto out;
}
bch2_mark_pending_btree_node_frees(c);
@@ -1006,58 +1076,9 @@ int bch2_gc_thread_start(struct bch_fs *c)
/* Initial GC computes bucket marks during startup */
-static int bch2_initial_gc_btree(struct bch_fs *c, enum btree_id id)
-{
- struct btree_iter iter;
- struct btree *b;
- struct range_checks r;
- int ret = 0;
-
- btree_node_range_checks_init(&r, 0);
-
- gc_pos_set(c, gc_pos_btree(id, POS_MIN, 0));
-
- if (!c->btree_roots[id].b)
- return 0;
-
- b = c->btree_roots[id].b;
- if (!btree_node_fake(b))
- ret = bch2_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
- bkey_i_to_s_c(&b->key));
- if (ret)
- return ret;
-
- /*
- * We have to hit every btree node before starting journal replay, in
- * order for the journal seq blacklist machinery to work:
- */
- for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
- btree_node_range_checks(c, b, &r);
-
- if (btree_node_has_ptrs(b)) {
- struct btree_node_iter node_iter;
- struct bkey unpacked;
- struct bkey_s_c k;
-
- for_each_btree_node_key_unpack(b, k, &node_iter,
- &unpacked) {
- ret = bch2_btree_mark_key_initial(c,
- btree_node_type(b), k);
- if (ret)
- goto err;
- }
- }
-
- bch2_btree_iter_cond_resched(&iter);
- }
-err:
- return bch2_btree_iter_unlock(&iter) ?: ret;
-}
-
int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
{
unsigned iter = 0;
- enum btree_id id;
int ret = 0;
down_write(&c->gc_lock);
@@ -1066,13 +1087,7 @@ again:
bch2_mark_superblocks(c);
- for (id = 0; id < BTREE_ID_NR; id++) {
- ret = bch2_initial_gc_btree(c, id);
- if (ret)
- goto err;
- }
-
- ret = bch2_journal_mark(c, journal);
+ ret = bch2_gc_btrees(c, journal, true);
if (ret)
goto err;
diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree_gc.h
index 214a3fe3..f9225af2 100644
--- a/libbcachefs/btree_gc.h
+++ b/libbcachefs/btree_gc.h
@@ -11,8 +11,6 @@ void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
int bch2_initial_gc(struct bch_fs *, struct list_head *);
u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c);
-int bch2_btree_mark_key_initial(struct bch_fs *, enum bkey_type,
- struct bkey_s_c);
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
/*
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index a4f184f3..beab463b 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -35,7 +35,7 @@ void bch2_btree_node_iter_large_push(struct btree_node_iter_large *iter,
__btree_node_key_to_offset(b, end)
});
- __heap_add(iter, n, btree_node_iter_cmp_heap);
+ __heap_add(iter, n, btree_node_iter_cmp_heap, NULL);
}
}
@@ -48,9 +48,9 @@ void bch2_btree_node_iter_large_advance(struct btree_node_iter_large *iter,
EBUG_ON(iter->data->k > iter->data->end);
if (iter->data->k == iter->data->end)
- heap_del(iter, 0, btree_node_iter_cmp_heap);
+ heap_del(iter, 0, btree_node_iter_cmp_heap, NULL);
else
- heap_sift_down(iter, 0, btree_node_iter_cmp_heap);
+ heap_sift_down(iter, 0, btree_node_iter_cmp_heap, NULL);
}
static void verify_no_dups(struct btree *b,
@@ -1345,11 +1345,9 @@ static void btree_node_read_work(struct work_struct *work)
struct bch_dev *ca = bch_dev_bkey_exists(c, rb->pick.ptr.dev);
struct btree *b = rb->bio.bi_private;
struct bio *bio = &rb->bio;
- struct bch_devs_mask avoid;
+ struct bch_io_failures failed = { .nr = 0 };
bool can_retry;
- memset(&avoid, 0, sizeof(avoid));
-
goto start;
while (1) {
bch_info(c, "retrying read");
@@ -1372,8 +1370,9 @@ start:
percpu_ref_put(&ca->io_ref);
rb->have_ioref = false;
- __set_bit(rb->pick.ptr.dev, avoid.d);
- can_retry = bch2_btree_pick_ptr(c, b, &avoid, &rb->pick) > 0;
+ bch2_mark_io_failure(&failed, &rb->pick);
+
+ can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0;
if (!bio->bi_status &&
!bch2_btree_node_read_done(c, b, can_retry))
@@ -1408,7 +1407,7 @@ static void btree_node_read_endio(struct bio *bio)
void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
bool sync)
{
- struct extent_pick_ptr pick;
+ struct extent_ptr_decoded pick;
struct btree_read_bio *rb;
struct bch_dev *ca;
struct bio *bio;
@@ -1425,7 +1424,9 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b,
ca = bch_dev_bkey_exists(c, pick.ptr.dev);
- bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_bio);
+ bio = bio_alloc_bioset(GFP_NOIO, buf_pages(b->data,
+ btree_bytes(c)),
+ &c->btree_bio);
rb = container_of(bio, struct btree_read_bio, bio);
rb->c = c;
rb->start_time = local_clock();
@@ -1568,9 +1569,9 @@ retry:
new_key = bkey_i_to_extent(&tmp.k);
e = extent_i_to_s(new_key);
- extent_for_each_ptr_backwards(e, ptr)
- if (bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev))
- bch2_extent_drop_ptr(e, ptr);
+
+ bch2_extent_drop_ptrs(e, ptr,
+ bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
if (!bch2_extent_nr_ptrs(e.c))
goto err;
@@ -1880,7 +1881,9 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
trace_btree_write(b, bytes_to_write, sectors_to_write);
- wbio = container_of(bio_alloc_bioset(GFP_NOIO, 1 << order, &c->btree_bio),
+ wbio = container_of(bio_alloc_bioset(GFP_NOIO,
+ buf_pages(data, sectors_to_write << 9),
+ &c->btree_bio),
struct btree_write_bio, wbio.bio);
wbio_init(&wbio->wbio.bio);
wbio->data = data;
diff --git a/libbcachefs/btree_io.h b/libbcachefs/btree_io.h
index ccd47326..48833a98 100644
--- a/libbcachefs/btree_io.h
+++ b/libbcachefs/btree_io.h
@@ -14,7 +14,7 @@ struct btree_read_bio {
struct bch_fs *c;
u64 start_time;
unsigned have_ioref:1;
- struct extent_pick_ptr pick;
+ struct extent_ptr_decoded pick;
struct work_struct work;
struct bio bio;
};
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index e20dd7a2..44349159 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -414,11 +414,6 @@ static inline const struct bkey_ops *btree_node_ops(struct btree *b)
return &bch2_bkey_ops[btree_node_type(b)];
}
-static inline bool btree_node_has_ptrs(struct btree *b)
-{
- return btree_type_has_ptrs(btree_node_type(b));
-}
-
static inline bool btree_node_is_extents(struct btree *b)
{
return btree_node_type(b) == BKEY_TYPE_EXTENTS;
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 6d3fab8e..0a9d6919 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -210,11 +210,12 @@ found:
if (gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
struct bch_fs_usage tmp = { 0 };
- bch2_mark_key(c, bkey_i_to_s_c(&d->key),
- -c->opts.btree_node_size, BCH_DATA_BTREE, b
- ? gc_pos_btree_node(b)
- : gc_pos_btree_root(as->btree_id),
- &tmp, 0, 0);
+ bch2_mark_key(c, BKEY_TYPE_BTREE,
+ bkey_i_to_s_c(&d->key),
+ false, 0, b
+ ? gc_pos_btree_node(b)
+ : gc_pos_btree_root(as->btree_id),
+ &tmp, 0, 0);
/*
* Don't apply tmp - pending deletes aren't tracked in
* bch_alloc_stats:
@@ -289,10 +290,11 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
BUG_ON(!pending->index_update_done);
- bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
- -c->opts.btree_node_size, BCH_DATA_BTREE,
- gc_phase(GC_PHASE_PENDING_DELETE),
- &stats, 0, 0);
+ bch2_mark_key(c, BKEY_TYPE_BTREE,
+ bkey_i_to_s_c(&pending->key),
+ false, 0,
+ gc_phase(GC_PHASE_PENDING_DELETE),
+ &stats, 0, 0);
/*
* Don't apply stats - pending deletes aren't tracked in
* bch_alloc_stats:
@@ -550,7 +552,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c,
goto err_free;
}
- ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+ ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
if (ret)
goto err_free;
@@ -1091,8 +1093,9 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
__bch2_btree_set_root_inmem(c, b);
- bch2_mark_key(c, bkey_i_to_s_c(&b->key),
- c->opts.btree_node_size, BCH_DATA_BTREE,
+ bch2_mark_key(c, BKEY_TYPE_BTREE,
+ bkey_i_to_s_c(&b->key),
+ true, 0,
gc_pos_btree_root(b->btree_id),
&stats, 0, 0);
@@ -1179,9 +1182,10 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, b));
if (bkey_extent_is_data(&insert->k))
- bch2_mark_key(c, bkey_i_to_s_c(insert),
- c->opts.btree_node_size, BCH_DATA_BTREE,
- gc_pos_btree_node(b), &stats, 0, 0);
+ bch2_mark_key(c, BKEY_TYPE_BTREE,
+ bkey_i_to_s_c(insert),
+ true, 0,
+ gc_pos_btree_node(b), &stats, 0, 0);
while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) &&
bkey_iter_pos_cmp(b, &insert->k.p, k) > 0)
@@ -1966,8 +1970,9 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
bch2_btree_node_lock_write(b, iter);
- bch2_mark_key(c, bkey_i_to_s_c(&new_key->k_i),
- c->opts.btree_node_size, BCH_DATA_BTREE,
+ bch2_mark_key(c, BKEY_TYPE_BTREE,
+ bkey_i_to_s_c(&new_key->k_i),
+ true, 0,
gc_pos_btree_root(b->btree_id),
&stats, 0, 0);
bch2_btree_node_free_index(as, NULL,
@@ -2062,7 +2067,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
goto err;
}
- ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+ ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
extent_i_to_s_c(new_key).s_c);
if (ret)
goto err_free_update;
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 052e8af8..271c02f1 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -533,27 +533,12 @@ static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
crc.uncompressed_size));
}
-/*
- * Checking against gc's position has to be done here, inside the cmpxchg()
- * loop, to avoid racing with the start of gc clearing all the marks - GC does
- * that with the gc pos seqlock held.
- */
-static void bch2_mark_pointer(struct bch_fs *c,
- struct bkey_s_c_extent e,
- const struct bch_extent_ptr *ptr,
- struct bch_extent_crc_unpacked crc,
- s64 sectors, enum bch_data_type data_type,
- unsigned replicas,
- struct bch_fs_usage *fs_usage,
- u64 journal_seq, unsigned flags)
+static s64 ptr_disk_sectors(struct bkey_s_c_extent e,
+ struct extent_ptr_decoded p,
+ s64 sectors)
{
- struct bucket_mark old, new;
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- struct bucket *g = PTR_BUCKET(ca, ptr);
- s64 uncompressed_sectors = sectors;
- u64 v;
- if (crc.compression_type) {
+ if (p.crc.compression_type) {
unsigned old_sectors, new_sectors;
if (sectors > 0) {
@@ -564,23 +549,29 @@ static void bch2_mark_pointer(struct bch_fs *c,
new_sectors = e.k->size + sectors;
}
- sectors = -__disk_sectors(crc, old_sectors)
- +__disk_sectors(crc, new_sectors);
+ sectors = -__disk_sectors(p.crc, old_sectors)
+ +__disk_sectors(p.crc, new_sectors);
}
- /*
- * fs level usage (which determines free space) is in uncompressed
- * sectors, until copygc + compression is sorted out:
- *
- * note also that we always update @fs_usage, even when we otherwise
- * wouldn't do anything because gc is running - this is because the
- * caller still needs to account w.r.t. its disk reservation. It is
- * caller's responsibility to not apply @fs_usage if gc is in progress.
- */
- fs_usage->replicas
- [!ptr->cached && replicas ? replicas - 1 : 0].data
- [!ptr->cached ? data_type : BCH_DATA_CACHED] +=
- uncompressed_sectors;
+ return sectors;
+}
+
+/*
+ * Checking against gc's position has to be done here, inside the cmpxchg()
+ * loop, to avoid racing with the start of gc clearing all the marks - GC does
+ * that with the gc pos seqlock held.
+ */
+static void bch2_mark_pointer(struct bch_fs *c,
+ struct bkey_s_c_extent e,
+ struct extent_ptr_decoded p,
+ s64 sectors, enum bch_data_type data_type,
+ struct bch_fs_usage *fs_usage,
+ u64 journal_seq, unsigned flags)
+{
+ struct bucket_mark old, new;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+ struct bucket *g = PTR_BUCKET(ca, &p.ptr);
+ u64 v;
if (flags & BCH_BUCKET_MARK_GC_WILL_VISIT) {
if (journal_seq)
@@ -601,14 +592,14 @@ static void bch2_mark_pointer(struct bch_fs *c,
* the allocator invalidating a bucket after we've already
* checked the gen
*/
- if (gen_after(new.gen, ptr->gen)) {
+ if (gen_after(new.gen, p.ptr.gen)) {
BUG_ON(!test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags));
- EBUG_ON(!ptr->cached &&
+ EBUG_ON(!p.ptr.cached &&
test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags));
return;
}
- if (!ptr->cached)
+ if (!p.ptr.cached)
checked_add(new.dirty_sectors, sectors);
else
checked_add(new.cached_sectors, sectors);
@@ -639,16 +630,64 @@ static void bch2_mark_pointer(struct bch_fs *c,
bucket_became_unavailable(c, old, new));
}
-void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
- s64 sectors, enum bch_data_type data_type,
- struct gc_pos pos,
- struct bch_fs_usage *stats,
- u64 journal_seq, unsigned flags)
+static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
+ s64 sectors, enum bch_data_type data_type,
+ struct gc_pos pos,
+ struct bch_fs_usage *stats,
+ u64 journal_seq, unsigned flags)
{
unsigned replicas = bch2_extent_nr_dirty_ptrs(k);
BUG_ON(replicas && replicas - 1 > ARRAY_SIZE(stats->replicas));
+ BUG_ON(!sectors);
+
+ switch (k.k->type) {
+ case BCH_EXTENT:
+ case BCH_EXTENT_CACHED: {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+
+ extent_for_each_ptr_decode(e, p, entry) {
+ s64 disk_sectors = ptr_disk_sectors(e, p, sectors);
+
+ /*
+ * fs level usage (which determines free space) is in
+ * uncompressed sectors, until copygc + compression is
+ * sorted out:
+ *
+ * note also that we always update @fs_usage, even when
+ * we otherwise wouldn't do anything because gc is
+ * running - this is because the caller still needs to
+ * account w.r.t. its disk reservation. It is caller's
+ * responsibility to not apply @fs_usage if gc is in
+ * progress.
+ */
+ stats->replicas
+ [!p.ptr.cached && replicas ? replicas - 1 : 0].data
+ [!p.ptr.cached ? data_type : BCH_DATA_CACHED] +=
+ sectors;
+
+ bch2_mark_pointer(c, e, p, disk_sectors, data_type,
+ stats, journal_seq, flags);
+ }
+ break;
+ }
+ case BCH_RESERVATION:
+ if (replicas)
+ stats->replicas[replicas - 1].persistent_reserved +=
+ sectors * replicas;
+ break;
+ }
+}
+void bch2_mark_key(struct bch_fs *c,
+ enum bkey_type type, struct bkey_s_c k,
+ bool inserting, s64 sectors,
+ struct gc_pos pos,
+ struct bch_fs_usage *stats,
+ u64 journal_seq, unsigned flags)
+{
/*
* synchronization w.r.t. GC:
*
@@ -685,24 +724,19 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
if (!stats)
stats = this_cpu_ptr(c->usage_percpu);
- switch (k.k->type) {
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED: {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
-
- BUG_ON(!sectors);
-
- extent_for_each_ptr_crc(e, ptr, crc)
- bch2_mark_pointer(c, e, ptr, crc, sectors, data_type,
- replicas, stats, journal_seq, flags);
+ switch (type) {
+ case BKEY_TYPE_BTREE:
+ bch2_mark_extent(c, k, inserting
+ ? c->opts.btree_node_size
+ : -c->opts.btree_node_size,
+ BCH_DATA_BTREE,
+ pos, stats, journal_seq, flags);
break;
- }
- case BCH_RESERVATION:
- if (replicas)
- stats->replicas[replicas - 1].persistent_reserved +=
- sectors * replicas;
+ case BKEY_TYPE_EXTENTS:
+ bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
+ pos, stats, journal_seq, flags);
+ break;
+ default:
break;
}
percpu_up_read_preempt_enable(&c->usage_lock);
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index ff86d23e..d9fe938a 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -203,8 +203,9 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
#define BCH_BUCKET_MARK_GC_WILL_VISIT (1 << 2)
#define BCH_BUCKET_MARK_GC_LOCK_HELD (1 << 3)
-void bch2_mark_key(struct bch_fs *, struct bkey_s_c, s64, enum bch_data_type,
- struct gc_pos, struct bch_fs_usage *, u64, unsigned);
+void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
+ bool, s64, struct gc_pos,
+ struct bch_fs_usage *, u64, unsigned);
void bch2_recalc_sectors_available(struct bch_fs *);
diff --git a/libbcachefs/clock.c b/libbcachefs/clock.c
index c67376f9..90b10cef 100644
--- a/libbcachefs/clock.c
+++ b/libbcachefs/clock.c
@@ -21,7 +21,7 @@ void bch2_io_timer_add(struct io_clock *clock, struct io_timer *timer)
if (clock->timers.data[i] == timer)
goto out;
- BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp));
+ BUG_ON(!heap_add(&clock->timers, timer, io_timer_cmp, NULL));
out:
spin_unlock(&clock->timer_lock);
}
@@ -34,7 +34,7 @@ void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer)
for (i = 0; i < clock->timers.used; i++)
if (clock->timers.data[i] == timer) {
- heap_del(&clock->timers, i, io_timer_cmp);
+ heap_del(&clock->timers, i, io_timer_cmp, NULL);
break;
}
@@ -127,7 +127,7 @@ static struct io_timer *get_expired_timer(struct io_clock *clock,
if (clock->timers.used &&
time_after_eq(now, clock->timers.data[0]->expire))
- heap_pop(&clock->timers, ret, io_timer_cmp);
+ heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
spin_unlock(&clock->timer_lock);
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index 71f649bc..f69d76ec 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -35,7 +35,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
struct btree *v = c->verify_data;
struct btree_node *n_ondisk, *n_sorted, *n_inmemory;
struct bset *sorted, *inmemory;
- struct extent_pick_ptr pick;
+ struct extent_ptr_decoded pick;
struct bch_dev *ca;
struct bio *bio;
@@ -62,7 +62,9 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
if (!bch2_dev_get_ioref(ca, READ))
return;
- bio = bio_alloc_bioset(GFP_NOIO, btree_pages(c), &c->btree_bio);
+ bio = bio_alloc_bioset(GFP_NOIO,
+ buf_pages(n_sorted, btree_bytes(c)),
+ &c->btree_bio);
bio_set_dev(bio, ca->disk_sb.bdev);
bio->bi_opf = REQ_OP_READ|REQ_META;
bio->bi_iter.bi_sector = pick.ptr.offset;
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index a4d7e52b..6eaa89c9 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -88,7 +88,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
memset(&nr, 0, sizeof(nr));
- heap_resort(iter, key_sort_cmp);
+ heap_resort(iter, key_sort_cmp, NULL);
while (!bch2_btree_node_iter_large_end(iter)) {
if (!should_drop_next_key(iter, b)) {
@@ -101,7 +101,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
}
sort_key_next(iter, b, iter->data);
- heap_sift_down(iter, 0, key_sort_cmp);
+ heap_sift_down(iter, 0, key_sort_cmp, NULL);
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
@@ -122,20 +122,11 @@ bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
return NULL;
}
-bool bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
+void bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
{
struct bch_extent_ptr *ptr;
- bool dropped = false;
- extent_for_each_ptr_backwards(e, ptr)
- if (ptr->dev == dev) {
- __bch2_extent_drop_ptr(e, ptr);
- dropped = true;
- }
-
- if (dropped)
- bch2_extent_drop_redundant_crcs(e);
- return dropped;
+ bch2_extent_drop_ptrs(e, ptr, ptr->dev == dev);
}
const struct bch_extent_ptr *
@@ -231,21 +222,21 @@ unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e)
unsigned bch2_extent_is_compressed(struct bkey_s_c k)
{
- struct bkey_s_c_extent e;
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
unsigned ret = 0;
switch (k.k->type) {
case BCH_EXTENT:
- case BCH_EXTENT_CACHED:
- e = bkey_s_c_to_extent(k);
+ case BCH_EXTENT_CACHED: {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
- extent_for_each_ptr_crc(e, ptr, crc)
- if (!ptr->cached &&
- crc.compression_type != BCH_COMPRESSION_NONE &&
- crc.compressed_size < crc.live_size)
- ret = max_t(unsigned, ret, crc.compressed_size);
+ extent_for_each_ptr_decode(e, p, entry)
+ if (!p.ptr.cached &&
+ p.crc.compression_type != BCH_COMPRESSION_NONE &&
+ p.crc.compressed_size < p.crc.live_size)
+ ret = max_t(unsigned, ret, p.crc.compressed_size);
+ }
}
return ret;
@@ -254,34 +245,50 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c k)
bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e,
struct bch_extent_ptr m, u64 offset)
{
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
- extent_for_each_ptr_crc(e, ptr, crc)
- if (ptr->dev == m.dev &&
- ptr->gen == m.gen &&
- (s64) ptr->offset + crc.offset - bkey_start_offset(e.k) ==
+ extent_for_each_ptr_decode(e, p, entry)
+ if (p.ptr.dev == m.dev &&
+ p.ptr.gen == m.gen &&
+ (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(e.k) ==
(s64) m.offset - offset)
- return ptr;
+ return true;
- return NULL;
+ return false;
}
-/* Doesn't cleanup redundant crcs */
-void __bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
+union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e,
+ struct bch_extent_ptr *ptr)
{
+ union bch_extent_entry *dst;
+ union bch_extent_entry *src;
+
EBUG_ON(ptr < &e.v->start->ptr ||
ptr >= &extent_entry_last(e)->ptr);
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
- memmove_u64s_down(ptr, ptr + 1,
- (u64 *) extent_entry_last(e) - (u64 *) (ptr + 1));
- e.k->u64s -= sizeof(*ptr) / sizeof(u64);
-}
-void bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
-{
- __bch2_extent_drop_ptr(e, ptr);
- bch2_extent_drop_redundant_crcs(e);
+ src = to_entry(ptr + 1);
+
+ if (src != extent_entry_last(e) &&
+ extent_entry_type(src) == BCH_EXTENT_ENTRY_ptr) {
+ dst = to_entry(ptr);
+ } else {
+ extent_for_each_entry(e, dst) {
+ if (dst == to_entry(ptr))
+ break;
+
+ if (extent_entry_next(dst) == to_entry(ptr) &&
+ extent_entry_is_crc(dst))
+ break;
+ }
+ }
+
+ memmove_u64s_down(dst, src,
+ (u64 *) extent_entry_last(e) - (u64 *) src);
+ e.k->u64s -= (u64 *) src - (u64 *) dst;
+
+ return dst;
}
static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
@@ -323,38 +330,38 @@ bool bch2_extent_narrow_crcs(struct bkey_i_extent *e,
struct bch_extent_crc_unpacked n)
{
struct bch_extent_crc_unpacked u;
- struct bch_extent_ptr *ptr;
+ struct extent_ptr_decoded p;
union bch_extent_entry *i;
+ bool ret = false;
/* Find a checksum entry that covers only live data: */
- if (!n.csum_type)
+ if (!n.csum_type) {
extent_for_each_crc(extent_i_to_s(e), u, i)
if (!u.compression_type &&
u.csum_type &&
u.live_size == u.uncompressed_size) {
n = u;
- break;
+ goto found;
}
-
- if (!bch2_can_narrow_extent_crcs(extent_i_to_s_c(e), n))
return false;
-
+ }
+found:
BUG_ON(n.compression_type);
BUG_ON(n.offset);
BUG_ON(n.live_size != e->k.size);
- bch2_extent_crc_append(e, n);
restart_narrow_pointers:
- extent_for_each_ptr_crc(extent_i_to_s(e), ptr, u)
- if (can_narrow_crc(u, n)) {
- ptr->offset += u.offset;
- extent_ptr_append(e, *ptr);
- __bch2_extent_drop_ptr(extent_i_to_s(e), ptr);
+ extent_for_each_ptr_decode(extent_i_to_s(e), p, i)
+ if (can_narrow_crc(p.crc, n)) {
+ bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr);
+ p.ptr.offset += p.crc.offset;
+ p.crc = n;
+ bch2_extent_ptr_decoded_append(e, &p);
+ ret = true;
goto restart_narrow_pointers;
}
- bch2_extent_drop_redundant_crcs(extent_i_to_s(e));
- return true;
+ return ret;
}
/* returns true if not equal */
@@ -371,87 +378,13 @@ static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
bch2_crc_cmp(l.csum, r.csum));
}
-void bch2_extent_drop_redundant_crcs(struct bkey_s_extent e)
-{
- union bch_extent_entry *entry = e.v->start;
- union bch_extent_crc *crc, *prev = NULL;
- struct bch_extent_crc_unpacked u, prev_u = { 0 };
-
- while (entry != extent_entry_last(e)) {
- union bch_extent_entry *next = extent_entry_next(entry);
- size_t crc_u64s = extent_entry_u64s(entry);
-
- if (!extent_entry_is_crc(entry))
- goto next;
-
- crc = entry_to_crc(entry);
- u = bch2_extent_crc_unpack(e.k, crc);
-
- if (next == extent_entry_last(e)) {
- /* crc entry with no pointers after it: */
- goto drop;
- }
-
- if (extent_entry_is_crc(next)) {
- /* no pointers before next crc entry: */
- goto drop;
- }
-
- if (prev && !bch2_crc_unpacked_cmp(u, prev_u)) {
- /* identical to previous crc entry: */
- goto drop;
- }
-
- if (!prev &&
- !u.csum_type &&
- !u.compression_type) {
- /* null crc entry: */
- union bch_extent_entry *e2;
-
- extent_for_each_entry_from(e, e2, extent_entry_next(entry)) {
- if (!extent_entry_is_ptr(e2))
- break;
-
- e2->ptr.offset += u.offset;
- }
- goto drop;
- }
-
- prev = crc;
- prev_u = u;
-next:
- entry = next;
- continue;
-drop:
- memmove_u64s_down(crc, next,
- (u64 *) extent_entry_last(e) - (u64 *) next);
- e.k->u64s -= crc_u64s;
- }
-
- EBUG_ON(bkey_val_u64s(e.k) && !bch2_extent_nr_ptrs(e.c));
-}
-
-static bool should_drop_ptr(const struct bch_fs *c,
- struct bkey_s_c_extent e,
- const struct bch_extent_ptr *ptr)
-{
- return ptr->cached && ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr);
-}
-
static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
{
- struct bch_extent_ptr *ptr = &e.v->start->ptr;
- bool dropped = false;
-
- while ((ptr = extent_ptr_next(e, ptr)))
- if (should_drop_ptr(c, e.c, ptr)) {
- __bch2_extent_drop_ptr(e, ptr);
- dropped = true;
- } else
- ptr++;
+ struct bch_extent_ptr *ptr;
- if (dropped)
- bch2_extent_drop_redundant_crcs(e);
+ bch2_extent_drop_ptrs(e, ptr,
+ ptr->cached &&
+ ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
}
bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k)
@@ -475,6 +408,8 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
entry = extent_entry_next(entry)) {
switch (extent_entry_type(entry)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ break;
case BCH_EXTENT_ENTRY_crc32:
entry->crc32.csum = swab32(entry->crc32.csum);
break;
@@ -488,8 +423,6 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
entry->crc128.csum.lo = (__force __le64)
swab64((__force u64) entry->crc128.csum.lo);
break;
- case BCH_EXTENT_ENTRY_ptr:
- break;
}
}
break;
@@ -586,12 +519,45 @@ out:
return out - buf;
}
-static inline bool dev_latency_better(struct bch_fs *c,
- const struct bch_extent_ptr *ptr1,
- const struct bch_extent_ptr *ptr2)
+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
+ unsigned dev)
+{
+ struct bch_dev_io_failures *i;
+
+ for (i = f->devs; i < f->devs + f->nr; i++)
+ if (i->dev == dev)
+ return i;
+
+ return NULL;
+}
+
+void bch2_mark_io_failure(struct bch_io_failures *failed,
+ struct extent_ptr_decoded *p)
+{
+ struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+
+ if (!f) {
+ BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+ f = &failed->devs[failed->nr++];
+ f->dev = p->ptr.dev;
+ f->nr_failed = 1;
+ f->nr_retries = 0;
+ } else {
+ f->nr_failed++;
+ }
+}
+
+/*
+ * returns true if p1 is better than p2:
+ */
+static inline bool ptr_better(struct bch_fs *c,
+ const struct extent_ptr_decoded p1,
+ const struct extent_ptr_decoded p2)
{
- struct bch_dev *dev1 = bch_dev_bkey_exists(c, ptr1->dev);
- struct bch_dev *dev2 = bch_dev_bkey_exists(c, ptr2->dev);
+ struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
+ struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+
u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
@@ -602,31 +568,29 @@ static inline bool dev_latency_better(struct bch_fs *c,
static int extent_pick_read_device(struct bch_fs *c,
struct bkey_s_c_extent e,
- struct bch_devs_mask *avoid,
- struct extent_pick_ptr *pick)
+ struct bch_io_failures *failed,
+ struct extent_ptr_decoded *pick)
{
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ struct bch_dev_io_failures *f;
struct bch_dev *ca;
int ret = 0;
- extent_for_each_ptr_crc(e, ptr, crc) {
- ca = bch_dev_bkey_exists(c, ptr->dev);
+ extent_for_each_ptr_decode(e, p, entry) {
+ ca = bch_dev_bkey_exists(c, p.ptr.dev);
- if (ptr->cached && ptr_stale(ca, ptr))
+ if (p.ptr.cached && ptr_stale(ca, &p.ptr))
continue;
- if (avoid && test_bit(ptr->dev, avoid->d))
+ f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+ if (f && f->nr_failed >= f->nr_retries)
continue;
- if (ret && !dev_latency_better(c, ptr, &pick->ptr))
+ if (ret && !ptr_better(c, p, *pick))
continue;
- *pick = (struct extent_pick_ptr) {
- .ptr = *ptr,
- .crc = crc,
- };
-
+ *pick = p;
ret = 1;
}
@@ -715,7 +679,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
goto err;
}
- if (!bch2_bkey_replicas_marked(c, BCH_DATA_BTREE, e.s_c)) {
+ if (!bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
bch2_bkey_val_to_text(c, btree_node_type(b),
buf, sizeof(buf), k);
bch2_fs_bug(c,
@@ -752,11 +716,11 @@ int bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
}
int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
- struct bch_devs_mask *avoid,
- struct extent_pick_ptr *pick)
+ struct bch_io_failures *failed,
+ struct extent_ptr_decoded *pick)
{
return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
- avoid, pick);
+ failed, pick);
}
/* Extents */
@@ -908,7 +872,7 @@ static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
static inline void extent_sort_sift(struct btree_node_iter_large *iter,
struct btree *b, size_t i)
{
- heap_sift_down(iter, i, extent_sort_cmp);
+ heap_sift_down(iter, i, extent_sort_cmp, NULL);
}
static inline void extent_sort_next(struct btree_node_iter_large *iter,
@@ -916,7 +880,7 @@ static inline void extent_sort_next(struct btree_node_iter_large *iter,
struct btree_node_iter_set *i)
{
sort_key_next(iter, b, i);
- heap_sift_down(iter, i - iter->data, extent_sort_cmp);
+ heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
}
static void extent_sort_append(struct bch_fs *c,
@@ -964,7 +928,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
memset(&nr, 0, sizeof(nr));
- heap_resort(iter, extent_sort_cmp);
+ heap_resort(iter, extent_sort_cmp, NULL);
while (!bch2_btree_node_iter_large_end(iter)) {
lk = __btree_node_offset_to_key(b, _l->k);
@@ -1076,8 +1040,9 @@ static void bch2_add_sectors(struct extent_insert_state *s,
if (!sectors)
return;
- bch2_mark_key(c, k, sectors, BCH_DATA_USER, gc_pos_btree_node(b),
- &s->stats, s->trans->journal_res.seq, 0);
+ bch2_mark_key(c, BKEY_TYPE_EXTENTS, k, sectors > 0, sectors,
+ gc_pos_btree_node(b), &s->stats,
+ s->trans->journal_res.seq, 0);
}
static void bch2_subtract_sectors(struct extent_insert_state *s,
@@ -1748,8 +1713,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
return;
}
- if (!bkey_extent_is_cached(e.k) &&
- !bch2_bkey_replicas_marked(c, BCH_DATA_USER, e.s_c)) {
+ if (!bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
bch2_bkey_val_to_text(c, btree_node_type(b),
buf, sizeof(buf), e.s_c);
bch2_fs_bug(c,
@@ -1853,25 +1817,25 @@ static void bch2_extent_crc_init(union bch_extent_crc *crc,
void bch2_extent_crc_append(struct bkey_i_extent *e,
struct bch_extent_crc_unpacked new)
{
- struct bch_extent_crc_unpacked crc;
- const union bch_extent_entry *i;
-
- BUG_ON(new.compressed_size > new.uncompressed_size);
- BUG_ON(new.live_size != e->k.size);
- BUG_ON(!new.compressed_size || !new.uncompressed_size);
+ bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new);
+ __extent_entry_push(e);
+}
- /*
- * Look up the last crc entry, so we can check if we need to add
- * another:
- */
- extent_for_each_crc(extent_i_to_s(e), crc, i)
- ;
+void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e,
+ struct extent_ptr_decoded *p)
+{
+ struct bch_extent_crc_unpacked crc;
+ union bch_extent_entry *pos;
- if (!bch2_crc_unpacked_cmp(crc, new))
- return;
+ extent_for_each_crc(extent_i_to_s(e), crc, pos)
+ if (!bch2_crc_unpacked_cmp(crc, p->crc))
+ goto found;
- bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new);
- __extent_entry_push(e);
+ bch2_extent_crc_append(e, p->crc);
+ pos = extent_entry_last(extent_i_to_s(e));
+found:
+ p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
+ __extent_entry_insert(e, pos, to_entry(&p->ptr));
}
/*
@@ -1957,8 +1921,8 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
* other devices, it will still pick a pointer from avoid.
*/
int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
- struct bch_devs_mask *avoid,
- struct extent_pick_ptr *pick)
+ struct bch_io_failures *failed,
+ struct extent_ptr_decoded *pick)
{
int ret;
@@ -1969,7 +1933,7 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
case BCH_EXTENT:
case BCH_EXTENT_CACHED:
ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
- avoid, pick);
+ failed, pick);
if (!ret && !bkey_extent_is_cached(k.k))
ret = -EIO;
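
Read retries in this update stop tracking failed devices with a bch_devs_mask bitmask and instead use the new struct bch_io_failures, a small array of per-device failure counts manipulated by dev_io_failures() and bch2_mark_io_failure() above. The struct itself lives in super_types.h, which this excerpt does not show, so the layout below is an assumption inferred from those accessors:

    /* Assumed layout, inferred from dev_io_failures()/bch2_mark_io_failure();
     * field widths and the array bound are illustrative, not the real header. */
    struct bch_dev_io_failures {
            u8      dev;
            u8      nr_failed;
            u8      nr_retries;
    };

    struct bch_io_failures {
            u8                              nr;
            struct bch_dev_io_failures      devs[8];
    };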
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 66a02f1c..e04cb5a9 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -52,13 +52,14 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
struct btree *,
struct btree_node_iter_large *);
+void bch2_mark_io_failure(struct bch_io_failures *,
+ struct extent_ptr_decoded *);
int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *,
- struct bch_devs_mask *avoid,
- struct extent_pick_ptr *);
-
+ struct bch_io_failures *,
+ struct extent_ptr_decoded *);
int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c,
- struct bch_devs_mask *,
- struct extent_pick_ptr *);
+ struct bch_io_failures *,
+ struct extent_ptr_decoded *);
void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
@@ -83,7 +84,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
const struct bch_extent_ptr *
bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
-bool bch2_extent_drop_device(struct bkey_s_extent, unsigned);
+void bch2_extent_drop_device(struct bkey_s_extent, unsigned);
const struct bch_extent_ptr *
bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned);
const struct bch_extent_ptr *
@@ -161,14 +162,11 @@ extent_entry_type(const union bch_extent_entry *e)
static inline size_t extent_entry_bytes(const union bch_extent_entry *entry)
{
switch (extent_entry_type(entry)) {
- case BCH_EXTENT_ENTRY_crc32:
- return sizeof(struct bch_extent_crc32);
- case BCH_EXTENT_ENTRY_crc64:
- return sizeof(struct bch_extent_crc64);
- case BCH_EXTENT_ENTRY_crc128:
- return sizeof(struct bch_extent_crc128);
- case BCH_EXTENT_ENTRY_ptr:
- return sizeof(struct bch_extent_ptr);
+#define x(f, n) \
+ case BCH_EXTENT_ENTRY_##f: \
+ return sizeof(struct bch_extent_##f);
+ BCH_EXTENT_ENTRY_TYPES()
+#undef x
default:
BUG();
}
@@ -181,12 +179,24 @@ static inline size_t extent_entry_u64s(const union bch_extent_entry *entry)
static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
{
- return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
+ switch (extent_entry_type(e)) {
+ case BCH_EXTENT_ENTRY_ptr:
+ return true;
+ default:
+ return false;
+ }
}
static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
{
- return !extent_entry_is_ptr(e);
+ switch (extent_entry_type(e)) {
+ case BCH_EXTENT_ENTRY_crc32:
+ case BCH_EXTENT_ENTRY_crc64:
+ case BCH_EXTENT_ENTRY_crc128:
+ return true;
+ default:
+ return false;
+ }
}
union bch_extent_crc {
@@ -200,11 +210,13 @@ union bch_extent_crc {
#define to_entry(_entry) \
({ \
BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \
- !type_is(_entry, struct bch_extent_ptr *)); \
+ !type_is(_entry, struct bch_extent_ptr *) && \
+ !type_is(_entry, struct bch_extent_stripe_ptr *)); \
\
__builtin_choose_expr( \
(type_is_exact(_entry, const union bch_extent_crc *) || \
- type_is_exact(_entry, const struct bch_extent_ptr *)), \
+ type_is_exact(_entry, const struct bch_extent_ptr *) ||\
+ type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\
(const union bch_extent_entry *) (_entry), \
(union bch_extent_entry *) (_entry)); \
})
@@ -234,44 +246,6 @@ union bch_extent_crc {
/* checksum entries: */
-enum bch_extent_crc_type {
- BCH_EXTENT_CRC_NONE,
- BCH_EXTENT_CRC32,
- BCH_EXTENT_CRC64,
- BCH_EXTENT_CRC128,
-};
-
-static inline enum bch_extent_crc_type
-__extent_crc_type(const union bch_extent_crc *crc)
-{
- if (!crc)
- return BCH_EXTENT_CRC_NONE;
-
- switch (extent_entry_type(to_entry(crc))) {
- case BCH_EXTENT_ENTRY_crc32:
- return BCH_EXTENT_CRC32;
- case BCH_EXTENT_ENTRY_crc64:
- return BCH_EXTENT_CRC64;
- case BCH_EXTENT_ENTRY_crc128:
- return BCH_EXTENT_CRC128;
- default:
- BUG();
- }
-}
-
-#define extent_crc_type(_crc) \
-({ \
- BUILD_BUG_ON(!type_is(_crc, struct bch_extent_crc32 *) && \
- !type_is(_crc, struct bch_extent_crc64 *) && \
- !type_is(_crc, struct bch_extent_crc128 *) && \
- !type_is(_crc, union bch_extent_crc *)); \
- \
- type_is(_crc, struct bch_extent_crc32 *) ? BCH_EXTENT_CRC32 \
- : type_is(_crc, struct bch_extent_crc64 *) ? BCH_EXTENT_CRC64 \
- : type_is(_crc, struct bch_extent_crc128 *) ? BCH_EXTENT_CRC128 \
- : __extent_crc_type((union bch_extent_crc *) _crc); \
-})
-
static inline struct bch_extent_crc_unpacked
bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
{
@@ -283,14 +257,15 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
.offset = _crc.offset, \
.live_size = k->size
- switch (extent_crc_type(crc)) {
- case BCH_EXTENT_CRC_NONE:
+ if (!crc)
return (struct bch_extent_crc_unpacked) {
.compressed_size = k->size,
.uncompressed_size = k->size,
.live_size = k->size,
};
- case BCH_EXTENT_CRC32: {
+
+ switch (extent_entry_type(to_entry(crc))) {
+ case BCH_EXTENT_ENTRY_crc32: {
struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
common_fields(crc->crc32),
};
@@ -302,7 +277,7 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
return ret;
}
- case BCH_EXTENT_CRC64: {
+ case BCH_EXTENT_ENTRY_crc64: {
struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
common_fields(crc->crc64),
.nonce = crc->crc64.nonce,
@@ -313,7 +288,7 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
return ret;
}
- case BCH_EXTENT_CRC128: {
+ case BCH_EXTENT_ENTRY_crc128: {
struct bch_extent_crc_unpacked ret = (struct bch_extent_crc_unpacked) {
common_fields(crc->crc128),
.nonce = crc->crc128.nonce,
@@ -346,23 +321,25 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
#define extent_for_each_entry(_e, _entry) \
extent_for_each_entry_from(_e, _entry, (_e).v->start)
-/* Iterate over crcs only: */
+/* Iterate over pointers only: */
-#define __extent_crc_next(_e, _p) \
+#define extent_ptr_next(_e, _ptr) \
({ \
- typeof(&(_e).v->start[0]) _entry = _p; \
+ typeof(&(_e).v->start[0]) _entry; \
\
- while ((_entry) < extent_entry_last(_e) && \
- !extent_entry_is_crc(_entry)) \
- (_entry) = extent_entry_next(_entry); \
+ extent_for_each_entry_from(_e, _entry, to_entry(_ptr)) \
+ if (extent_entry_is_ptr(_entry)) \
+ break; \
\
- entry_to_crc(_entry < extent_entry_last(_e) ? _entry : NULL); \
+ _entry < extent_entry_last(_e) ? entry_to_ptr(_entry) : NULL; \
})
-#define __extent_for_each_crc(_e, _crc) \
- for ((_crc) = __extent_crc_next(_e, (_e).v->start); \
- (_crc); \
- (_crc) = __extent_crc_next(_e, extent_entry_next(to_entry(_crc))))
+#define extent_for_each_ptr(_e, _ptr) \
+ for ((_ptr) = &(_e).v->start->ptr; \
+ ((_ptr) = extent_ptr_next(_e, _ptr)); \
+ (_ptr)++)
+
+/* Iterate over crcs only: */
#define extent_crc_next(_e, _crc, _iter) \
({ \
@@ -383,69 +360,61 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc)
/* Iterate over pointers, with crcs: */
-#define extent_ptr_crc_next(_e, _ptr, _crc) \
+static inline struct extent_ptr_decoded
+__extent_ptr_decoded_init(const struct bkey *k)
+{
+ return (struct extent_ptr_decoded) {
+ .crc = bch2_extent_crc_unpack(k, NULL),
+ };
+}
+
+#define EXTENT_ITERATE_EC (1 << 0)
+
+#define __extent_ptr_next_decode(_e, _ptr, _entry) \
({ \
__label__ out; \
- typeof(&(_e).v->start[0]) _entry; \
\
- extent_for_each_entry_from(_e, _entry, to_entry(_ptr)) \
- if (extent_entry_is_crc(_entry)) { \
- (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_entry));\
- } else { \
- _ptr = entry_to_ptr(_entry); \
+ extent_for_each_entry_from(_e, _entry, _entry) \
+ switch (extent_entry_type(_entry)) { \
+ case BCH_EXTENT_ENTRY_ptr: \
+ (_ptr).ptr = _entry->ptr; \
goto out; \
+ case BCH_EXTENT_ENTRY_crc32: \
+ case BCH_EXTENT_ENTRY_crc64: \
+ case BCH_EXTENT_ENTRY_crc128: \
+ (_ptr).crc = bch2_extent_crc_unpack((_e).k, \
+ entry_to_crc(_entry)); \
+ break; \
} \
\
- _ptr = NULL; \
out: \
- _ptr; \
-})
-
-#define extent_for_each_ptr_crc(_e, _ptr, _crc) \
- for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \
- (_ptr) = &(_e).v->start->ptr; \
- ((_ptr) = extent_ptr_crc_next(_e, _ptr, _crc)); \
- (_ptr)++)
-
-/* Iterate over pointers only, and from a given position: */
-
-#define extent_ptr_next(_e, _ptr) \
-({ \
- struct bch_extent_crc_unpacked _crc; \
- \
- extent_ptr_crc_next(_e, _ptr, _crc); \
+ _entry < extent_entry_last(_e); \
})
-#define extent_for_each_ptr(_e, _ptr) \
- for ((_ptr) = &(_e).v->start->ptr; \
- ((_ptr) = extent_ptr_next(_e, _ptr)); \
- (_ptr)++)
-
-#define extent_ptr_prev(_e, _ptr) \
-({ \
- typeof(&(_e).v->start->ptr) _p; \
- typeof(&(_e).v->start->ptr) _prev = NULL; \
- \
- extent_for_each_ptr(_e, _p) { \
- if (_p == (_ptr)) \
- break; \
- _prev = _p; \
- } \
- \
- _prev; \
-})
+#define extent_for_each_ptr_decode(_e, _ptr, _entry) \
+ for ((_ptr) = __extent_ptr_decoded_init((_e).k), \
+ (_entry) = (_e).v->start; \
+ __extent_ptr_next_decode(_e, _ptr, _entry); \
+ (_entry) = extent_entry_next(_entry))
-/*
- * Use this when you'll be dropping pointers as you iterate. Quadratic,
- * unfortunately:
- */
-#define extent_for_each_ptr_backwards(_e, _ptr) \
- for ((_ptr) = extent_ptr_prev(_e, NULL); \
- (_ptr); \
- (_ptr) = extent_ptr_prev(_e, _ptr))
+/* Iterate over pointers backwards: */
void bch2_extent_crc_append(struct bkey_i_extent *,
struct bch_extent_crc_unpacked);
+void bch2_extent_ptr_decoded_append(struct bkey_i_extent *,
+ struct extent_ptr_decoded *);
+
+static inline void __extent_entry_insert(struct bkey_i_extent *e,
+ union bch_extent_entry *dst,
+ union bch_extent_entry *new)
+{
+ union bch_extent_entry *end = extent_entry_last(extent_i_to_s(e));
+
+ memmove_u64s_up((u64 *) dst + extent_entry_u64s(new),
+ dst, (u64 *) end - (u64 *) dst);
+ e->k.u64s += extent_entry_u64s(new);
+ memcpy(dst, new, extent_entry_bytes(new));
+}
static inline void __extent_entry_push(struct bkey_i_extent *e)
{
@@ -536,10 +505,23 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent,
struct bch_extent_crc_unpacked);
bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked);
-void bch2_extent_drop_redundant_crcs(struct bkey_s_extent);
-void __bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
-void bch2_extent_drop_ptr(struct bkey_s_extent, struct bch_extent_ptr *);
+union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent,
+ struct bch_extent_ptr *);
+
+#define bch2_extent_drop_ptrs(_e, _ptr, _cond) \
+do { \
+ _ptr = &(_e).v->start->ptr; \
+ \
+	while ((_ptr = extent_ptr_next(_e, _ptr))) {			\
+ if (_cond) { \
+ _ptr = (void *) bch2_extent_drop_ptr(_e, _ptr); \
+ continue; \
+ } \
+ \
+ (_ptr)++; \
+ } \
+} while (0)
bool bch2_cut_front(struct bpos, struct bkey_i *);
bool bch2_cut_back(struct bpos, struct bkey *);
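The extent_for_each_ptr_decode() iterator defined above replaces the old extent_for_each_ptr_crc(): each step yields a struct extent_ptr_decoded whose .crc member is the unpacked crc governing .ptr, so callers no longer track the two separately. A minimal caller-side sketch (the helper name is hypothetical; the body mirrors the move.c hunk further down):

	static unsigned max_uncompressed_sectors(struct bkey_s_c_extent e)
	{
		const union bch_extent_entry *entry;
		struct extent_ptr_decoded p;
		unsigned sectors = e.k->size;

		/* p.crc carries the checksum/compression info for p.ptr: */
		extent_for_each_ptr_decode(e, p, entry)
			sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);

		return sectors;
	}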
diff --git a/libbcachefs/extents_types.h b/libbcachefs/extents_types.h
index 76139f93..02c62567 100644
--- a/libbcachefs/extents_types.h
+++ b/libbcachefs/extents_types.h
@@ -18,9 +18,18 @@ struct bch_extent_crc_unpacked {
struct bch_csum csum;
};
-struct extent_pick_ptr {
- struct bch_extent_ptr ptr;
+struct extent_ptr_decoded {
struct bch_extent_crc_unpacked crc;
+ struct bch_extent_ptr ptr;
+};
+
+struct bch_io_failures {
+ u8 nr;
+ struct bch_dev_io_failures {
+ u8 dev;
+ u8 nr_failed;
+ u8 nr_retries;
+ } devs[BCH_REPLICAS_MAX];
};
#endif /* _BCACHEFS_EXTENTS_TYPES_H */
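struct bch_io_failures replaces the flat bch_devs_mask "avoid" bitmap in the read retry path (see the io.c hunks below), keeping per-device failure and retry counts instead. The bch2_mark_io_failure() helper used by the retry code is not part of the hunks shown here; the following is only a sketch of the shape such a helper could take, under that assumption:

	/* Illustrative only, not the real implementation: */
	static void mark_io_failure_sketch(struct bch_io_failures *failed,
					   const struct extent_ptr_decoded *p)
	{
		unsigned i;

		for (i = 0; i < failed->nr; i++)
			if (failed->devs[i].dev == p->ptr.dev) {
				failed->devs[i].nr_failed++;
				return;
			}

		if (failed->nr < BCH_REPLICAS_MAX)
			failed->devs[failed->nr++] = (struct bch_dev_io_failures) {
				.dev		= p->ptr.dev,
				.nr_failed	= 1,
			};
	}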
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 250dd55f..986bb7d2 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -963,12 +963,12 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter,
if (bkey_extent_is_data(k.k)) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- struct bch_extent_crc_unpacked crc;
const union bch_extent_entry *i;
+ struct extent_ptr_decoded p;
- extent_for_each_crc(e, crc, i)
- want_full_extent |= ((crc.csum_type != 0) |
- (crc.compression_type != 0));
+ extent_for_each_ptr_decode(e, p, i)
+ want_full_extent |= ((p.crc.csum_type != 0) |
+ (p.crc.compression_type != 0));
}
readpage_bio_extend(readpages_iter,
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index ae875870..1cf72910 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -973,27 +973,27 @@ static int bch2_fill_extent(struct fiemap_extent_info *info,
{
if (bkey_extent_is_data(&k->k)) {
struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k);
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
int ret;
- extent_for_each_ptr_crc(e, ptr, crc) {
+ extent_for_each_ptr_decode(e, p, entry) {
int flags2 = 0;
- u64 offset = ptr->offset;
+ u64 offset = p.ptr.offset;
- if (crc.compression_type)
+ if (p.crc.compression_type)
flags2 |= FIEMAP_EXTENT_ENCODED;
else
- offset += crc.offset;
+ offset += p.crc.offset;
if ((offset & (PAGE_SECTORS - 1)) ||
(e.k->size & (PAGE_SECTORS - 1)))
flags2 |= FIEMAP_EXTENT_NOT_ALIGNED;
ret = fiemap_fill_next_extent(info,
- bkey_start_offset(e.k) << 9,
- offset << 9,
- e.k->size << 9, flags|flags2);
+ bkey_start_offset(e.k) << 9,
+ offset << 9,
+ e.k->size << 9, flags|flags2);
if (ret)
return ret;
}
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 021a80df..eceb4865 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -310,9 +310,9 @@ static void __bch2_write_index(struct bch_write_op *op)
bkey_copy(dst, src);
e = bkey_i_to_s_extent(dst);
- extent_for_each_ptr_backwards(e, ptr)
- if (test_bit(ptr->dev, op->failed.d))
- bch2_extent_drop_ptr(e, ptr);
+
+ bch2_extent_drop_ptrs(e, ptr,
+ test_bit(ptr->dev, op->failed.d));
if (!bch2_extent_nr_ptrs(e.c)) {
ret = -EIO;
@@ -320,7 +320,8 @@ static void __bch2_write_index(struct bch_write_op *op)
}
if (!(op->flags & BCH_WRITE_NOMARK_REPLICAS)) {
- ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, e.s_c);
+ ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
+ e.s_c);
if (ret)
goto err;
}
@@ -1008,7 +1009,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
noinline
static struct promote_op *__promote_alloc(struct bch_fs *c,
struct bpos pos,
- struct extent_pick_ptr *pick,
+ struct extent_ptr_decoded *pick,
struct bch_io_opts opts,
unsigned rbio_sectors,
struct bch_read_bio **rbio)
@@ -1089,7 +1090,7 @@ err:
static inline struct promote_op *promote_alloc(struct bch_fs *c,
struct bvec_iter iter,
struct bkey_s_c k,
- struct extent_pick_ptr *pick,
+ struct extent_ptr_decoded *pick,
struct bch_io_opts opts,
unsigned flags,
struct bch_read_bio **rbio,
@@ -1183,7 +1184,8 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, u64 inode,
- struct bch_devs_mask *avoid, unsigned flags)
+ struct bch_io_failures *failed,
+ unsigned flags)
{
struct btree_iter iter;
BKEY_PADDED(k) tmp;
@@ -1217,7 +1219,7 @@ retry:
goto out;
}
- ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
+ ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
if (ret == READ_RETRY)
goto retry;
if (ret)
@@ -1231,7 +1233,7 @@ out:
static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
struct bvec_iter bvec_iter, u64 inode,
- struct bch_devs_mask *avoid, unsigned flags)
+ struct bch_io_failures *failed, unsigned flags)
{
struct btree_iter iter;
struct bkey_s_c k;
@@ -1254,7 +1256,7 @@ retry:
(k.k->p.offset - bvec_iter.bi_sector) << 9);
swap(bvec_iter.bi_size, bytes);
- ret = __bch2_read_extent(c, rbio, bvec_iter, k, avoid, flags);
+ ret = __bch2_read_extent(c, rbio, bvec_iter, k, failed, flags);
switch (ret) {
case READ_RETRY:
goto retry;
@@ -1290,14 +1292,12 @@ static void bch2_rbio_retry(struct work_struct *work)
struct bvec_iter iter = rbio->bvec_iter;
unsigned flags = rbio->flags;
u64 inode = rbio->pos.inode;
- struct bch_devs_mask avoid;
+ struct bch_io_failures failed = { .nr = 0 };
trace_read_retry(&rbio->bio);
- memset(&avoid, 0, sizeof(avoid));
-
if (rbio->retry == READ_RETRY_AVOID)
- __set_bit(rbio->pick.ptr.dev, avoid.d);
+ bch2_mark_io_failure(&failed, &rbio->pick);
rbio->bio.bi_status = 0;
@@ -1307,9 +1307,9 @@ static void bch2_rbio_retry(struct work_struct *work)
flags &= ~BCH_READ_MAY_PROMOTE;
if (flags & BCH_READ_NODECODE)
- bch2_read_retry_nodecode(c, rbio, iter, inode, &avoid, flags);
+ bch2_read_retry_nodecode(c, rbio, iter, inode, &failed, flags);
else
- bch2_read_retry(c, rbio, iter, inode, &avoid, flags);
+ bch2_read_retry(c, rbio, iter, inode, &failed, flags);
}
static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
@@ -1396,7 +1396,7 @@ out:
}
static bool should_narrow_crcs(struct bkey_s_c k,
- struct extent_pick_ptr *pick,
+ struct extent_ptr_decoded *pick,
unsigned flags)
{
return !(flags & BCH_READ_IN_RETRY) &&
@@ -1549,9 +1549,9 @@ static void bch2_read_endio(struct bio *bio)
int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bvec_iter iter, struct bkey_s_c k,
- struct bch_devs_mask *avoid, unsigned flags)
+ struct bch_io_failures *failed, unsigned flags)
{
- struct extent_pick_ptr pick;
+ struct extent_ptr_decoded pick;
struct bch_read_bio *rbio = NULL;
struct bch_dev *ca;
struct promote_op *promote = NULL;
@@ -1559,7 +1559,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
struct bpos pos = bkey_start_pos(k.k);
int pick_ret;
- pick_ret = bch2_extent_pick_ptr(c, k, avoid, &pick);
+ pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick);
/* hole or reservation - just zero fill: */
if (!pick_ret)
@@ -1723,7 +1723,7 @@ noclone:
rbio = bch2_rbio_free(rbio);
if (ret == READ_RETRY_AVOID) {
- __set_bit(pick.ptr.dev, avoid->d);
+ bch2_mark_io_failure(failed, &pick);
ret = READ_RETRY;
}
diff --git a/libbcachefs/io.h b/libbcachefs/io.h
index 1724232f..5bd5f846 100644
--- a/libbcachefs/io.h
+++ b/libbcachefs/io.h
@@ -94,10 +94,10 @@ static inline struct bch_write_bio *wbio_init(struct bio *bio)
struct bch_devs_mask;
struct cache_promote_op;
-struct extent_pick_ptr;
+struct extent_ptr_decoded;
int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *, struct bvec_iter,
- struct bkey_s_c, struct bch_devs_mask *, unsigned);
+ struct bkey_s_c, struct bch_io_failures *, unsigned);
void bch2_read(struct bch_fs *, struct bch_read_bio *, u64);
enum bch_read_flags {
diff --git a/libbcachefs/io_types.h b/libbcachefs/io_types.h
index fe5779b3..8ec846cc 100644
--- a/libbcachefs/io_types.h
+++ b/libbcachefs/io_types.h
@@ -54,7 +54,7 @@ struct bch_read_bio {
struct bch_devs_list devs_have;
- struct extent_pick_ptr pick;
+ struct extent_ptr_decoded pick;
/* start pos of data we read (may not be pos of data we want) */
struct bpos pos;
struct bversion version;
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 6759810b..5870392e 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -352,10 +352,6 @@ static inline bool journal_flushes_device(struct bch_dev *ca)
return true;
}
-int bch2_journal_mark(struct bch_fs *, struct list_head *);
-void bch2_journal_entries_free(struct list_head *);
-int bch2_journal_replay(struct bch_fs *, struct list_head *);
-
static inline void bch2_journal_set_replay_done(struct journal *j)
{
BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 2f88e242..0cb1bc3c 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -429,7 +429,6 @@ static int journal_read_bucket(struct bch_dev *ca,
{
struct bch_fs *c = ca->fs;
struct journal_device *ja = &ca->journal;
- struct bio *bio = ja->bio;
struct jset *j = NULL;
unsigned sectors, sectors_read = 0;
u64 offset = bucket_to_sector(ca, ja->buckets[bucket]),
@@ -441,10 +440,14 @@ static int journal_read_bucket(struct bch_dev *ca,
while (offset < end) {
if (!sectors_read) {
-reread: sectors_read = min_t(unsigned,
+ struct bio *bio;
+reread:
+ sectors_read = min_t(unsigned,
end - offset, buf->size >> 9);
- bio_reset(bio);
+ bio = bio_kmalloc(GFP_KERNEL,
+ buf_pages(buf->data,
+ sectors_read << 9));
bio_set_dev(bio, ca->disk_sb.bdev);
bio->bi_iter.bi_sector = offset;
bio->bi_iter.bi_size = sectors_read << 9;
@@ -452,6 +455,7 @@ reread: sectors_read = min_t(unsigned,
bch2_bio_map(bio, buf->data);
ret = submit_bio_wait(bio);
+ bio_put(bio);
if (bch2_dev_io_err_on(ret, ca,
"journal read from sector %llu",
@@ -849,28 +853,6 @@ fsck_err:
/* journal replay: */
-int bch2_journal_mark(struct bch_fs *c, struct list_head *list)
-{
- struct bkey_i *k, *n;
- struct jset_entry *j;
- struct journal_replay *r;
- int ret;
-
- list_for_each_entry(r, list, list)
- for_each_jset_key(k, n, j, &r->j) {
- enum bkey_type type = bkey_type(j->level, j->btree_id);
- struct bkey_s_c k_s_c = bkey_i_to_s_c(k);
-
- if (btree_type_has_ptrs(type)) {
- ret = bch2_btree_mark_key_initial(c, type, k_s_c);
- if (ret)
- return ret;
- }
- }
-
- return 0;
-}
-
int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
{
struct journal *j = &c->journal;
@@ -1064,14 +1046,19 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
* entry - that's why we drop pointers to devices <= current free space,
* i.e. whichever device was limiting the current journal entry size.
*/
- extent_for_each_ptr_backwards(e, ptr) {
- ca = bch_dev_bkey_exists(c, ptr->dev);
+ bch2_extent_drop_ptrs(e, ptr, ({
+ ca = bch_dev_bkey_exists(c, ptr->dev);
- if (ca->mi.state != BCH_MEMBER_STATE_RW ||
- ca->journal.sectors_free <= sectors)
- __bch2_extent_drop_ptr(e, ptr);
- else
- ca->journal.sectors_free -= sectors;
+ ca->mi.state != BCH_MEMBER_STATE_RW ||
+ ca->journal.sectors_free <= sectors;
+ }));
+
+ extent_for_each_ptr(e, ptr) {
+ ca = bch_dev_bkey_exists(c, ptr->dev);
+
+ BUG_ON(ca->mi.state != BCH_MEMBER_STATE_RW ||
+ ca->journal.sectors_free <= sectors);
+ ca->journal.sectors_free -= sectors;
}
replicas = bch2_extent_nr_ptrs(e.c);
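journal_read_bucket() now allocates a fresh bio for every read instead of reusing ja->bio, sizing it with the new buf_pages() helper (added to util.h at the end of this diff) so that userspace buffers that are not page aligned still get enough bvecs. A hedged sketch of that read pattern in isolation; the function name, error handling and REQ_OP setup are illustrative:

	static int read_sectors_sketch(struct bch_dev *ca, void *data,
				       unsigned sectors, u64 sector)
	{
		/* buf_pages() accounts for data not starting on a page boundary: */
		struct bio *bio = bio_kmalloc(GFP_KERNEL,
					      buf_pages(data, sectors << 9));
		int ret;

		if (!bio)
			return -ENOMEM;

		bio_set_dev(bio, ca->disk_sb.bdev);
		bio->bi_iter.bi_sector	= sector;
		bio->bi_iter.bi_size	= sectors << 9;
		bio->bi_opf		= REQ_OP_READ;
		bch2_bio_map(bio, data);

		ret = submit_bio_wait(bio);
		bio_put(bio);		/* one-shot bio: freed after each read */
		return ret;
	}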
diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h
index e303df92..d0a652cf 100644
--- a/libbcachefs/journal_io.h
+++ b/libbcachefs/journal_io.h
@@ -36,6 +36,8 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
int bch2_journal_read(struct bch_fs *, struct list_head *);
+void bch2_journal_entries_free(struct list_head *);
+int bch2_journal_replay(struct bch_fs *, struct list_head *);
int bch2_journal_entry_sectors(struct journal *);
void bch2_journal_write(struct closure *);
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index f5cbf44d..c0dfe1c6 100644
--- a/libbcachefs/migrate.c
+++ b/libbcachefs/migrate.c
@@ -50,7 +50,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
!(ret = btree_iter_err(k))) {
if (!bkey_extent_is_data(k.k) ||
!bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
- ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, k);
+ ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k);
if (ret)
break;
bch2_btree_iter_next(&iter);
@@ -71,7 +71,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
*/
bch2_extent_normalize(c, e.s);
- ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
+ ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
bkey_i_to_s_c(&tmp.key));
if (ret)
break;
@@ -134,7 +134,7 @@ retry:
*/
bch2_btree_iter_downgrade(&iter);
- ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+ ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
if (ret)
goto err;
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index e75e6e71..c9495ab7 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -67,8 +67,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
struct bkey_i_extent *insert, *new =
bkey_i_to_extent(bch2_keylist_front(keys));
BKEY_PADDED(k) _new, _insert;
- struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
bool did_work = false;
int nr;
@@ -98,15 +98,12 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bch2_cut_back(new->k.p, &insert->k);
bch2_cut_back(insert->k.p, &new->k);
- if (m->data_cmd == DATA_REWRITE) {
- ptr = (struct bch_extent_ptr *)
- bch2_extent_has_device(extent_i_to_s_c(insert),
- m->data_opts.rewrite_dev);
- bch2_extent_drop_ptr(extent_i_to_s(insert), ptr);
- }
+ if (m->data_cmd == DATA_REWRITE)
+ bch2_extent_drop_device(extent_i_to_s(insert),
+ m->data_opts.rewrite_dev);
- extent_for_each_ptr_crc(extent_i_to_s(new), ptr, crc) {
- if (bch2_extent_has_device(extent_i_to_s_c(insert), ptr->dev)) {
+ extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
+ if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) {
/*
* raced with another move op? extent already
* has a pointer to the device we just wrote
@@ -115,8 +112,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
continue;
}
- bch2_extent_crc_append(insert, crc);
- extent_ptr_append(insert, *ptr);
+ bch2_extent_ptr_decoded_append(insert, &p);
did_work = true;
}
@@ -153,7 +149,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
goto next;
}
- ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER,
+ ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
extent_i_to_s_c(insert).s_c);
if (ret)
break;
@@ -379,8 +375,8 @@ static int bch2_move_extent(struct bch_fs *c,
struct data_opts data_opts)
{
struct moving_io *io;
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
unsigned sectors = e.k->size, pages;
int ret = -ENOMEM;
@@ -393,8 +389,8 @@ static int bch2_move_extent(struct bch_fs *c,
SECTORS_IN_FLIGHT_PER_DEVICE);
/* write path might have to decompress data: */
- extent_for_each_ptr_crc(e, ptr, crc)
- sectors = max_t(unsigned, sectors, crc.uncompressed_size);
+ extent_for_each_ptr_decode(e, p, entry)
+ sectors = max_t(unsigned, sectors, p.crc.uncompressed_size);
pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
io = kzalloc(sizeof(struct moving_io) +
@@ -605,7 +601,7 @@ static int bch2_gc_data_replicas(struct bch_fs *c)
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN,
BTREE_ITER_PREFETCH, k) {
- ret = bch2_mark_bkey_replicas(c, BCH_DATA_USER, k);
+ ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k);
if (ret)
break;
}
@@ -629,7 +625,7 @@ static int bch2_gc_btree_replicas(struct bch_fs *c)
for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) {
- ret = bch2_mark_bkey_replicas(c, BCH_DATA_BTREE,
+ ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE,
bkey_i_to_s_c(&b->key));
bch2_btree_iter_cond_resched(&iter);
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index 46886562..70318f2c 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -160,7 +160,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
.sectors = bucket_sectors_used(m),
.offset = bucket_to_sector(ca, b),
};
- heap_add_or_replace(h, e, -sectors_used_cmp);
+ heap_add_or_replace(h, e, -sectors_used_cmp, NULL);
}
up_read(&ca->bucket_lock);
up_read(&c->gc_lock);
@@ -169,7 +169,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
sectors_to_move += i->sectors;
while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) {
- BUG_ON(!heap_pop(h, e, -sectors_used_cmp));
+ BUG_ON(!heap_pop(h, e, -sectors_used_cmp, NULL));
sectors_to_move -= e.sectors;
}
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 3fbe7b10..85ea4c6b 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -17,17 +17,16 @@
#include <trace/events/bcachefs.h>
static inline bool rebalance_ptr_pred(struct bch_fs *c,
- const struct bch_extent_ptr *ptr,
- struct bch_extent_crc_unpacked crc,
+ struct extent_ptr_decoded p,
struct bch_io_opts *io_opts)
{
if (io_opts->background_target &&
- !bch2_dev_in_target(c, ptr->dev, io_opts->background_target) &&
- !ptr->cached)
+ !bch2_dev_in_target(c, p.ptr.dev, io_opts->background_target) &&
+ !p.ptr.cached)
return true;
if (io_opts->background_compression &&
- crc.compression_type !=
+ p.crc.compression_type !=
bch2_compression_opt_to_type[io_opts->background_compression])
return true;
@@ -38,8 +37,8 @@ void bch2_rebalance_add_key(struct bch_fs *c,
struct bkey_s_c k,
struct bch_io_opts *io_opts)
{
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
struct bkey_s_c_extent e;
if (!bkey_extent_is_data(k.k))
@@ -51,13 +50,13 @@ void bch2_rebalance_add_key(struct bch_fs *c,
e = bkey_s_c_to_extent(k);
- extent_for_each_ptr_crc(e, ptr, crc)
- if (rebalance_ptr_pred(c, ptr, crc, io_opts)) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ extent_for_each_ptr_decode(e, p, entry)
+ if (rebalance_ptr_pred(c, p, io_opts)) {
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
- if (atomic64_add_return(crc.compressed_size,
+ if (atomic64_add_return(p.crc.compressed_size,
&ca->rebalance_work) ==
- crc.compressed_size)
+ p.crc.compressed_size)
rebalance_wakeup(c);
}
}
@@ -75,16 +74,16 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
/* Make sure we have room to add a new pointer: */
if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
BKEY_EXTENT_VAL_U64s_MAX)
return DATA_SKIP;
- extent_for_each_ptr_crc(e, ptr, crc)
- if (rebalance_ptr_pred(c, ptr, crc, io_opts))
+ extent_for_each_ptr_decode(e, p, entry)
+ if (rebalance_ptr_pred(c, p, io_opts))
goto found;
return DATA_SKIP;
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 1e94d35f..b0cef995 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -3,17 +3,32 @@
#include "replicas.h"
#include "super-io.h"
+struct bch_replicas_entry_padded {
+ struct bch_replicas_entry e;
+ u8 pad[BCH_SB_MEMBERS_MAX];
+};
+
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
struct bch_replicas_cpu *);
/* Replicas tracking - in memory: */
+static inline int u8_cmp(u8 l, u8 r)
+{
+ return (l > r) - (l < r);
+}
+
+static void replicas_entry_sort(struct bch_replicas_entry *e)
+{
+ bubble_sort(e->devs, e->nr_devs, u8_cmp);
+}
+
#define for_each_cpu_replicas_entry(_r, _i) \
for (_i = (_r)->entries; \
(void *) (_i) < (void *) (_r)->entries + (_r)->nr * (_r)->entry_size;\
_i = (void *) (_i) + (_r)->entry_size)
-static inline struct bch_replicas_cpu_entry *
+static inline struct bch_replicas_entry *
cpu_replicas_entry(struct bch_replicas_cpu *r, unsigned i)
{
return (void *) r->entries + r->entry_size * i;
@@ -24,84 +39,79 @@ static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
}
-static inline bool replicas_test_dev(struct bch_replicas_cpu_entry *e,
- unsigned dev)
+static int replicas_entry_to_text(struct bch_replicas_entry *e,
+ char *buf, size_t size)
{
- return (e->devs[dev >> 3] & (1 << (dev & 7))) != 0;
-}
+ char *out = buf, *end = out + size;
+ unsigned i;
-static inline void replicas_set_dev(struct bch_replicas_cpu_entry *e,
- unsigned dev)
-{
- e->devs[dev >> 3] |= 1 << (dev & 7);
-}
+ out += scnprintf(out, end - out, "%u: [", e->data_type);
-static inline unsigned replicas_dev_slots(struct bch_replicas_cpu *r)
-{
- return (r->entry_size -
- offsetof(struct bch_replicas_cpu_entry, devs)) * 8;
+ for (i = 0; i < e->nr_devs; i++)
+ out += scnprintf(out, end - out,
+ i ? " %u" : "%u", e->devs[i]);
+ out += scnprintf(out, end - out, "]");
+
+ return out - buf;
}
int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *r,
char *buf, size_t size)
{
char *out = buf, *end = out + size;
- struct bch_replicas_cpu_entry *e;
+ struct bch_replicas_entry *e;
bool first = true;
- unsigned i;
for_each_cpu_replicas_entry(r, e) {
- bool first_e = true;
-
if (!first)
out += scnprintf(out, end - out, " ");
first = false;
- out += scnprintf(out, end - out, "%u: [", e->data_type);
-
- for (i = 0; i < replicas_dev_slots(r); i++)
- if (replicas_test_dev(e, i)) {
- if (!first_e)
- out += scnprintf(out, end - out, " ");
- first_e = false;
- out += scnprintf(out, end - out, "%u", i);
- }
- out += scnprintf(out, end - out, "]");
+ out += replicas_entry_to_text(e, out, end - out);
}
return out - buf;
}
-static inline unsigned bkey_to_replicas(struct bkey_s_c_extent e,
- enum bch_data_type data_type,
- struct bch_replicas_cpu_entry *r,
- unsigned *max_dev)
+static void extent_to_replicas(struct bkey_s_c k,
+ struct bch_replicas_entry *r)
{
- const struct bch_extent_ptr *ptr;
- unsigned nr = 0;
-
- BUG_ON(!data_type ||
- data_type == BCH_DATA_SB ||
- data_type >= BCH_DATA_NR);
-
- memset(r, 0, sizeof(*r));
- r->data_type = data_type;
+ if (bkey_extent_is_data(k.k)) {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+
+ extent_for_each_ptr_decode(e, p, entry)
+ if (!p.ptr.cached)
+ r->devs[r->nr_devs++] = p.ptr.dev;
+ }
+}
- *max_dev = 0;
+static void bkey_to_replicas(enum bkey_type type,
+ struct bkey_s_c k,
+ struct bch_replicas_entry *e)
+{
+ e->nr_devs = 0;
+
+ switch (type) {
+ case BKEY_TYPE_BTREE:
+ e->data_type = BCH_DATA_BTREE;
+ extent_to_replicas(k, e);
+ break;
+ case BKEY_TYPE_EXTENTS:
+ e->data_type = BCH_DATA_USER;
+ extent_to_replicas(k, e);
+ break;
+ default:
+ break;
+ }
- extent_for_each_ptr(e, ptr)
- if (!ptr->cached) {
- *max_dev = max_t(unsigned, *max_dev, ptr->dev);
- replicas_set_dev(r, ptr->dev);
- nr++;
- }
- return nr;
+ replicas_entry_sort(e);
}
static inline void devlist_to_replicas(struct bch_devs_list devs,
enum bch_data_type data_type,
- struct bch_replicas_cpu_entry *r,
- unsigned *max_dev)
+ struct bch_replicas_entry *e)
{
unsigned i;
@@ -109,28 +119,24 @@ static inline void devlist_to_replicas(struct bch_devs_list devs,
data_type == BCH_DATA_SB ||
data_type >= BCH_DATA_NR);
- memset(r, 0, sizeof(*r));
- r->data_type = data_type;
+ e->data_type = data_type;
+ e->nr_devs = 0;
- *max_dev = 0;
+ for (i = 0; i < devs.nr; i++)
+ e->devs[e->nr_devs++] = devs.devs[i];
- for (i = 0; i < devs.nr; i++) {
- *max_dev = max_t(unsigned, *max_dev, devs.devs[i]);
- replicas_set_dev(r, devs.devs[i]);
- }
+ replicas_entry_sort(e);
}
static struct bch_replicas_cpu *
cpu_replicas_add_entry(struct bch_replicas_cpu *old,
- struct bch_replicas_cpu_entry new_entry,
- unsigned max_dev)
+ struct bch_replicas_entry *new_entry)
{
struct bch_replicas_cpu *new;
unsigned i, nr, entry_size;
- entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
- DIV_ROUND_UP(max_dev + 1, 8);
- entry_size = max(entry_size, old->entry_size);
+ entry_size = max_t(unsigned, old->entry_size,
+ replicas_entry_bytes(new_entry));
nr = old->nr + 1;
new = kzalloc(sizeof(struct bch_replicas_cpu) +
@@ -144,30 +150,28 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
for (i = 0; i < old->nr; i++)
memcpy(cpu_replicas_entry(new, i),
cpu_replicas_entry(old, i),
- min(new->entry_size, old->entry_size));
+ old->entry_size);
memcpy(cpu_replicas_entry(new, old->nr),
- &new_entry,
- new->entry_size);
+ new_entry,
+ replicas_entry_bytes(new_entry));
bch2_cpu_replicas_sort(new);
return new;
}
static bool replicas_has_entry(struct bch_replicas_cpu *r,
- struct bch_replicas_cpu_entry search,
- unsigned max_dev)
+ struct bch_replicas_entry *search)
{
- return max_dev < replicas_dev_slots(r) &&
+ return replicas_entry_bytes(search) <= r->entry_size &&
eytzinger0_find(r->entries, r->nr,
r->entry_size,
- memcmp, &search) < r->nr;
+ memcmp, search) < r->nr;
}
noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
- struct bch_replicas_cpu_entry new_entry,
- unsigned max_dev)
+ struct bch_replicas_entry *new_entry)
{
struct bch_replicas_cpu *old_gc, *new_gc = NULL, *old_r, *new_r = NULL;
int ret = -ENOMEM;
@@ -176,16 +180,16 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
old_gc = rcu_dereference_protected(c->replicas_gc,
lockdep_is_held(&c->sb_lock));
- if (old_gc && !replicas_has_entry(old_gc, new_entry, max_dev)) {
- new_gc = cpu_replicas_add_entry(old_gc, new_entry, max_dev);
+ if (old_gc && !replicas_has_entry(old_gc, new_entry)) {
+ new_gc = cpu_replicas_add_entry(old_gc, new_entry);
if (!new_gc)
goto err;
}
old_r = rcu_dereference_protected(c->replicas,
lockdep_is_held(&c->sb_lock));
- if (!replicas_has_entry(old_r, new_entry, max_dev)) {
- new_r = cpu_replicas_add_entry(old_r, new_entry, max_dev);
+ if (!replicas_has_entry(old_r, new_entry)) {
+ new_r = cpu_replicas_add_entry(old_r, new_entry);
if (!new_r)
goto err;
@@ -220,47 +224,63 @@ err:
return ret;
}
+static int __bch2_mark_replicas(struct bch_fs *c,
+ struct bch_replicas_entry *devs)
+{
+ struct bch_replicas_cpu *r, *gc_r;
+ bool marked;
+
+ rcu_read_lock();
+ r = rcu_dereference(c->replicas);
+ gc_r = rcu_dereference(c->replicas_gc);
+ marked = replicas_has_entry(r, devs) &&
+ (!likely(gc_r) || replicas_has_entry(gc_r, devs));
+ rcu_read_unlock();
+
+ return likely(marked) ? 0
+ : bch2_mark_replicas_slowpath(c, devs);
+}
+
int bch2_mark_replicas(struct bch_fs *c,
enum bch_data_type data_type,
struct bch_devs_list devs)
{
- struct bch_replicas_cpu_entry search;
- struct bch_replicas_cpu *r, *gc_r;
- unsigned max_dev;
- bool marked;
+ struct bch_replicas_entry_padded search;
if (!devs.nr)
return 0;
- BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
+ memset(&search, 0, sizeof(search));
- devlist_to_replicas(devs, data_type, &search, &max_dev);
+ BUG_ON(devs.nr >= BCH_REPLICAS_MAX);
- rcu_read_lock();
- r = rcu_dereference(c->replicas);
- gc_r = rcu_dereference(c->replicas_gc);
- marked = replicas_has_entry(r, search, max_dev) &&
- (!likely(gc_r) || replicas_has_entry(gc_r, search, max_dev));
- rcu_read_unlock();
+ devlist_to_replicas(devs, data_type, &search.e);
- return likely(marked) ? 0
- : bch2_mark_replicas_slowpath(c, search, max_dev);
+ return __bch2_mark_replicas(c, &search.e);
}
int bch2_mark_bkey_replicas(struct bch_fs *c,
- enum bch_data_type data_type,
+ enum bkey_type type,
struct bkey_s_c k)
{
- struct bch_devs_list cached = bch2_bkey_cached_devs(k);
- unsigned i;
+ struct bch_replicas_entry_padded search;
int ret;
- for (i = 0; i < cached.nr; i++)
- if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
- bch2_dev_list_single(cached.devs[i]))))
- return ret;
+ if (type == BKEY_TYPE_EXTENTS) {
+ struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+ unsigned i;
+
+ for (i = 0; i < cached.nr; i++)
+ if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
+ bch2_dev_list_single(cached.devs[i]))))
+ return ret;
+ }
+
+ bkey_to_replicas(type, k, &search.e);
- return bch2_mark_replicas(c, data_type, bch2_bkey_dirty_devs(k));
+ return search.e.nr_devs
+ ? __bch2_mark_replicas(c, &search.e)
+ : 0;
}
int bch2_replicas_gc_end(struct bch_fs *c, int ret)
@@ -303,7 +323,7 @@ err:
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
struct bch_replicas_cpu *dst, *src;
- struct bch_replicas_cpu_entry *e;
+ struct bch_replicas_entry *e;
lockdep_assert_held(&c->replicas_gc_lock);
@@ -338,40 +358,19 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
/* Replicas tracking - superblock: */
-static void bch2_sb_replicas_nr_entries(struct bch_sb_field_replicas *r,
- unsigned *nr,
- unsigned *bytes,
- unsigned *max_dev)
-{
- struct bch_replicas_entry *i;
- unsigned j;
-
- *nr = 0;
- *bytes = sizeof(*r);
- *max_dev = 0;
-
- if (!r)
- return;
-
- for_each_replicas_entry(r, i) {
- for (j = 0; j < i->nr; j++)
- *max_dev = max_t(unsigned, *max_dev, i->devs[j]);
- (*nr)++;
- }
-
- *bytes = (void *) i - (void *) r;
-}
-
static struct bch_replicas_cpu *
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
{
+ struct bch_replicas_entry *e, *dst;
struct bch_replicas_cpu *cpu_r;
- unsigned i, nr, bytes, max_dev, entry_size;
-
- bch2_sb_replicas_nr_entries(sb_r, &nr, &bytes, &max_dev);
+ unsigned nr = 0, entry_size = 0;
- entry_size = offsetof(struct bch_replicas_cpu_entry, devs) +
- DIV_ROUND_UP(max_dev + 1, 8);
+ if (sb_r)
+ for_each_replicas_entry(sb_r, e) {
+ entry_size = max_t(unsigned, entry_size,
+ replicas_entry_bytes(e));
+ nr++;
+ }
cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
nr * entry_size, GFP_NOIO);
@@ -381,20 +380,14 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
cpu_r->nr = nr;
cpu_r->entry_size = entry_size;
- if (nr) {
- struct bch_replicas_cpu_entry *dst =
- cpu_replicas_entry(cpu_r, 0);
- struct bch_replicas_entry *src = sb_r->entries;
-
- while (dst < cpu_replicas_entry(cpu_r, nr)) {
- dst->data_type = src->data_type;
- for (i = 0; i < src->nr; i++)
- replicas_set_dev(dst, src->devs[i]);
+ nr = 0;
- src = replicas_entry_next(src);
- dst = (void *) dst + entry_size;
+ if (sb_r)
+ for_each_replicas_entry(sb_r, e) {
+ dst = cpu_replicas_entry(cpu_r, nr++);
+ memcpy(dst, e, replicas_entry_bytes(e));
+ replicas_entry_sort(dst);
}
- }
bch2_cpu_replicas_sort(cpu_r);
return cpu_r;
@@ -422,20 +415,16 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
struct bch_replicas_cpu *r)
{
struct bch_sb_field_replicas *sb_r;
- struct bch_replicas_entry *sb_e;
- struct bch_replicas_cpu_entry *e;
- size_t i, bytes;
+ struct bch_replicas_entry *dst, *src;
+ size_t bytes;
bytes = sizeof(struct bch_sb_field_replicas);
- for_each_cpu_replicas_entry(r, e) {
- bytes += sizeof(struct bch_replicas_entry);
- for (i = 0; i < r->entry_size - 1; i++)
- bytes += hweight8(e->devs[i]);
- }
+ for_each_cpu_replicas_entry(r, src)
+ bytes += replicas_entry_bytes(src);
sb_r = bch2_sb_resize_replicas(&c->disk_sb,
- DIV_ROUND_UP(sizeof(*sb_r) + bytes, sizeof(u64)));
+ DIV_ROUND_UP(bytes, sizeof(u64)));
if (!sb_r)
return -ENOSPC;
@@ -443,22 +432,42 @@ static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
vstruct_end(&sb_r->field) -
(void *) &sb_r->entries);
- sb_e = sb_r->entries;
- for_each_cpu_replicas_entry(r, e) {
- sb_e->data_type = e->data_type;
+ dst = sb_r->entries;
+ for_each_cpu_replicas_entry(r, src) {
+ memcpy(dst, src, replicas_entry_bytes(src));
- for (i = 0; i < replicas_dev_slots(r); i++)
- if (replicas_test_dev(e, i))
- sb_e->devs[sb_e->nr++] = i;
+ dst = replicas_entry_next(dst);
- sb_e = replicas_entry_next(sb_e);
-
- BUG_ON((void *) sb_e > vstruct_end(&sb_r->field));
+ BUG_ON((void *) dst > vstruct_end(&sb_r->field));
}
return 0;
}
+static const char *check_dup_replicas_entries(struct bch_replicas_cpu *cpu_r)
+{
+ unsigned i;
+
+ sort_cmp_size(cpu_r->entries,
+ cpu_r->nr,
+ cpu_r->entry_size,
+ memcmp, NULL);
+
+ for (i = 0; i + 1 < cpu_r->nr; i++) {
+ struct bch_replicas_entry *l =
+ cpu_replicas_entry(cpu_r, i);
+ struct bch_replicas_entry *r =
+ cpu_replicas_entry(cpu_r, i + 1);
+
+ BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
+
+ if (!memcmp(l, r, cpu_r->entry_size))
+ return "duplicate replicas entry";
+ }
+
+ return NULL;
+}
+
static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_field *f)
{
struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
@@ -474,15 +483,15 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
goto err;
err = "invalid replicas entry: no devices";
- if (!e->nr)
+ if (!e->nr_devs)
goto err;
err = "invalid replicas entry: too many devices";
- if (e->nr >= BCH_REPLICAS_MAX)
+ if (e->nr_devs >= BCH_REPLICAS_MAX)
goto err;
err = "invalid replicas entry: invalid device";
- for (i = 0; i < e->nr; i++)
+ for (i = 0; i < e->nr_devs; i++)
if (!bch2_dev_exists(sb, mi, e->devs[i]))
goto err;
}
@@ -492,25 +501,7 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
if (!cpu_r)
goto err;
- sort_cmp_size(cpu_r->entries,
- cpu_r->nr,
- cpu_r->entry_size,
- memcmp, NULL);
-
- for (i = 0; i + 1 < cpu_r->nr; i++) {
- struct bch_replicas_cpu_entry *l =
- cpu_replicas_entry(cpu_r, i);
- struct bch_replicas_cpu_entry *r =
- cpu_replicas_entry(cpu_r, i + 1);
-
- BUG_ON(memcmp(l, r, cpu_r->entry_size) > 0);
-
- err = "duplicate replicas entry";
- if (!memcmp(l, r, cpu_r->entry_size))
- goto err;
- }
-
- err = NULL;
+ err = check_dup_replicas_entries(cpu_r);
err:
kfree(cpu_r);
return err;
@@ -525,7 +516,6 @@ int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t
char *out = buf, *end = out + size;
struct bch_replicas_entry *e;
bool first = true;
- unsigned i;
if (!r) {
out += scnprintf(out, end - out, "(no replicas section found)");
@@ -537,12 +527,7 @@ int bch2_sb_replicas_to_text(struct bch_sb_field_replicas *r, char *buf, size_t
out += scnprintf(out, end - out, " ");
first = false;
- out += scnprintf(out, end - out, "%u: [", e->data_type);
-
- for (i = 0; i < e->nr; i++)
- out += scnprintf(out, end - out,
- i ? " %u" : "%u", e->devs[i]);
- out += scnprintf(out, end - out, "]");
+ out += replicas_entry_to_text(e, out, end - out);
}
return out - buf;
@@ -554,45 +539,59 @@ bool bch2_replicas_marked(struct bch_fs *c,
enum bch_data_type data_type,
struct bch_devs_list devs)
{
- struct bch_replicas_cpu_entry search;
- unsigned max_dev;
+ struct bch_replicas_entry_padded search;
bool ret;
if (!devs.nr)
return true;
- devlist_to_replicas(devs, data_type, &search, &max_dev);
+ memset(&search, 0, sizeof(search));
+
+ devlist_to_replicas(devs, data_type, &search.e);
rcu_read_lock();
- ret = replicas_has_entry(rcu_dereference(c->replicas),
- search, max_dev);
+ ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
rcu_read_unlock();
return ret;
}
bool bch2_bkey_replicas_marked(struct bch_fs *c,
- enum bch_data_type data_type,
+ enum bkey_type type,
struct bkey_s_c k)
{
- struct bch_devs_list cached = bch2_bkey_cached_devs(k);
- unsigned i;
+ struct bch_replicas_entry_padded search;
+ bool ret;
- for (i = 0; i < cached.nr; i++)
- if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
- bch2_dev_list_single(cached.devs[i])))
- return false;
+ if (type == BKEY_TYPE_EXTENTS) {
+ struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+ unsigned i;
- return bch2_replicas_marked(c, data_type, bch2_bkey_dirty_devs(k));
+ for (i = 0; i < cached.nr; i++)
+ if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
+ bch2_dev_list_single(cached.devs[i])))
+ return false;
+ }
+
+ bkey_to_replicas(type, k, &search.e);
+
+ if (!search.e.nr_devs)
+ return true;
+
+ rcu_read_lock();
+ ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
+ rcu_read_unlock();
+
+ return ret;
}
struct replicas_status __bch2_replicas_status(struct bch_fs *c,
struct bch_devs_mask online_devs)
{
struct bch_sb_field_members *mi;
- struct bch_replicas_cpu_entry *e;
+ struct bch_replicas_entry *e;
struct bch_replicas_cpu *r;
- unsigned i, dev, dev_slots, nr_online, nr_offline;
+ unsigned i, nr_online, nr_offline;
struct replicas_status ret;
memset(&ret, 0, sizeof(ret));
@@ -602,9 +601,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
mi = bch2_sb_get_members(c->disk_sb.sb);
rcu_read_lock();
-
r = rcu_dereference(c->replicas);
- dev_slots = replicas_dev_slots(r);
for_each_cpu_replicas_entry(r, e) {
if (e->data_type >= ARRAY_SIZE(ret.replicas))
@@ -612,13 +609,11 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
nr_online = nr_offline = 0;
- for (dev = 0; dev < dev_slots; dev++) {
- if (!replicas_test_dev(e, dev))
- continue;
-
- BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi, dev));
+ for (i = 0; i < e->nr_devs; i++) {
+ BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi,
+ e->devs[i]));
- if (test_bit(dev, online_devs.d))
+ if (test_bit(e->devs[i], online_devs.d))
nr_online++;
else
nr_offline++;
@@ -677,20 +672,18 @@ unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
- struct bch_replicas_cpu_entry *e;
+ struct bch_replicas_entry *e;
struct bch_replicas_cpu *r;
- unsigned ret = 0;
+ unsigned i, ret = 0;
rcu_read_lock();
r = rcu_dereference(c->replicas);
- if (ca->dev_idx >= replicas_dev_slots(r))
- goto out;
-
for_each_cpu_replicas_entry(r, e)
- if (replicas_test_dev(e, ca->dev_idx))
- ret |= 1 << e->data_type;
-out:
+ for (i = 0; i < e->nr_devs; i++)
+ if (e->devs[i] == ca->dev_idx)
+ ret |= 1 << e->data_type;
+
rcu_read_unlock();
return ret;
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
index 49f114b0..640fe5b2 100644
--- a/libbcachefs/replicas.h
+++ b/libbcachefs/replicas.h
@@ -1,13 +1,15 @@
#ifndef _BCACHEFS_REPLICAS_H
#define _BCACHEFS_REPLICAS_H
+#include "replicas_types.h"
+
bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
struct bch_devs_list);
-bool bch2_bkey_replicas_marked(struct bch_fs *, enum bch_data_type,
+bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type,
struct bkey_s_c);
int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
struct bch_devs_list);
-int bch2_mark_bkey_replicas(struct bch_fs *, enum bch_data_type,
+int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type,
struct bkey_s_c);
int bch2_cpu_replicas_to_text(struct bch_replicas_cpu *, char *, size_t);
@@ -33,11 +35,11 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned);
/* iterate over superblock replicas - used by userspace tools: */
-static inline struct bch_replicas_entry *
-replicas_entry_next(struct bch_replicas_entry *i)
-{
- return (void *) i + offsetof(struct bch_replicas_entry, devs) + i->nr;
-}
+#define replicas_entry_bytes(_i) \
+ (offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
+
+#define replicas_entry_next(_i) \
+ ((typeof(_i)) ((void *) (_i) + replicas_entry_bytes(_i)))
#define for_each_replicas_entry(_r, _i) \
for (_i = (_r)->entries; \
diff --git a/libbcachefs/replicas_types.h b/libbcachefs/replicas_types.h
new file mode 100644
index 00000000..3061840b
--- /dev/null
+++ b/libbcachefs/replicas_types.h
@@ -0,0 +1,11 @@
+#ifndef _BCACHEFS_REPLICAS_TYPES_H
+#define _BCACHEFS_REPLICAS_TYPES_H
+
+struct bch_replicas_cpu {
+ struct rcu_head rcu;
+ unsigned nr;
+ unsigned entry_size;
+ struct bch_replicas_entry entries[];
+};
+
+#endif /* _BCACHEFS_REPLICAS_TYPES_H */
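With this change the in-memory replicas table stores the same variable-length entries as the superblock's replicas section, each slot padded out to entry_size (the largest entry seen); replicas_entry_bytes() in replicas.h gives an entry's real size. A small sketch of walking such a table, assuming bch_replicas_entry carries data_type, nr_devs and a flexible devs[] array as the validation code above uses (the helper name is hypothetical):

	static bool replicas_table_has_dev(struct bch_replicas_cpu *r, unsigned dev)
	{
		struct bch_replicas_entry *e;
		unsigned i;

		/* entries are laid out back to back, each padded to r->entry_size: */
		for_each_cpu_replicas_entry(r, e)
			for (i = 0; i < e->nr_devs; i++)
				if (e->devs[i] == dev)
					return true;

		return false;
	}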
diff --git a/libbcachefs/super_types.h b/libbcachefs/super_types.h
index ab83ade9..ebb238aa 100644
--- a/libbcachefs/super_types.h
+++ b/libbcachefs/super_types.h
@@ -34,18 +34,6 @@ struct bch_member_cpu {
u8 valid;
};
-struct bch_replicas_cpu_entry {
- u8 data_type;
- u8 devs[BCH_SB_MEMBERS_MAX / 8];
-};
-
-struct bch_replicas_cpu {
- struct rcu_head rcu;
- unsigned nr;
- unsigned entry_size;
- struct bch_replicas_cpu_entry entries[];
-};
-
struct bch_disk_group_cpu {
bool deleted;
u16 parent;
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 3038b455..48126920 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -282,19 +282,19 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
if (k.k->type == BCH_EXTENT) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const struct bch_extent_ptr *ptr;
- struct bch_extent_crc_unpacked crc;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
- extent_for_each_ptr_crc(e, ptr, crc) {
- if (crc.compression_type == BCH_COMPRESSION_NONE) {
+ extent_for_each_ptr_decode(e, p, entry) {
+ if (p.crc.compression_type == BCH_COMPRESSION_NONE) {
nr_uncompressed_extents++;
uncompressed_sectors += e.k->size;
} else {
nr_compressed_extents++;
compressed_sectors_compressed +=
- crc.compressed_size;
+ p.crc.compressed_size;
compressed_sectors_uncompressed +=
- crc.uncompressed_size;
+ p.crc.uncompressed_size;
}
/* only looking at the first ptr */
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index 5cfaed5b..4df96ef0 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -526,15 +526,17 @@ void bch2_bio_map(struct bio *bio, void *base)
BUG_ON(!bio->bi_iter.bi_size);
BUG_ON(bio->bi_vcnt);
+ BUG_ON(!bio->bi_max_vecs);
bv->bv_offset = base ? offset_in_page(base) : 0;
goto start;
for (; size; bio->bi_vcnt++, bv++) {
+ BUG_ON(bio->bi_vcnt >= bio->bi_max_vecs);
+
bv->bv_offset = 0;
start: bv->bv_len = min_t(size_t, PAGE_SIZE - bv->bv_offset,
size);
- BUG_ON(bio->bi_vcnt >= bio->bi_max_vecs);
if (base) {
bv->bv_page = is_vmalloc_addr(base)
? vmalloc_to_page(base)
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index 178bf983..433ba9c1 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -83,6 +83,14 @@ struct closure;
(__builtin_types_compatible_p(typeof(_val), _type) || \
__builtin_types_compatible_p(typeof(_val), const _type))
+/* Userspace doesn't align allocations as nicely as the kernel allocators: */
+static inline size_t buf_pages(void *p, size_t len)
+{
+ return DIV_ROUND_UP(len +
+ ((unsigned long) p & (PAGE_SIZE - 1)),
+ PAGE_SIZE);
+}
+
static inline void vpfree(void *p, size_t size)
{
if (is_vmalloc_addr(p))
@@ -137,7 +145,19 @@ do { \
(heap)->data = NULL; \
} while (0)
-#define heap_swap(h, i, j) swap((h)->data[i], (h)->data[j])
+#define heap_set_backpointer(h, i, _fn) \
+do { \
+ void (*fn)(typeof(h), size_t) = _fn; \
+ if (fn) \
+ fn(h, i); \
+} while (0)
+
+#define heap_swap(h, i, j, set_backpointer) \
+do { \
+ swap((h)->data[i], (h)->data[j]); \
+ heap_set_backpointer(h, i, set_backpointer); \
+ heap_set_backpointer(h, j, set_backpointer); \
+} while (0)
#define heap_peek(h) \
({ \
@@ -147,7 +167,7 @@ do { \
#define heap_full(h) ((h)->used == (h)->size)
-#define heap_sift_down(h, i, cmp) \
+#define heap_sift_down(h, i, cmp, set_backpointer) \
do { \
size_t _c, _j = i; \
\
@@ -159,72 +179,75 @@ do { \
\
if (cmp(h, (h)->data[_c], (h)->data[_j]) >= 0) \
break; \
- heap_swap(h, _c, _j); \
+ heap_swap(h, _c, _j, set_backpointer); \
} \
} while (0)
-#define heap_sift_up(h, i, cmp) \
+#define heap_sift_up(h, i, cmp, set_backpointer) \
do { \
while (i) { \
size_t p = (i - 1) / 2; \
if (cmp(h, (h)->data[i], (h)->data[p]) >= 0) \
break; \
- heap_swap(h, i, p); \
+ heap_swap(h, i, p, set_backpointer); \
i = p; \
} \
} while (0)
-#define __heap_add(h, d, cmp) \
-do { \
+#define __heap_add(h, d, cmp, set_backpointer) \
+({ \
size_t _i = (h)->used++; \
(h)->data[_i] = d; \
+ heap_set_backpointer(h, _i, set_backpointer); \
\
- heap_sift_up(h, _i, cmp); \
-} while (0)
+ heap_sift_up(h, _i, cmp, set_backpointer); \
+ _i; \
+})
-#define heap_add(h, d, cmp) \
+#define heap_add(h, d, cmp, set_backpointer) \
({ \
bool _r = !heap_full(h); \
if (_r) \
- __heap_add(h, d, cmp); \
+ __heap_add(h, d, cmp, set_backpointer); \
_r; \
})
-#define heap_add_or_replace(h, new, cmp) \
+#define heap_add_or_replace(h, new, cmp, set_backpointer) \
do { \
- if (!heap_add(h, new, cmp) && \
+ if (!heap_add(h, new, cmp, set_backpointer) && \
cmp(h, new, heap_peek(h)) >= 0) { \
(h)->data[0] = new; \
- heap_sift_down(h, 0, cmp); \
+ heap_set_backpointer(h, 0, set_backpointer); \
+ heap_sift_down(h, 0, cmp, set_backpointer); \
} \
} while (0)
-#define heap_del(h, i, cmp) \
+#define heap_del(h, i, cmp, set_backpointer) \
do { \
size_t _i = (i); \
\
BUG_ON(_i >= (h)->used); \
(h)->used--; \
- heap_swap(h, _i, (h)->used); \
- heap_sift_up(h, _i, cmp); \
- heap_sift_down(h, _i, cmp); \
+ heap_swap(h, _i, (h)->used, set_backpointer); \
+ heap_sift_up(h, _i, cmp, set_backpointer); \
+ heap_sift_down(h, _i, cmp, set_backpointer); \
} while (0)
-#define heap_pop(h, d, cmp) \
+#define heap_pop(h, d, cmp, set_backpointer) \
({ \
bool _r = (h)->used; \
if (_r) { \
(d) = (h)->data[0]; \
- heap_del(h, 0, cmp); \
+ heap_del(h, 0, cmp, set_backpointer); \
} \
_r; \
})
-#define heap_resort(heap, cmp) \
+#define heap_resort(heap, cmp, set_backpointer) \
do { \
ssize_t _i; \
for (_i = (ssize_t) (heap)->used / 2 - 1; _i >= 0; --_i) \
- heap_sift_down(heap, _i, cmp); \
+ heap_sift_down(heap, _i, cmp, set_backpointer); \
} while (0)
#define ANYSINT_MAX(t) \