author     Kent Overstreet <kent.overstreet@gmail.com>  2019-09-25 15:23:29 -0400
committer  Kent Overstreet <kent.overstreet@gmail.com>  2019-09-25 15:23:50 -0400
commit     db39aa3e1b528db3b9d731c3b054f27411e1e1a9 (patch)
tree       1fad2b9a5f66cd9d7b7096c53604de691c47f26c /libbcachefs
parent     ceee9244dedcca3df57b76fafb772207cdfbd6ee (diff)
Update bcachefs sources to 5a3a4087af bcachefs: Convert a BUG_ON() to a warning
Diffstat (limited to 'libbcachefs')
-rw-r--r--  libbcachefs/alloc_background.c   |    2
-rw-r--r--  libbcachefs/alloc_foreground.c   |    5
-rw-r--r--  libbcachefs/alloc_foreground.h   |    2
-rw-r--r--  libbcachefs/bcachefs_format.h    |    2
-rw-r--r--  libbcachefs/bkey_methods.c       |    2
-rw-r--r--  libbcachefs/bkey_methods.h       |    3
-rw-r--r--  libbcachefs/btree_cache.c        |    6
-rw-r--r--  libbcachefs/btree_gc.c           |    2
-rw-r--r--  libbcachefs/btree_iter.c         |  420
-rw-r--r--  libbcachefs/btree_iter.h         |   16
-rw-r--r--  libbcachefs/btree_locking.h      |    2
-rw-r--r--  libbcachefs/btree_types.h        |    4
-rw-r--r--  libbcachefs/btree_update.h       |   29
-rw-r--r--  libbcachefs/btree_update_leaf.c  |  184
-rw-r--r--  libbcachefs/buckets.c            |   33
-rw-r--r--  libbcachefs/checksum.c           |    1
-rw-r--r--  libbcachefs/ec.c                 |    8
-rw-r--r--  libbcachefs/error.c              |    9
-rw-r--r--  libbcachefs/extents.c            |   86
-rw-r--r--  libbcachefs/extents.h            |   18
-rw-r--r--  libbcachefs/fs-io.c              |  285
-rw-r--r--  libbcachefs/fsck.c               |    2
-rw-r--r--  libbcachefs/io.c                 |   12
-rw-r--r--  libbcachefs/move.c               |    8
-rw-r--r--  libbcachefs/rebalance.c          |   45
-rw-r--r--  libbcachefs/recovery.c           |    5
-rw-r--r--  libbcachefs/replicas.c           |   22
-rw-r--r--  libbcachefs/str_hash.h           |    1
-rw-r--r--  libbcachefs/super.c              |    7
29 files changed, 743 insertions(+), 478 deletions(-)
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 7a457729..9814179a 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -1164,7 +1164,7 @@ static int bch2_allocator_thread(void *arg)
*/
if (!nr ||
(nr < ALLOC_SCAN_BATCH(ca) &&
- !fifo_full(&ca->free[RESERVE_MOVINGGC]))) {
+ !fifo_empty(&ca->free[RESERVE_NONE]))) {
ret = wait_buckets_available(c, ca);
if (ret) {
up_read(&c->gc_lock);
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index e64f8449..697d5768 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -693,8 +693,7 @@ retry_blocking:
}
void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
- struct open_buckets *obs,
- enum bch_data_type data_type)
+ struct open_buckets *obs)
{
struct open_buckets ptrs = { .nr = 0 };
struct open_bucket *ob, *ob2;
@@ -725,7 +724,7 @@ void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
struct write_point *wp)
{
mutex_lock(&wp->lock);
- bch2_open_buckets_stop_dev(c, ca, &wp->ptrs, wp->type);
+ bch2_open_buckets_stop_dev(c, ca, &wp->ptrs);
mutex_unlock(&wp->lock);
}
diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h
index 6d8ffb0c..687f973e 100644
--- a/libbcachefs/alloc_foreground.h
+++ b/libbcachefs/alloc_foreground.h
@@ -106,7 +106,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct write_point *,
void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *,
- struct open_buckets *, enum bch_data_type);
+ struct open_buckets *);
void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
struct write_point *);
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 667170b5..4577d77a 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -657,7 +657,7 @@ struct bch_reservation {
/* Maximum possible size of an entire extent value: */
#define BKEY_EXTENT_VAL_U64s_MAX \
- (BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
+ (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index 6fa6ac1f..f01405dd 100644
--- a/libbcachefs/bkey_methods.c
+++ b/libbcachefs/bkey_methods.c
@@ -145,7 +145,7 @@ void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k)
}
if (ops->key_debugcheck)
- ops->key_debugcheck(c, b, k);
+ ops->key_debugcheck(c, k);
}
void bch2_bpos_to_text(struct printbuf *out, struct bpos pos)
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index e6e97cda..8568b65c 100644
--- a/libbcachefs/bkey_methods.h
+++ b/libbcachefs/bkey_methods.h
@@ -26,8 +26,7 @@ struct bkey_ops {
/* Returns reason for being invalid if invalid, else NULL: */
const char * (*key_invalid)(const struct bch_fs *,
struct bkey_s_c);
- void (*key_debugcheck)(struct bch_fs *, struct btree *,
- struct bkey_s_c);
+ void (*key_debugcheck)(struct bch_fs *, struct bkey_s_c);
void (*val_to_text)(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void (*swab)(const struct bkey_format *, struct bkey_packed *);
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 046524c8..41694951 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -674,10 +674,7 @@ struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
EBUG_ON(!btree_node_locked(iter, level + 1));
EBUG_ON(level >= BTREE_MAX_DEPTH);
retry:
- rcu_read_lock();
b = btree_cache_find(bc, k);
- rcu_read_unlock();
-
if (unlikely(!b)) {
/*
* We must have the parent locked to call bch2_btree_node_fill(),
@@ -878,10 +875,7 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
BUG_ON(!btree_node_locked(iter, level + 1));
BUG_ON(level >= BTREE_MAX_DEPTH);
- rcu_read_lock();
b = btree_cache_find(bc, k);
- rcu_read_unlock();
-
if (b)
return;
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 5c77a955..f4adb07a 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -762,6 +762,8 @@ out:
percpu_down_write(&c->mark_lock);
bch2_gc_free(c);
percpu_up_write(&c->mark_lock);
+ /* flush fsck errors, reset counters */
+ bch2_flush_fsck_errs(c);
goto again;
}
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index a28d2dd7..40cd87d7 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -526,6 +526,10 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
unsigned offset = __btree_node_key_to_offset(b, where);
int shift = new_u64s - clobber_u64s;
unsigned old_end = t->end_offset - shift;
+ unsigned orig_iter_pos = node_iter->data[0].k;
+ bool iter_current_key_modified =
+ orig_iter_pos >= offset &&
+ orig_iter_pos <= offset + clobber_u64s;
btree_node_iter_for_each(node_iter, set)
if (set->end == old_end)
@@ -534,18 +538,12 @@ static void __bch2_btree_node_iter_fix(struct btree_iter *iter,
/* didn't find the bset in the iterator - might have to readd it: */
if (new_u64s &&
btree_iter_pos_cmp(iter, b, where) > 0) {
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-
bch2_btree_node_iter_push(node_iter, b, where, end);
-
- if (!b->level &&
- node_iter == &iter->l[0].iter)
- bkey_disassemble(b,
- bch2_btree_node_iter_peek_all(node_iter, b),
- &iter->k);
+ goto fixup_done;
+ } else {
+ /* Iterator is after key that changed */
+ return;
}
-
- goto iter_current_key_not_modified;
found:
set->end = t->end_offset;
@@ -561,40 +559,25 @@ found:
if (set->k == set->end)
bch2_btree_node_iter_set_drop(node_iter, set);
} else {
+ /* Iterator is after key that changed */
set->k = (int) set->k + shift;
- goto iter_current_key_not_modified;
+ return;
}
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
-
bch2_btree_node_iter_sort(node_iter, b);
- if (!b->level && node_iter == &iter->l[0].iter) {
- /*
- * not legal to call bkey_debugcheck() here, because we're
- * called midway through the update path after update has been
- * marked but before deletes have actually happened:
- */
-#if 0
- __btree_iter_peek_all(iter, &iter->l[0], &iter->k);
-#endif
- struct btree_iter_level *l = &iter->l[0];
- struct bkey_packed *k =
- bch2_btree_node_iter_peek_all(&l->iter, l->b);
+fixup_done:
+ if (node_iter->data[0].k != orig_iter_pos)
+ iter_current_key_modified = true;
- if (unlikely(!k))
- iter->k.type = KEY_TYPE_deleted;
- else
- bkey_disassemble(l->b, k, &iter->k);
- }
-iter_current_key_not_modified:
/*
* When a new key is added, and the node iterator now points to that
* key, the iterator might have skipped past deleted keys that should
* come after the key the iterator now points to. We have to rewind to
- * before those deleted keys - otherwise bch2_btree_node_iter_prev_all()
- * breaks:
+ * before those deleted keys - otherwise
+ * bch2_btree_node_iter_prev_all() breaks:
*/
if (!bch2_btree_node_iter_end(node_iter) &&
+ iter_current_key_modified &&
(b->level ||
(iter->flags & BTREE_ITER_IS_EXTENTS))) {
struct bset_tree *t;
@@ -622,7 +605,21 @@ iter_current_key_not_modified:
}
}
- bch2_btree_node_iter_verify(node_iter, b);
+ if (!b->level &&
+ node_iter == &iter->l[0].iter &&
+ iter_current_key_modified) {
+ struct bkey_packed *k =
+ bch2_btree_node_iter_peek_all(node_iter, b);
+
+ if (likely(k)) {
+ bkey_disassemble(b, k, &iter->k);
+ } else {
+ /* XXX: for extents, calculate size of hole? */
+ iter->k.type = KEY_TYPE_deleted;
+ }
+
+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
+ }
}
void bch2_btree_node_iter_fix(struct btree_iter *iter,
@@ -635,14 +632,18 @@ void bch2_btree_node_iter_fix(struct btree_iter *iter,
struct bset_tree *t = bch2_bkey_to_bset(b, where);
struct btree_iter *linked;
- if (node_iter != &iter->l[b->level].iter)
+ if (node_iter != &iter->l[b->level].iter) {
__bch2_btree_node_iter_fix(iter, b, node_iter, t,
- where, clobber_u64s, new_u64s);
+ where, clobber_u64s, new_u64s);
+ bch2_btree_node_iter_verify(node_iter, b);
+ }
- trans_for_each_iter_with_node(iter->trans, b, linked)
+ trans_for_each_iter_with_node(iter->trans, b, linked) {
__bch2_btree_node_iter_fix(linked, b,
- &linked->l[b->level].iter, t,
- where, clobber_u64s, new_u64s);
+ &linked->l[b->level].iter, t,
+ where, clobber_u64s, new_u64s);
+ __bch2_btree_iter_verify(linked, b);
+ }
}
static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter,
@@ -685,6 +686,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter,
bch2_btree_node_iter_peek(&l->iter, l->b));
}
+static inline struct bkey_s_c __btree_iter_prev(struct btree_iter *iter,
+ struct btree_iter_level *l)
+{
+ return __btree_iter_unpack(iter, l, &iter->k,
+ bch2_btree_node_iter_prev(&l->iter, l->b));
+}
+
static inline bool btree_iter_advance_to_pos(struct btree_iter *iter,
struct btree_iter_level *l,
int max_advance)
@@ -743,18 +751,29 @@ static void btree_iter_verify_new_node(struct btree_iter *iter, struct btree *b)
btree_node_unlock(iter, b->level + 1);
}
+static inline bool btree_iter_pos_before_node(struct btree_iter *iter,
+ struct btree *b)
+{
+ return bkey_cmp(iter->pos, b->data->min_key) < 0;
+}
+
static inline bool btree_iter_pos_after_node(struct btree_iter *iter,
struct btree *b)
{
- return __btree_iter_pos_cmp(iter, NULL,
- bkey_to_packed(&b->key), true) < 0;
+ int cmp = bkey_cmp(b->key.k.p, iter->pos);
+
+ if (!cmp &&
+ (iter->flags & BTREE_ITER_IS_EXTENTS) &&
+ bkey_cmp(b->key.k.p, POS_MAX))
+ cmp = -1;
+ return cmp < 0;
}
static inline bool btree_iter_pos_in_node(struct btree_iter *iter,
struct btree *b)
{
return iter->btree_id == b->btree_id &&
- bkey_cmp(iter->pos, b->data->min_key) >= 0 &&
+ !btree_iter_pos_before_node(iter, b) &&
!btree_iter_pos_after_node(iter, b);
}
@@ -956,10 +975,10 @@ static void btree_iter_up(struct btree_iter *iter)
btree_node_unlock(iter, iter->level++);
}
-int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
+static int btree_iter_traverse_one(struct btree_iter *);
static int __btree_iter_traverse_all(struct btree_trans *trans,
- struct btree_iter *orig_iter, int ret)
+ struct btree_iter *orig_iter, int ret)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter;
@@ -1003,7 +1022,7 @@ retry_all:
iter = &trans->iters[sorted[i]];
do {
- ret = __bch2_btree_iter_traverse(iter);
+ ret = btree_iter_traverse_one(iter);
} while (ret == -EINTR);
if (ret)
@@ -1021,16 +1040,27 @@ int bch2_btree_iter_traverse_all(struct btree_trans *trans)
return __btree_iter_traverse_all(trans, NULL, 0);
}
-static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
- bool check_pos)
+static inline bool btree_iter_good_node(struct btree_iter *iter,
+ unsigned l, int check_pos)
+{
+ if (!is_btree_node(iter, l) ||
+ !bch2_btree_node_relock(iter, l))
+ return false;
+
+ if (check_pos <= 0 && btree_iter_pos_before_node(iter, iter->l[l].b))
+ return false;
+ if (check_pos >= 0 && btree_iter_pos_after_node(iter, iter->l[l].b))
+ return false;
+ return true;
+}
+
+static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
+ int check_pos)
{
unsigned l = iter->level;
while (btree_iter_node(iter, l) &&
- (!is_btree_node(iter, l) ||
- !bch2_btree_node_relock(iter, l) ||
- (check_pos &&
- !btree_iter_pos_in_node(iter, iter->l[l].b)))) {
+ !btree_iter_good_node(iter, l, check_pos)) {
btree_node_unlock(iter, l);
iter->l[l].b = BTREE_ITER_NO_NODE_UP;
l++;
@@ -1048,7 +1078,7 @@ static unsigned btree_iter_up_until_locked(struct btree_iter *iter,
* On error, caller (peek_node()/peek_key()) must return NULL; the error is
* stashed in the iterator and returned from bch2_trans_exit().
*/
-int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
+static int btree_iter_traverse_one(struct btree_iter *iter)
{
unsigned depth_want = iter->level;
@@ -1062,7 +1092,7 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
* XXX: correctly using BTREE_ITER_UPTODATE should make using check_pos
* here unnecessary
*/
- iter->level = btree_iter_up_until_locked(iter, true);
+ iter->level = btree_iter_up_until_good_node(iter, 0);
/*
* If we've got a btree node locked (i.e. we aren't about to relock the
@@ -1070,8 +1100,11 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
*
* XXX correctly using BTREE_ITER_UPTODATE should make this unnecessary
*/
- if (btree_iter_node(iter, iter->level))
+ if (btree_iter_node(iter, iter->level)) {
+ BUG_ON(!btree_iter_pos_in_node(iter, iter->l[iter->level].b));
+
btree_iter_advance_to_pos(iter, &iter->l[iter->level], -1);
+ }
/*
* Note: iter->nodes[iter->level] may be temporarily NULL here - that
@@ -1100,12 +1133,12 @@ int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
return 0;
}
-int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
+int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
{
int ret;
ret = bch2_trans_cond_resched(iter->trans) ?:
- __bch2_btree_iter_traverse(iter);
+ btree_iter_traverse_one(iter);
if (unlikely(ret))
ret = __btree_iter_traverse_all(iter->trans, iter, ret);
@@ -1234,19 +1267,11 @@ void bch2_btree_iter_set_pos_same_leaf(struct btree_iter *iter, struct bpos new_
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
}
-void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+static unsigned btree_iter_pos_changed(struct btree_iter *iter, int cmp)
{
- int cmp = bkey_cmp(new_pos, iter->pos);
- unsigned level;
-
- if (!cmp)
- return;
-
- iter->pos = new_pos;
-
- level = btree_iter_up_until_locked(iter, true);
+ unsigned l = btree_iter_up_until_good_node(iter, cmp);
- if (btree_iter_node(iter, level)) {
+ if (btree_iter_node(iter, l)) {
/*
* We might have to skip over many keys, or just a few: try
* advancing the node iterator, and if we have to skip over too
@@ -1254,37 +1279,98 @@ void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
* is expensive).
*/
if (cmp < 0 ||
- !btree_iter_advance_to_pos(iter, &iter->l[level], 8))
- __btree_iter_init(iter, level);
+ !btree_iter_advance_to_pos(iter, &iter->l[l], 8))
+ __btree_iter_init(iter, l);
/* Don't leave it locked if we're not supposed to: */
- if (btree_lock_want(iter, level) == BTREE_NODE_UNLOCKED)
- btree_node_unlock(iter, level);
+ if (btree_lock_want(iter, l) == BTREE_NODE_UNLOCKED)
+ btree_node_unlock(iter, l);
}
- if (level != iter->level)
+ return l;
+}
+
+void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
+{
+ int cmp = bkey_cmp(new_pos, iter->pos);
+ unsigned l;
+
+ if (!cmp)
+ return;
+
+ iter->pos = new_pos;
+
+ l = btree_iter_pos_changed(iter, cmp);
+
+ if (l != iter->level)
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
else
btree_iter_set_dirty(iter, BTREE_ITER_NEED_PEEK);
}
+static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
+{
+ struct btree_iter_level *l = &iter->l[0];
+
+ iter->pos = l->b->key.k.p;
+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
+
+ if (!bkey_cmp(iter->pos, POS_MAX)) {
+ bkey_init(&iter->k);
+ iter->k.p = POS_MAX;
+ return false;
+ }
+
+ iter->pos = btree_type_successor(iter->btree_id, iter->pos);
+ btree_iter_pos_changed(iter, 1);
+ return true;
+}
+
+static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
+{
+ struct btree_iter_level *l = &iter->l[0];
+
+ iter->pos = l->b->data->min_key;
+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
+
+ if (!bkey_cmp(iter->pos, POS_MIN)) {
+ bkey_init(&iter->k);
+ iter->k.p = POS_MIN;
+ return false;
+ }
+
+ iter->pos = btree_type_predecessor(iter->btree_id, iter->pos);
+ btree_iter_pos_changed(iter, -1);
+ return true;
+}
+
static inline struct bkey_s_c btree_iter_peek_uptodate(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
struct bkey_s_c ret = { .k = &iter->k };
if (!bkey_deleted(&iter->k)) {
- EBUG_ON(bch2_btree_node_iter_end(&l->iter));
- ret.v = bkeyp_val(&l->b->format,
- __bch2_btree_node_iter_peek_all(&l->iter, l->b));
+ struct bkey_packed *_k =
+ __bch2_btree_node_iter_peek_all(&l->iter, l->b);
+
+ ret.v = bkeyp_val(&l->b->format, _k);
+
+ if (debug_check_iterators(iter->trans->c)) {
+ struct bkey k = bkey_unpack_key(l->b, _k);
+ BUG_ON(memcmp(&k, &iter->k, sizeof(k)));
+ }
+
+ if (debug_check_bkeys(iter->trans->c))
+ bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
}
- if (debug_check_bkeys(iter->trans->c) &&
- !bkey_deleted(ret.k))
- bch2_bkey_debugcheck(iter->trans->c, l->b, ret);
return ret;
}
+/**
+ * bch2_btree_iter_peek: returns first key greater than or equal to iterator's
+ * current position
+ */
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
@@ -1297,24 +1383,16 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
return btree_iter_peek_uptodate(iter);
while (1) {
- if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
- ret = bch2_btree_iter_traverse(iter);
- if (unlikely(ret))
- return bkey_s_c_err(ret);
- }
+ ret = bch2_btree_iter_traverse(iter);
+ if (unlikely(ret))
+ return bkey_s_c_err(ret);
k = __btree_iter_peek(iter, l);
if (likely(k.k))
break;
- /* got to the end of the leaf, iterator needs to be traversed: */
- iter->pos = l->b->key.k.p;
- iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
-
- if (!bkey_cmp(iter->pos, POS_MAX))
+ if (!btree_iter_set_pos_to_next_leaf(iter))
return bkey_s_c_null;
-
- iter->pos = btree_type_successor(iter->btree_id, iter->pos);
}
/*
@@ -1329,22 +1407,10 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
return k;
}
-static noinline
-struct bkey_s_c bch2_btree_iter_peek_next_leaf(struct btree_iter *iter)
-{
- struct btree_iter_level *l = &iter->l[0];
-
- iter->pos = l->b->key.k.p;
- iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
-
- if (!bkey_cmp(iter->pos, POS_MAX))
- return bkey_s_c_null;
-
- iter->pos = btree_type_successor(iter->btree_id, iter->pos);
-
- return bch2_btree_iter_peek(iter);
-}
-
+/**
+ * bch2_btree_iter_next: returns first key greater than iterator's current
+ * position
+ */
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
@@ -1353,15 +1419,19 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
- iter->pos = btree_type_successor(iter->btree_id, iter->k.p);
-
if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
+ if (unlikely(!bkey_cmp(iter->k.p, POS_MAX)))
+ return bkey_s_c_null;
+
/*
* XXX: when we just need to relock we should be able to avoid
* calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
* for that to work
*/
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
+
+ bch2_btree_iter_set_pos(iter,
+ btree_type_successor(iter->btree_id, iter->k.p));
return bch2_btree_iter_peek(iter);
}
@@ -1369,9 +1439,12 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
do {
bch2_btree_node_iter_advance(&l->iter, l->b);
p = bch2_btree_node_iter_peek_all(&l->iter, l->b);
- if (unlikely(!p))
- return bch2_btree_iter_peek_next_leaf(iter);
- } while (bkey_whiteout(p));
+ } while (likely(p) && bkey_whiteout(p));
+
+ if (unlikely(!p))
+ return btree_iter_set_pos_to_next_leaf(iter)
+ ? bch2_btree_iter_peek(iter)
+ : bkey_s_c_null;
k = __btree_iter_unpack(iter, l, &iter->k, p);
@@ -1380,51 +1453,79 @@ struct bkey_s_c bch2_btree_iter_next(struct btree_iter *iter)
return k;
}
-struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
+/**
+ * bch2_btree_iter_peek_prev: returns first key less than or equal to
+ * iterator's current position
+ */
+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
{
struct btree_iter_level *l = &iter->l[0];
- struct bkey_packed *p;
struct bkey_s_c k;
int ret;
bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
- if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
- k = bch2_btree_iter_peek(iter);
- if (IS_ERR(k.k))
- return k;
- }
+ if (iter->uptodate == BTREE_ITER_UPTODATE)
+ return btree_iter_peek_uptodate(iter);
while (1) {
- p = bch2_btree_node_iter_prev(&l->iter, l->b);
- if (likely(p))
- break;
-
- iter->pos = l->b->data->min_key;
- if (!bkey_cmp(iter->pos, POS_MIN))
- return bkey_s_c_null;
-
- bch2_btree_iter_set_pos(iter,
- btree_type_predecessor(iter->btree_id, iter->pos));
-
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
- p = bch2_btree_node_iter_peek(&l->iter, l->b);
- if (p)
+ k = __btree_iter_peek(iter, l);
+ if (!k.k ||
+ bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0)
+ k = __btree_iter_prev(iter, l);
+
+ if (likely(k.k))
break;
- }
- k = __btree_iter_unpack(iter, l, &iter->k, p);
+ if (!btree_iter_set_pos_to_prev_leaf(iter))
+ return bkey_s_c_null;
+ }
EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0);
-
iter->pos = bkey_start_pos(k.k);
iter->uptodate = BTREE_ITER_UPTODATE;
return k;
}
+/**
+ * bch2_btree_iter_prev: returns first key less than iterator's current
+ * position
+ */
+struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *iter)
+{
+ struct btree_iter_level *l = &iter->l[0];
+ struct bkey_s_c k;
+
+ bch2_btree_iter_checks(iter, BTREE_ITER_KEYS);
+
+ if (unlikely(iter->uptodate != BTREE_ITER_UPTODATE)) {
+ /*
+ * XXX: when we just need to relock we should be able to avoid
+ * calling traverse, but we need to kill BTREE_ITER_NEED_PEEK
+ * for that to work
+ */
+ iter->pos = btree_type_predecessor(iter->btree_id,
+ iter->pos);
+ iter->uptodate = BTREE_ITER_NEED_TRAVERSE;
+
+ return bch2_btree_iter_peek_prev(iter);
+ }
+
+ k = __btree_iter_prev(iter, l);
+ if (unlikely(!k.k))
+ return btree_iter_set_pos_to_prev_leaf(iter)
+ ? bch2_btree_iter_peek(iter)
+ : bkey_s_c_null;
+
+ EBUG_ON(bkey_cmp(bkey_start_pos(k.k), iter->pos) >= 0);
+ iter->pos = bkey_start_pos(k.k);
+ return k;
+}
+
static inline struct bkey_s_c
__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
{
@@ -1565,11 +1666,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (iter->uptodate == BTREE_ITER_UPTODATE)
return btree_iter_peek_uptodate(iter);
- if (iter->uptodate >= BTREE_ITER_NEED_RELOCK) {
- ret = bch2_btree_iter_traverse(iter);
- if (unlikely(ret))
- return bkey_s_c_err(ret);
- }
+ ret = bch2_btree_iter_traverse(iter);
+ if (unlikely(ret))
+ return bkey_s_c_err(ret);
return __bch2_btree_iter_peek_slot(iter);
}
@@ -1671,7 +1770,10 @@ int bch2_trans_iter_free_on_commit(struct btree_trans *trans,
static int bch2_trans_realloc_iters(struct btree_trans *trans,
unsigned new_size)
{
- void *new_iters, *new_updates;
+ void *new_iters, *new_updates, *new_sorted;
+ size_t iters_bytes;
+ size_t updates_bytes;
+ size_t sorted_bytes;
new_size = roundup_pow_of_two(new_size);
@@ -1684,9 +1786,13 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
bch2_trans_unlock(trans);
- new_iters = kmalloc(sizeof(struct btree_iter) * new_size +
- sizeof(struct btree_insert_entry) * (new_size + 4),
- GFP_NOFS);
+ iters_bytes = sizeof(struct btree_iter) * new_size;
+ updates_bytes = sizeof(struct btree_insert_entry) * (new_size + 4);
+ sorted_bytes = sizeof(u8) * (new_size + 4);
+
+ new_iters = kmalloc(iters_bytes +
+ updates_bytes +
+ sorted_bytes, GFP_NOFS);
if (new_iters)
goto success;
@@ -1695,7 +1801,8 @@ static int bch2_trans_realloc_iters(struct btree_trans *trans,
trans->used_mempool = true;
success:
- new_updates = new_iters + sizeof(struct btree_iter) * new_size;
+ new_updates = new_iters + iters_bytes;
+ new_sorted = new_updates + updates_bytes;
memcpy(new_iters, trans->iters,
sizeof(struct btree_iter) * trans->nr_iters);
@@ -1710,9 +1817,10 @@ success:
if (trans->iters != trans->iters_onstack)
kfree(trans->iters);
- trans->iters = new_iters;
- trans->updates = new_updates;
- trans->size = new_size;
+ trans->iters = new_iters;
+ trans->updates = new_updates;
+ trans->updates_sorted = new_sorted;
+ trans->size = new_size;
if (trans->iters_live) {
trace_trans_restart_iters_realloced(trans->ip, trans->size);
@@ -1957,6 +2065,7 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
trans->size = ARRAY_SIZE(trans->iters_onstack);
trans->iters = trans->iters_onstack;
trans->updates = trans->updates_onstack;
+ trans->updates_sorted = trans->updates_sorted_onstack;
trans->fs_usage_deltas = NULL;
if (expected_nr_iters > trans->size)
@@ -1981,3 +2090,18 @@ int bch2_trans_exit(struct btree_trans *trans)
return trans->error ? -EIO : 0;
}
+
+void bch2_fs_btree_iter_exit(struct bch_fs *c)
+{
+ mempool_exit(&c->btree_iters_pool);
+}
+
+int bch2_fs_btree_iter_init(struct bch_fs *c)
+{
+ unsigned nr = BTREE_ITER_MAX;
+
+ return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
+ sizeof(struct btree_iter) * nr +
+ sizeof(struct btree_insert_entry) * (nr + 4) +
+ sizeof(u8) * (nr + 4));
+}
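
The new bch2_fs_btree_iter_init() sizes btree_iters_pool for one transaction's worth of iterators, insert entries, and sorted-index bytes, matching the single-allocation carve-up done in bch2_trans_realloc_iters() above. A minimal userspace sketch of the same layout, with illustrative stand-in types rather than bcachefs's real structs:

#include <stdlib.h>

#define ITER_MAX 64                            /* stand-in for BTREE_ITER_MAX */

struct iter { char pad[96]; };                 /* stand-in for struct btree_iter */
struct insert_entry { void *iter; void *k; };  /* stand-in for btree_insert_entry */

static int trans_alloc(struct iter **iters, struct insert_entry **updates,
		       unsigned char **sorted)
{
	size_t iters_bytes   = sizeof(struct iter) * ITER_MAX;
	size_t updates_bytes = sizeof(struct insert_entry) * (ITER_MAX + 4);
	size_t sorted_bytes  = sizeof(unsigned char) * (ITER_MAX + 4);
	char *mem = malloc(iters_bytes + updates_bytes + sorted_bytes);

	if (!mem)
		return -1;

	/* one allocation, three arrays, in the same order as the patch: */
	*iters   = (struct iter *) mem;
	*updates = (struct insert_entry *) (mem + iters_bytes);
	*sorted  = (unsigned char *) (mem + iters_bytes + updates_bytes);
	return 0;
}

Keeping the three arrays in one allocation means a single mempool element can back an entire transaction, which is presumably why the pool is created with just one reserved element.
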
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 249df21b..e4967215 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -134,7 +134,16 @@ void bch2_btree_iter_node_drop(struct btree_iter *, struct btree *);
void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *);
-int __must_check bch2_btree_iter_traverse(struct btree_iter *);
+int __must_check __bch2_btree_iter_traverse(struct btree_iter *);
+
+static inline int __must_check
+bch2_btree_iter_traverse(struct btree_iter *iter)
+{
+ return iter->uptodate >= BTREE_ITER_NEED_RELOCK
+ ? __bch2_btree_iter_traverse(iter)
+ : 0;
+}
+
int bch2_btree_iter_traverse_all(struct btree_trans *);
struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
@@ -142,6 +151,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *, unsigned);
struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
+
+struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
@@ -303,4 +314,7 @@ void *bch2_trans_kmalloc(struct btree_trans *, size_t);
void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
int bch2_trans_exit(struct btree_trans *);
+void bch2_fs_btree_iter_exit(struct bch_fs *);
+int bch2_fs_btree_iter_init(struct bch_fs *);
+
#endif /* _BCACHEFS_BTREE_ITER_H */
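
The new inline bch2_btree_iter_traverse() hoists the uptodate check that callers such as bch2_btree_iter_peek() and bch2_btree_iter_peek_slot() previously open-coded (see the hunks above deleting `if (iter->uptodate >= BTREE_ITER_NEED_RELOCK)`): the common already-traversed case now costs only a comparison. A compilable sketch of the pattern, with stand-in names for the iterator states and the out-of-line path:

/* Sketch only: enum values stand in for the BTREE_ITER_* states,
 * traverse_slow() for __bch2_btree_iter_traverse(). */
enum iter_state { UPTODATE, NEED_PEEK, NEED_RELOCK, NEED_TRAVERSE };

static int traverse_slow(void) { return 0; }   /* out of line in the real code */

static inline int traverse(enum iter_state state)
{
	/* common case: iterator already positioned, no call, no work */
	return state >= NEED_RELOCK ? traverse_slow() : 0;
}
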
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index ea07ba19..592c3b4e 100644
--- a/libbcachefs/btree_locking.h
+++ b/libbcachefs/btree_locking.h
@@ -212,7 +212,7 @@ static inline void bch2_btree_node_lock_write(struct btree *b, struct btree_iter
EBUG_ON(iter->l[b->level].b != b);
EBUG_ON(iter->l[b->level].lock_seq != b->lock.state.seq);
- if (!six_trylock_write(&b->lock))
+ if (unlikely(!six_trylock_write(&b->lock)))
__bch2_btree_node_lock_write(b, iter);
}
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index f4e1bfe1..b0da0963 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -261,8 +261,6 @@ struct btree_insert_entry {
};
bool deferred;
- bool triggered;
- bool marked;
};
#define BTREE_ITER_MAX 64
@@ -291,6 +289,7 @@ struct btree_trans {
struct btree_iter *iters;
struct btree_insert_entry *updates;
+ u8 *updates_sorted;
/* update path: */
struct journal_res journal_res;
@@ -302,6 +301,7 @@ struct btree_trans {
struct btree_iter iters_onstack[2];
struct btree_insert_entry updates_onstack[6];
+ u8 updates_sorted_onstack[6];
struct replicas_delta_list *fs_usage_deltas;
};
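
updates_sorted stores u8 indices into the updates array rather than pointers or whole entries; a byte suffices because a transaction holds at most BTREE_ITER_MAX + 4 = 68 updates (see the EBUG_ON added to bch2_trans_update() below). A compile-time check of that assumption, illustrative rather than part of the patch:

#include <limits.h>

#define BTREE_ITER_MAX 64

/* a u8 index must be able to name any update slot: */
_Static_assert(BTREE_ITER_MAX + 4 <= UCHAR_MAX,
	       "updates_sorted entries must fit in a u8");
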
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index 616c103c..36e34b3d 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -43,7 +43,6 @@ enum {
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
__BTREE_INSERT_JOURNAL_RESERVED,
- __BTREE_INSERT_NOMARK_INSERT,
__BTREE_INSERT_NOMARK_OVERWRITES,
__BTREE_INSERT_NOMARK,
__BTREE_INSERT_MARK_INMEM,
@@ -81,9 +80,6 @@ enum {
#define BTREE_INSERT_JOURNAL_RESERVED (1 << __BTREE_INSERT_JOURNAL_RESERVED)
-/* Don't mark new key, just overwrites: */
-#define BTREE_INSERT_NOMARK_INSERT (1 << __BTREE_INSERT_NOMARK_INSERT)
-
/* Don't mark overwrites, just new key: */
#define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
@@ -123,8 +119,13 @@ int bch2_trans_commit(struct btree_trans *,
struct disk_reservation *,
u64 *, unsigned);
-struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
- struct btree_insert_entry);
+static inline void bch2_trans_update(struct btree_trans *trans,
+ struct btree_insert_entry entry)
+{
+ EBUG_ON(trans->nr_updates >= trans->nr_iters + 4);
+
+ trans->updates[trans->nr_updates++] = entry;
+}
#define bch2_trans_do(_c, _journal_seq, _flags, _do) \
({ \
@@ -144,18 +145,6 @@ struct btree_insert_entry *bch2_trans_update(struct btree_trans *,
_ret; \
})
-/*
- * We sort transaction entries so that if multiple iterators point to the same
- * leaf node they'll be adjacent:
- */
-static inline bool same_leaf_as_prev(struct btree_trans *trans,
- struct btree_insert_entry *i)
-{
- return i != trans->updates &&
- !i->deferred &&
- i[0].iter->l[0].b == i[-1].iter->l[0].b;
-}
-
#define __trans_next_update(_trans, _i, _filter) \
({ \
while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\
@@ -175,8 +164,4 @@ static inline bool same_leaf_as_prev(struct btree_trans *trans,
#define trans_for_each_update_iter(trans, i) \
__trans_for_each_update(trans, i, !(i)->deferred)
-#define trans_for_each_update_leaf(trans, i) \
- __trans_for_each_update(trans, i, !(i)->deferred && \
- !same_leaf_as_prev(trans, i))
-
#endif /* _BCACHEFS_BTREE_UPDATE_H */
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index c0a84153..7d983b21 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -19,12 +19,32 @@
#include <linux/sort.h>
#include <trace/events/bcachefs.h>
+static inline bool same_leaf_as_prev(struct btree_trans *trans,
+ unsigned sorted_idx)
+{
+ struct btree_insert_entry *i = trans->updates +
+ trans->updates_sorted[sorted_idx];
+ struct btree_insert_entry *prev = sorted_idx
+ ? trans->updates + trans->updates_sorted[sorted_idx - 1]
+ : NULL;
+
+ return !i->deferred &&
+ prev &&
+ i->iter->l[0].b == prev->iter->l[0].b;
+}
+
+#define trans_for_each_update_sorted(_trans, _i, _iter) \
+ for (_iter = 0; \
+ _iter < _trans->nr_updates && \
+ (_i = _trans->updates + _trans->updates_sorted[_iter], 1); \
+ _iter++)
+
inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
struct btree_iter *iter)
{
bch2_btree_node_lock_write(b, iter);
- if (btree_node_just_written(b) &&
+ if (unlikely(btree_node_just_written(b)) &&
bch2_btree_post_write_cleanup(c, b))
bch2_btree_iter_reinit_node(iter, b);
@@ -36,20 +56,21 @@ inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
bch2_btree_init_next(c, b, iter);
}
-static void btree_trans_lock_write(struct bch_fs *c, struct btree_trans *trans)
+static void btree_trans_lock_write(struct btree_trans *trans, bool lock)
{
+ struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
+ unsigned iter;
- trans_for_each_update_leaf(trans, i)
- bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
-}
-
-static void btree_trans_unlock_write(struct btree_trans *trans)
-{
- struct btree_insert_entry *i;
+ trans_for_each_update_sorted(trans, i, iter) {
+ if (same_leaf_as_prev(trans, iter))
+ continue;
- trans_for_each_update_leaf(trans, i)
- bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+ if (lock)
+ bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
+ else
+ bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+ }
}
static inline int btree_trans_cmp(struct btree_insert_entry l,
@@ -59,6 +80,30 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
btree_iter_cmp(l.iter, r.iter);
}
+static inline void btree_trans_sort_updates(struct btree_trans *trans)
+{
+ struct btree_insert_entry *l, *r;
+ unsigned nr = 0, pos;
+
+ trans_for_each_update(trans, l) {
+ for (pos = 0; pos < nr; pos++) {
+ r = trans->updates + trans->updates_sorted[pos];
+
+ if (btree_trans_cmp(*l, *r) <= 0)
+ break;
+ }
+
+ memmove(&trans->updates_sorted[pos + 1],
+ &trans->updates_sorted[pos],
+ (nr - pos) * sizeof(trans->updates_sorted[0]));
+
+ trans->updates_sorted[pos] = l - trans->updates;
+ nr++;
+ }
+
+ BUG_ON(nr != trans->nr_updates);
+}
+
/* Inserting into a given leaf node (last stage of insert): */
/* Handle overwrites and do insert, for non extents: */
@@ -106,7 +151,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
bch2_bset_delete(b, k, clobber_u64s);
bch2_btree_node_iter_fix(iter, b, node_iter,
k, clobber_u64s, 0);
- bch2_btree_iter_verify(iter, b);
return true;
}
@@ -116,7 +160,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
k->type = KEY_TYPE_deleted;
bch2_btree_node_iter_fix(iter, b, node_iter, k,
k->u64s, k->u64s);
- bch2_btree_iter_verify(iter, b);
if (bkey_whiteout(&insert->k)) {
reserve_whiteout(b, k);
@@ -138,10 +181,8 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter,
clobber_u64s = 0;
overwrite:
bch2_bset_insert(b, node_iter, k, insert, clobber_u64s);
- if (k->u64s != clobber_u64s || bkey_whiteout(&insert->k))
- bch2_btree_node_iter_fix(iter, b, node_iter, k,
- clobber_u64s, k->u64s);
- bch2_btree_iter_verify(iter, b);
+ bch2_btree_node_iter_fix(iter, b, node_iter, k,
+ clobber_u64s, k->u64s);
return true;
}
@@ -488,12 +529,12 @@ static int btree_trans_check_can_insert(struct btree_trans *trans,
struct btree_insert_entry **stopped_at)
{
struct btree_insert_entry *i;
- unsigned u64s = 0;
+ unsigned iter, u64s = 0;
int ret;
- trans_for_each_update_iter(trans, i) {
+ trans_for_each_update_sorted(trans, i, iter) {
/* Multiple inserts might go to same leaf: */
- if (!same_leaf_as_prev(trans, i))
+ if (!same_leaf_as_prev(trans, iter))
u64s = 0;
u64s += i->k->k.u64s;
@@ -542,7 +583,6 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct bch_fs_usage *fs_usage = NULL;
struct btree_insert_entry *i;
- bool saw_non_marked;
unsigned mark_flags = trans->flags & BTREE_INSERT_BUCKET_INVALIDATE
? BCH_BUCKET_MARK_BUCKET_INVALIDATE
: 0;
@@ -551,35 +591,32 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
trans_for_each_update_iter(trans, i)
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
+ /*
+ * note: running triggers will append more updates to the list of
+ * updates as we're walking it:
+ */
trans_for_each_update_iter(trans, i)
- i->marked = false;
-
- do {
- saw_non_marked = false;
-
- trans_for_each_update_iter(trans, i) {
- if (i->marked)
- continue;
-
- saw_non_marked = true;
- i->marked = true;
-
- if (update_has_triggers(trans, i) &&
- update_triggers_transactional(trans, i)) {
- ret = bch2_trans_mark_update(trans, i->iter, i->k);
- if (ret == -EINTR)
- trace_trans_restart_mark(trans->ip);
- if (ret)
- goto out_clear_replicas;
- }
+ if (update_has_triggers(trans, i) &&
+ update_triggers_transactional(trans, i)) {
+ ret = bch2_trans_mark_update(trans, i->iter, i->k);
+ if (ret == -EINTR)
+ trace_trans_restart_mark(trans->ip);
+ if (ret)
+ goto out_clear_replicas;
}
- } while (saw_non_marked);
- trans_for_each_update(trans, i)
- btree_insert_entry_checks(trans, i);
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
+ trans_for_each_update(trans, i)
+ btree_insert_entry_checks(trans, i);
bch2_btree_trans_verify_locks(trans);
- btree_trans_lock_write(c, trans);
+ /*
+ * No more updates can be added - sort updates so we can take write
+ * locks in the correct order:
+ */
+ btree_trans_sort_updates(trans);
+
+ btree_trans_lock_write(trans, true);
if (race_fault()) {
ret = -EINTR;
@@ -597,8 +634,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
goto out;
trans_for_each_update_iter(trans, i) {
- if (i->deferred ||
- !btree_node_type_needs_gc(i->iter->btree_id))
+ if (!btree_node_type_needs_gc(i->iter->btree_id))
continue;
if (!fs_usage) {
@@ -664,7 +700,7 @@ out:
(trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
trans->journal_res.ref);
- btree_trans_unlock_write(trans);
+ btree_trans_lock_write(trans, false);
if (fs_usage) {
bch2_fs_usage_scratch_put(c, fs_usage);
@@ -689,19 +725,6 @@ int bch2_trans_commit_error(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
unsigned flags = trans->flags;
- struct btree_insert_entry *src, *dst;
-
- src = dst = trans->updates;
-
- while (src < trans->updates + trans->nr_updates) {
- if (!src->triggered) {
- *dst = *src;
- dst++;
- }
- src++;
- }
-
- trans->nr_updates = dst - trans->updates;
/*
* BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
@@ -816,6 +839,7 @@ static int __bch2_trans_commit(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
+ unsigned iter;
int ret;
trans_for_each_update_iter(trans, i) {
@@ -837,8 +861,10 @@ static int __bch2_trans_commit(struct btree_trans *trans,
if (trans->flags & BTREE_INSERT_NOUNLOCK)
trans->nounlock = true;
- trans_for_each_update_leaf(trans, i)
- bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
+ trans_for_each_update_sorted(trans, i, iter)
+ if (!same_leaf_as_prev(trans, iter))
+ bch2_foreground_maybe_merge(c, i->iter,
+ 0, trans->flags);
trans->nounlock = false;
@@ -858,7 +884,8 @@ int bch2_trans_commit(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i = NULL;
- unsigned orig_mem_top = trans->mem_top;
+ unsigned orig_nr_updates = trans->nr_updates;
+ unsigned orig_mem_top = trans->mem_top;
int ret = 0;
if (!trans->nr_updates)
@@ -931,39 +958,20 @@ out_noupdates:
err:
ret = bch2_trans_commit_error(trans, i, ret);
+ /* free updates and memory used by triggers, they'll be reexecuted: */
+ trans->nr_updates = orig_nr_updates;
+ trans->mem_top = orig_mem_top;
+
/* can't loop if it was passed in and we changed it: */
if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret)
ret = -EINTR;
- if (!ret) {
- /* free memory used by triggers, they'll be reexecuted: */
- trans->mem_top = orig_mem_top;
+ if (!ret)
goto retry;
- }
goto out;
}
-struct btree_insert_entry *bch2_trans_update(struct btree_trans *trans,
- struct btree_insert_entry entry)
-{
- struct btree_insert_entry *i;
-
- BUG_ON(trans->nr_updates >= trans->nr_iters + 4);
-
- for (i = trans->updates;
- i < trans->updates + trans->nr_updates;
- i++)
- if (btree_trans_cmp(entry, *i) < 0)
- break;
-
- memmove(&i[1], &i[0],
- (void *) &trans->updates[trans->nr_updates] - (void *) i);
- trans->nr_updates++;
- *i = entry;
- return i;
-}
-
/**
* bch2_btree_insert - insert keys into the extent btree
* @c: pointer to struct bch_fs
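
Taken together, these hunks move sorting out of bch2_trans_update() (now a plain append, so triggers can add updates mid-commit) and into btree_trans_sort_updates(), which runs once after all updates exist and just before write locks are taken, keeping lock acquisition in a consistent order. A standalone sketch of that index-array insertion sort, with btree_trans_cmp() reduced to plain integer comparison:

#include <string.h>

static void sort_indices(const int *updates, unsigned nr, unsigned char *sorted)
{
	unsigned i, pos;

	for (i = 0; i < nr; i++) {
		/* find the insertion point among the already-sorted prefix */
		for (pos = 0; pos < i; pos++)
			if (updates[i] <= updates[sorted[pos]])
				break;

		/* shift one-byte indices, not whole entries */
		memmove(&sorted[pos + 1], &sorted[pos],
			(i - pos) * sizeof(sorted[0]));
		sorted[pos] = (unsigned char) i;
	}
}

Quadratic insertion is fine at this scale: a transaction carries at most a few dozen updates, and the memmove() now shifts single bytes instead of whole btree_insert_entry structs as the old bch2_trans_update() did.
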
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 1516df22..6a4773a9 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -1265,11 +1265,10 @@ int bch2_mark_update(struct btree_trans *trans,
if (!btree_node_type_needs_gc(iter->btree_id))
return 0;
- if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
- bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
- 0, insert->k->k.size,
- fs_usage, trans->journal_res.seq,
- BCH_BUCKET_MARK_INSERT|flags);
+ bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k),
+ 0, insert->k->k.size,
+ fs_usage, trans->journal_res.seq,
+ BCH_BUCKET_MARK_INSERT|flags);
if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
return 0;
@@ -1359,11 +1358,8 @@ static int trans_get_key(struct btree_trans *trans,
struct btree_insert_entry *i;
int ret;
- for (i = trans->updates;
- i < trans->updates + trans->nr_updates;
- i++)
- if (!i->deferred &&
- i->iter->btree_id == btree_id &&
+ trans_for_each_update_iter(trans, i)
+ if (i->iter->btree_id == btree_id &&
(btree_node_type_is_extents(btree_id)
? bkey_cmp(pos, bkey_start_pos(&i->k->k)) >= 0 &&
bkey_cmp(pos, i->k->k.p) < 0
@@ -1391,8 +1387,8 @@ static void *trans_update_key(struct btree_trans *trans,
struct btree_iter *iter,
unsigned u64s)
{
+ struct btree_insert_entry *i;
struct bkey_i *new_k;
- unsigned i;
new_k = bch2_trans_kmalloc(trans, u64s * sizeof(u64));
if (IS_ERR(new_k))
@@ -1401,19 +1397,13 @@ static void *trans_update_key(struct btree_trans *trans,
bkey_init(&new_k->k);
new_k->k.p = iter->pos;
- for (i = 0; i < trans->nr_updates; i++)
- if (!trans->updates[i].deferred &&
- trans->updates[i].iter == iter) {
- trans->updates[i].k = new_k;
+ trans_for_each_update_iter(trans, i)
+ if (i->iter == iter) {
+ i->k = new_k;
return new_k;
}
- bch2_trans_update(trans, ((struct btree_insert_entry) {
- .iter = iter,
- .k = new_k,
- .triggered = true,
- }));
-
+ bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, new_k));
return new_k;
}
@@ -1496,6 +1486,7 @@ static int bch2_trans_mark_pointer(struct btree_trans *trans,
bch2_fs_inconsistent_on(overflow, c,
"bucket sector count overflow: %u + %lli > U16_MAX",
old, sectors);
+ BUG_ON(overflow);
a = trans_update_key(trans, iter, BKEY_ALLOC_U64s_MAX);
ret = PTR_ERR_OR_ZERO(a);
diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c
index e55aa98c..a5c947e8 100644
--- a/libbcachefs/checksum.c
+++ b/libbcachefs/checksum.c
@@ -127,7 +127,6 @@ static void gen_poly_key(struct bch_fs *c, struct shash_desc *desc,
do_encrypt(c->chacha20, nonce, key, sizeof(key));
desc->tfm = c->poly1305;
- desc->flags = 0;
crypto_shash_init(desc);
crypto_shash_update(desc, key, sizeof(key));
}
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 0742d2c1..be2eca0f 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -1173,12 +1173,8 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
struct ec_stripe_new *s = NULL;
mutex_lock(&h->lock);
- bch2_open_buckets_stop_dev(c, ca,
- &h->blocks,
- BCH_DATA_USER);
- bch2_open_buckets_stop_dev(c, ca,
- &h->parity,
- BCH_DATA_USER);
+ bch2_open_buckets_stop_dev(c, ca, &h->blocks);
+ bch2_open_buckets_stop_dev(c, ca, &h->parity);
if (!h->s)
goto unlock;
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index 1aaff44e..304ff925 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -4,6 +4,8 @@
#include "io.h"
#include "super.h"
+#define FSCK_ERR_RATELIMIT_NR 10
+
bool bch2_inconsistent_error(struct bch_fs *c)
{
set_bit(BCH_FS_ERROR, &c->flags);
@@ -97,8 +99,8 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
found:
list_move(&s->list, &c->fsck_errors);
s->nr++;
- suppressing = s->nr == 10;
- print = s->nr <= 10;
+ suppressing = s->nr == FSCK_ERR_RATELIMIT_NR;
+ print = s->nr <= FSCK_ERR_RATELIMIT_NR;
buf = s->buf;
print:
va_start(args, fmt);
@@ -152,10 +154,9 @@ void bch2_flush_fsck_errs(struct bch_fs *c)
struct fsck_err_state *s, *n;
mutex_lock(&c->fsck_error_lock);
- set_bit(BCH_FS_FSCK_DONE, &c->flags);
list_for_each_entry_safe(s, n, &c->fsck_errors, list) {
- if (s->nr > 10)
+ if (s->nr > FSCK_ERR_RATELIMIT_NR)
bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->buf);
list_del(&s->list);
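
The ratelimit threshold that was hard-coded as 10 is now the named FSCK_ERR_RATELIMIT_NR: the first N instances of each distinct fsck error are printed, suppression is announced exactly once when the count reaches N, and the total is reported when the errors are flushed. A self-contained sketch of that counter logic (the struct and helpers are illustrative, not bcachefs's fsck_err_state):

#include <stdio.h>

#define FSCK_ERR_RATELIMIT_NR 10

struct err_state { unsigned long long nr; };

static void report(struct err_state *s, const char *msg)
{
	s->nr++;
	if (s->nr <= FSCK_ERR_RATELIMIT_NR)
		printf("fsck error: %s\n", msg);
	if (s->nr == FSCK_ERR_RATELIMIT_NR)
		printf("ratelimiting further errors of this type\n");
}

static void flush(struct err_state *s, const char *msg)
{
	if (s->nr > FSCK_ERR_RATELIMIT_NR)
		printf("Saw %llu errors like:\n    %s\n", s->nr, msg);
	s->nr = 0;
}

Resetting the counters on flush is what lets the repeat-GC path added in btree_gc.c above call bch2_flush_fsck_errs() before its `goto again`, so a second pass reports errors afresh instead of staying suppressed.
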
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index ecebd791..e10ea43b 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -672,8 +672,7 @@ const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k)
return bch2_bkey_ptrs_invalid(c, k);
}
-void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
- struct bkey_s_c k)
+void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const struct bch_extent_ptr *ptr;
@@ -877,13 +876,6 @@ static void verify_extent_nonoverlapping(struct bch_fs *c,
#endif
}
-static void verify_modified_extent(struct btree_iter *iter,
- struct bkey_packed *k)
-{
- bch2_btree_iter_verify(iter, iter->l[0].b);
- bch2_verify_insert_pos(iter->l[0].b, k, k, k->u64s);
-}
-
static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
struct bkey_i *insert)
{
@@ -896,6 +888,9 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
verify_extent_nonoverlapping(c, l->b, &l->iter, insert);
+ if (debug_check_bkeys(c))
+ bch2_bkey_debugcheck(c, l->b, bkey_i_to_s_c(insert));
+
node_iter = l->iter;
k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard);
if (k && !bkey_written(l->b, k) &&
@@ -922,7 +917,6 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_bset_insert(l->b, &l->iter, k, insert, 0);
bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
- bch2_btree_iter_verify(iter, l->b);
}
static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
@@ -942,12 +936,13 @@ static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
return ret;
}
-static int __bch2_extent_atomic_end(struct btree_trans *trans,
- struct bkey_s_c k,
- unsigned offset,
- struct bpos *end,
- unsigned *nr_iters,
- unsigned max_iters)
+static int count_iters_for_insert(struct btree_trans *trans,
+ struct bkey_s_c k,
+ unsigned offset,
+ struct bpos *end,
+ unsigned *nr_iters,
+ unsigned max_iters,
+ bool overwrite)
{
int ret = 0;
@@ -977,6 +972,20 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
break;
*nr_iters += 1;
+
+ if (overwrite &&
+ k.k->type == KEY_TYPE_reflink_v) {
+ struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+ if (le64_to_cpu(r.v->refcount) == 1)
+ *nr_iters += bch2_bkey_nr_alloc_ptrs(k);
+ }
+
+ /*
+ * if we're going to be deleting an entry from
+ * the reflink btree, need more iters...
+ */
+
if (*nr_iters >= max_iters) {
struct bpos pos = bkey_start_pos(k.k);
pos.offset += r_k.k->p.offset - idx;
@@ -994,11 +1003,11 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
return ret;
}
-int bch2_extent_atomic_end(struct btree_trans *trans,
- struct btree_iter *iter,
+int bch2_extent_atomic_end(struct btree_iter *iter,
struct bkey_i *insert,
struct bpos *end)
{
+ struct btree_trans *trans = iter->trans;
struct btree *b = iter->l[0].b;
struct btree_node_iter node_iter = iter->l[0].iter;
struct bkey_packed *_k;
@@ -1011,8 +1020,8 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
*end = bpos_min(insert->k.p, b->key.k.p);
- ret = __bch2_extent_atomic_end(trans, bkey_i_to_s_c(insert),
- 0, end, &nr_iters, 10);
+ ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert),
+ 0, end, &nr_iters, 10, false);
if (ret)
return ret;
@@ -1031,8 +1040,8 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
offset = bkey_start_offset(&insert->k) -
bkey_start_offset(k.k);
- ret = __bch2_extent_atomic_end(trans, k, offset,
- end, &nr_iters, 20);
+ ret = count_iters_for_insert(trans, k, offset,
+ end, &nr_iters, 20, true);
if (ret)
return ret;
@@ -1050,7 +1059,7 @@ int bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
struct bpos end;
int ret;
- ret = bch2_extent_atomic_end(iter->trans, iter, k, &end);
+ ret = bch2_extent_atomic_end(iter, k, &end);
if (ret)
return ret;
@@ -1063,7 +1072,7 @@ int bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
struct bpos end;
int ret;
- ret = bch2_extent_atomic_end(iter->trans, iter, k, &end);
+ ret = bch2_extent_atomic_end(iter, k, &end);
if (ret)
return ret;
@@ -1137,15 +1146,16 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
case BCH_EXTENT_OVERLAP_FRONT:
/* insert overlaps with start of k: */
__bch2_cut_front(insert->k.p, k);
- BUG_ON(bkey_deleted(k.k));
+ EBUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
- verify_modified_extent(iter, _k);
+ bch2_btree_node_iter_fix(iter, l->b, &l->iter,
+ _k, _k->u64s, _k->u64s);
break;
case BCH_EXTENT_OVERLAP_BACK:
/* insert overlaps with end of k: */
bch2_cut_back(bkey_start_pos(&insert->k), k.k);
- BUG_ON(bkey_deleted(k.k));
+ EBUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
/*
@@ -1156,7 +1166,6 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
bch2_bset_fix_invalidated_key(l->b, _k);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, _k->u64s, _k->u64s);
- verify_modified_extent(iter, _k);
break;
case BCH_EXTENT_OVERLAP_ALL: {
@@ -1173,12 +1182,10 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
bch2_bset_delete(l->b, _k, _k->u64s);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, u64s, 0);
- bch2_btree_iter_verify(iter, l->b);
} else {
extent_save(l->b, _k, k.k);
bch2_btree_node_iter_fix(iter, l->b, &l->iter,
_k, _k->u64s, _k->u64s);
- verify_modified_extent(iter, _k);
}
break;
@@ -1208,7 +1215,8 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
__bch2_cut_front(insert->k.p, k);
BUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
- verify_modified_extent(iter, _k);
+ bch2_btree_node_iter_fix(iter, l->b, &l->iter,
+ _k, _k->u64s, _k->u64s);
extent_bset_insert(c, iter, &split.k);
break;
@@ -1265,6 +1273,8 @@ static void __bch2_insert_fixup_extent(struct bch_fs *c,
btree_account_key_drop(l->b, _k);
_k->type = KEY_TYPE_discard;
reserve_whiteout(l->b, _k);
+ bch2_btree_node_iter_fix(iter, l->b, &l->iter,
+ _k, _k->u64s, _k->u64s);
}
break;
}
@@ -1359,10 +1369,6 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
if (s.deleting)
tmp.k.k.type = KEY_TYPE_discard;
- if (debug_check_bkeys(c))
- bch2_bkey_debugcheck(c, iter->l[0].b,
- bkey_i_to_s_c(&tmp.k));
-
EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
extent_bset_insert(c, iter, &tmp.k);
@@ -1387,8 +1393,7 @@ const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
return bch2_bkey_ptrs_invalid(c, k);
}
-void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b,
- struct bkey_s_c k)
+void bch2_extent_debugcheck(struct bch_fs *c, struct bkey_s_c k)
{
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
@@ -1762,6 +1767,12 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
if (ret == BCH_MERGE_NOMERGE)
return false;
+ if (debug_check_bkeys(c))
+ bch2_bkey_debugcheck(c, b, bkey_i_to_s_c(&li.k));
+ if (debug_check_bkeys(c) &&
+ ret == BCH_MERGE_PARTIAL)
+ bch2_bkey_debugcheck(c, b, bkey_i_to_s_c(&ri.k));
+
/*
* check if we overlap with deleted extents - would break the sort
* order:
@@ -1798,7 +1809,6 @@ static bool bch2_extent_merge_inline(struct bch_fs *c,
bch2_bset_fix_invalidated_key(b, m);
bch2_btree_node_iter_fix(iter, b, node_iter,
m, m->u64s, m->u64s);
- verify_modified_extent(iter, m);
return ret == BCH_MERGE_MERGE;
}
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 189ae4c7..613d76af 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -389,8 +389,7 @@ const char *bch2_bkey_ptrs_invalid(const struct bch_fs *, struct bkey_s_c);
/* bch_btree_ptr: */
const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c);
-void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *,
- struct bkey_s_c);
+void bch2_btree_ptr_debugcheck(struct bch_fs *, struct bkey_s_c);
void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
@@ -405,7 +404,7 @@ void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *);
/* bch_extent: */
const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c);
-void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c);
+void bch2_extent_debugcheck(struct bch_fs *, struct bkey_s_c);
void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
enum merge_result bch2_extent_merge(struct bch_fs *,
@@ -433,8 +432,8 @@ enum merge_result bch2_reservation_merge(struct bch_fs *,
.key_merge = bch2_reservation_merge, \
}
-int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *,
- struct bkey_i *, struct bpos *);
+int bch2_extent_atomic_end(struct btree_iter *, struct bkey_i *,
+ struct bpos *);
int bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
int bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
@@ -455,12 +454,11 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c);
bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
struct bch_extent_ptr, u64);
-static inline bool bkey_extent_is_data(const struct bkey *k)
+static inline bool bkey_extent_is_direct_data(const struct bkey *k)
{
switch (k->type) {
case KEY_TYPE_btree_ptr:
case KEY_TYPE_extent:
- case KEY_TYPE_reflink_p:
case KEY_TYPE_reflink_v:
return true;
default:
@@ -468,6 +466,12 @@ static inline bool bkey_extent_is_data(const struct bkey *k)
}
}
+static inline bool bkey_extent_is_data(const struct bkey *k)
+{
+ return bkey_extent_is_direct_data(k) ||
+ k->type == KEY_TYPE_reflink_p;
+}
+
/*
* Should extent be counted under inode->i_sectors?
*/
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index d635ebb5..aff70324 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -749,6 +749,9 @@ static void bch2_set_page_dirty(struct bch_fs *c,
struct bch_page_state *s = bch2_page_state(page);
unsigned i, dirty_sectors = 0;
+ WARN_ON(page_offset(page) + offset + len >
+ round_up(i_size_read(&inode->v), block_bytes(c)));
+
for (i = round_down(offset, block_bytes(c)) >> 9;
i < round_up(offset + len, block_bytes(c)) >> 9;
i++) {
@@ -780,6 +783,8 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
struct address_space *mapping = inode->v.i_mapping;
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_page_reservation res;
+ unsigned len;
+ loff_t isize;
int ret = VM_FAULT_LOCKED;
bch2_page_reservation_init(c, inode, &res);
@@ -797,21 +802,27 @@ vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
pagecache_add_get(&mapping->add_lock);
lock_page(page);
- if (page->mapping != mapping ||
- page_offset(page) > i_size_read(&inode->v)) {
+ isize = i_size_read(&inode->v);
+
+ if (page->mapping != mapping || page_offset(page) >= isize) {
unlock_page(page);
ret = VM_FAULT_NOPAGE;
goto out;
}
- if (bch2_page_reservation_get(c, inode, page, &res,
- 0, PAGE_SIZE, true)) {
+ /* page is wholly or partially inside EOF */
+ if (((page->index + 1) << PAGE_SHIFT) <= isize)
+ len = PAGE_SIZE;
+ else
+ len = offset_in_page(isize);
+
+ if (bch2_page_reservation_get(c, inode, page, &res, 0, len, true)) {
unlock_page(page);
ret = VM_FAULT_SIGBUS;
goto out;
}
- bch2_set_page_dirty(c, inode, page, &res, 0, PAGE_SIZE);
+ bch2_set_page_dirty(c, inode, page, &res, 0, len);
wait_for_stable_page(page);
out:
if (current->pagecache_lock != &mapping->add_lock)
@@ -884,9 +895,8 @@ static void bch2_readpages_end_io(struct bio *bio)
{
struct bvec_iter_all iter;
struct bio_vec *bv;
- int i;
- bio_for_each_segment_all(bv, bio, i, iter) {
+ bio_for_each_segment_all(bv, bio, iter) {
struct page *page = bv->bv_page;
if (!bio->bi_status) {
@@ -1287,10 +1297,10 @@ static void bch2_writepage_io_done(struct closure *cl)
struct bio *bio = &io->op.op.wbio.bio;
struct bvec_iter_all iter;
struct bio_vec *bvec;
- unsigned i, j;
+ unsigned i;
if (io->op.op.error) {
- bio_for_each_segment_all(bvec, bio, i, iter) {
+ bio_for_each_segment_all(bvec, bio, iter) {
struct bch_page_state *s;
SetPageError(bvec->bv_page);
@@ -1298,8 +1308,8 @@ static void bch2_writepage_io_done(struct closure *cl)
lock_page(bvec->bv_page);
s = bch2_page_state(bvec->bv_page);
- for (j = 0; j < PAGE_SECTORS; j++)
- s->s[j].nr_replicas = 0;
+ for (i = 0; i < PAGE_SECTORS; i++)
+ s->s[i].nr_replicas = 0;
unlock_page(bvec->bv_page);
}
}
@@ -1325,7 +1335,7 @@ static void bch2_writepage_io_done(struct closure *cl)
i_sectors_acct(c, io->op.inode, NULL,
io->op.sectors_added - (s64) io->new_sectors);
- bio_for_each_segment_all(bvec, bio, i, iter) {
+ bio_for_each_segment_all(bvec, bio, iter) {
struct bch_page_state *s = __bch2_page_state(bvec->bv_page);
if (atomic_dec_and_test(&s->write_count))
@@ -1490,6 +1500,10 @@ do_io:
BUG_ON(!bio_add_page(&w->io->op.op.wbio.bio, page,
sectors << 9, offset << 9));
+ /* Check for writing past i_size: */
+ WARN_ON((bio_end_sector(&w->io->op.op.wbio.bio) << 9) >
+ round_up(i_size, block_bytes(c)));
+
w->io->op.op.res.sectors += reserved_sectors;
w->io->op.new_i_size = i_size;
@@ -1994,16 +2008,17 @@ static void bch2_dio_write_loop_async(struct closure *);
static long bch2_dio_write_loop(struct dio_write *dio)
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
+ struct bch_fs *c = dio->iop.op.c;
struct kiocb *req = dio->req;
struct address_space *mapping = req->ki_filp->f_mapping;
struct bch_inode_info *inode = dio->iop.inode;
struct bio *bio = &dio->iop.op.wbio.bio;
struct bvec_iter_all iter;
struct bio_vec *bv;
+ unsigned unaligned;
loff_t offset;
bool sync;
long ret;
- int i;
if (dio->loop)
goto loop;
@@ -2036,6 +2051,21 @@ static long bch2_dio_write_loop(struct dio_write *dio)
if (unlikely(ret < 0))
goto err;
+ unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
+ bio->bi_iter.bi_size -= unaligned;
+ iov_iter_revert(&dio->iter, unaligned);
+
+ if (!bio->bi_iter.bi_size) {
+ /*
+	 * bio_iov_iter_get_pages() was only able to get less than
+	 * one block's worth of pages:
+ */
+ bio_for_each_segment_all(bv, bio, iter)
+ put_page(bv->bv_page);
+ ret = -EFAULT;
+ goto err;
+ }
+
/* gup might have faulted pages back in: */
ret = write_invalidate_inode_pages_range(mapping,
offset,
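
The new code above trims the bio down to a block-aligned size and reverts the iterator by the remainder, so each O_DIRECT pass writes whole filesystem blocks. A small standalone sketch of the masking arithmetic, assuming the block size is a power of two:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t block_bytes = 4096;	/* assumed filesystem block size */
	uint32_t bi_size     = 10000;	/* bytes the iterator managed to pin */

	/* Mask off the sub-block tail, as the hunk above does: */
	uint32_t unaligned = bi_size & (block_bytes - 1);
	bi_size -= unaligned;

	assert(unaligned == 1808 && bi_size == 8192);

	/* In the real code, iov_iter_revert() hands the 1808-byte tail back
	 * to the iterator for the next loop pass; if bi_size dropped to zero
	 * (less than one block was pinned), the pages are released and the
	 * write fails with -EFAULT. */
	return 0;
}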
@@ -2076,7 +2106,7 @@ err_wait_io:
closure_sync(&dio->cl);
loop:
- bio_for_each_segment_all(bv, bio, i, iter)
+ bio_for_each_segment_all(bv, bio, iter)
put_page(bv->bv_page);
if (!dio->iter.count || dio->iop.op.error)
break;
@@ -2086,8 +2116,8 @@ loop:
ret = dio->iop.op.error ?: ((long) dio->iop.op.written << 9);
err:
__pagecache_block_put(&mapping->add_lock);
- bch2_disk_reservation_put(dio->iop.op.c, &dio->iop.op.res);
- bch2_quota_reservation_put(dio->iop.op.c, inode, &dio->quota_res);
+ bch2_disk_reservation_put(c, &dio->iop.op.res);
+ bch2_quota_reservation_put(c, inode, &dio->quota_res);
if (dio->free_iov)
kfree(dio->iter.iov);
@@ -2530,6 +2560,16 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
if (unlikely(ret))
goto err;
+ /*
+ * When extending, we're going to write the new i_size to disk
+ * immediately so we need to flush anything above the current on disk
+ * i_size first:
+ *
+	 * Also, when extending we need to flush the page that i_size currently
+	 * straddles - if it's mapped to userspace, userspace must redirty it
+	 * and call .mkwrite -> set_page_dirty again so the part of the page
+	 * that was extended gets allocated.
+ */
if (iattr->ia_size > inode->ei_inode.bi_size)
ret = filemap_write_and_wait_range(mapping,
inode->ei_inode.bi_size,
@@ -2608,16 +2648,16 @@ err:
return ret;
}
-static long bch2_fcollapse(struct bch_inode_info *inode,
- loff_t offset, loff_t len)
+static long bch2_fcollapse_finsert(struct bch_inode_info *inode,
+ loff_t offset, loff_t len,
+ bool insert)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
struct btree_trans trans;
- struct btree_iter *src, *dst;
- BKEY_PADDED(k) copy;
- struct bkey_s_c k;
- loff_t new_size;
+ struct btree_iter *src, *dst, *del = NULL;
+ loff_t shift, new_size;
+ u64 src_start;
int ret;
if ((offset | len) & (block_bytes(c) - 1))
@@ -2635,92 +2675,188 @@ static long bch2_fcollapse(struct bch_inode_info *inode,
inode_dio_wait(&inode->v);
pagecache_block_get(&mapping->add_lock);
- ret = -EINVAL;
- if (offset + len >= inode->v.i_size)
- goto err;
+ if (insert) {
+ ret = -EFBIG;
+ if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
+ goto err;
- if (inode->v.i_size < len)
- goto err;
+ ret = -EINVAL;
+ if (offset >= inode->v.i_size)
+ goto err;
+
+ src_start = U64_MAX;
+ shift = len;
+ } else {
+ ret = -EINVAL;
+ if (offset + len >= inode->v.i_size)
+ goto err;
- new_size = inode->v.i_size - len;
+ src_start = offset + len;
+ shift = -len;
+ }
+
+ new_size = inode->v.i_size + shift;
ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
if (ret)
goto err;
- dst = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
- POS(inode->v.i_ino, offset >> 9),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- BUG_ON(IS_ERR_OR_NULL(dst));
+ if (insert) {
+ i_size_write(&inode->v, new_size);
+ mutex_lock(&inode->ei_update_lock);
+ ret = bch2_write_inode_size(c, inode, new_size,
+ ATTR_MTIME|ATTR_CTIME);
+ mutex_unlock(&inode->ei_update_lock);
+ } else {
+ ret = __bch2_fpunch(c, inode, offset >> 9,
+ (offset + len) >> 9);
+ if (ret)
+ goto err;
+ }
src = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
- POS_MIN, BTREE_ITER_SLOTS);
+ POS(inode->v.i_ino, src_start >> 9),
+ BTREE_ITER_INTENT);
BUG_ON(IS_ERR_OR_NULL(src));
- while (bkey_cmp(dst->pos,
- POS(inode->v.i_ino,
- round_up(new_size, block_bytes(c)) >> 9)) < 0) {
- struct disk_reservation disk_res;
+ dst = bch2_trans_copy_iter(&trans, src);
+ BUG_ON(IS_ERR_OR_NULL(dst));
- ret = bch2_btree_iter_traverse(dst);
- if (ret)
+ while (1) {
+ struct disk_reservation disk_res =
+ bch2_disk_reservation_init(c, 0);
+ BKEY_PADDED(k) copy;
+ struct bkey_i delete;
+ struct bkey_s_c k;
+ struct bpos next_pos;
+ struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
+ struct bpos atomic_end;
+ unsigned commit_flags = BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_USE_RESERVE;
+
+ k = insert
+ ? bch2_btree_iter_peek_prev(src)
+ : bch2_btree_iter_peek(src);
+ if ((ret = bkey_err(k)))
goto bkey_err;
- bch2_btree_iter_set_pos(src,
- POS(dst->pos.inode, dst->pos.offset + (len >> 9)));
+ if (!k.k || k.k->p.inode != inode->v.i_ino)
+ break;
- k = bch2_btree_iter_peek_slot(src);
- if ((ret = bkey_err(k)))
- goto bkey_err;
+ BUG_ON(bkey_cmp(src->pos, bkey_start_pos(k.k)));
+ if (insert &&
+ bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
+ break;
+reassemble:
bkey_reassemble(&copy.k, k);
- bch2_cut_front(src->pos, &copy.k);
- copy.k.k.p.offset -= len >> 9;
+ if (insert &&
+ bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) {
+ bch2_cut_front(move_pos, &copy.k);
+ bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k.k));
+ }
+
+ copy.k.k.p.offset += shift >> 9;
+ bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k.k));
+
+ ret = bch2_btree_iter_traverse(dst);
+ if (ret)
+ goto bkey_err;
- ret = bch2_extent_trim_atomic(&copy.k, dst);
+ ret = bch2_extent_atomic_end(dst, &copy.k, &atomic_end);
if (ret)
goto bkey_err;
- BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(&copy.k.k)));
+ if (bkey_cmp(atomic_end, copy.k.k.p)) {
+ if (insert) {
+ move_pos = atomic_end;
+ move_pos.offset -= shift >> 9;
+ goto reassemble;
+ } else {
+ bch2_cut_back(atomic_end, &copy.k.k);
+ }
+ }
- ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size,
- bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k)),
- BCH_DISK_RESERVATION_NOFAIL);
- BUG_ON(ret);
+ bkey_init(&delete.k);
+ delete.k.p = src->pos;
+ bch2_key_resize(&delete.k, copy.k.k.size);
- bch2_trans_begin_updates(&trans);
+ next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
- ret = bch2_extent_update(&trans, inode,
- &disk_res, NULL,
- dst, &copy.k,
- 0, true, true, NULL);
+ /*
+ * If the new and old keys overlap (because we're moving an
+ * extent that's bigger than the amount we're collapsing by),
+	 * we need to trim the delete key here so they don't overlap:
+	 * overlaps on insertions aren't handled before triggers are
+	 * run, so the overwrite would get double counted by the
+	 * triggers machinery.
+ */
+ if (insert &&
+ bkey_cmp(bkey_start_pos(&copy.k.k), delete.k.p) < 0) {
+ bch2_cut_back(bkey_start_pos(&copy.k.k), &delete.k);
+ } else if (!insert &&
+ bkey_cmp(copy.k.k.p,
+ bkey_start_pos(&delete.k)) > 0) {
+ bch2_cut_front(copy.k.k.p, &delete);
+
+ del = bch2_trans_copy_iter(&trans, src);
+ BUG_ON(IS_ERR_OR_NULL(del));
+
+ bch2_btree_iter_set_pos(del,
+ bkey_start_pos(&delete.k));
+ }
+
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(dst, &copy.k));
+ bch2_trans_update(&trans,
+ BTREE_INSERT_ENTRY(del ?: src, &delete));
+
+ if (copy.k.k.size == k.k->size) {
+ /*
+ * If we're moving the entire extent, we can skip
+ * running triggers:
+ */
+ commit_flags |= BTREE_INSERT_NOMARK;
+ } else {
+ /* We might end up splitting compressed extents: */
+ unsigned nr_ptrs =
+ bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k));
+
+ ret = bch2_disk_reservation_get(c, &disk_res,
+ copy.k.k.size, nr_ptrs,
+ BCH_DISK_RESERVATION_NOFAIL);
+ BUG_ON(ret);
+ }
+
+ ret = bch2_trans_commit(&trans, &disk_res,
+ &inode->ei_journal_seq,
+ commit_flags);
bch2_disk_reservation_put(c, &disk_res);
bkey_err:
+ if (del)
+ bch2_trans_iter_free(&trans, del);
+ del = NULL;
+
+ if (!ret)
+ bch2_btree_iter_set_pos(src, next_pos);
+
if (ret == -EINTR)
ret = 0;
if (ret)
goto err;
- /*
- * XXX: if we error here we've left data with multiple
- * pointers... which isn't a _super_ serious problem...
- */
bch2_trans_cond_resched(&trans);
}
bch2_trans_unlock(&trans);
- ret = __bch2_fpunch(c, inode,
- round_up(new_size, block_bytes(c)) >> 9,
- U64_MAX);
- if (ret)
- goto err;
-
- i_size_write(&inode->v, new_size);
- mutex_lock(&inode->ei_update_lock);
- ret = bch2_write_inode_size(c, inode, new_size,
- ATTR_MTIME|ATTR_CTIME);
- mutex_unlock(&inode->ei_update_lock);
+ if (!insert) {
+ i_size_write(&inode->v, new_size);
+ mutex_lock(&inode->ei_update_lock);
+ ret = bch2_write_inode_size(c, inode, new_size,
+ ATTR_MTIME|ATTR_CTIME);
+ mutex_unlock(&inode->ei_update_lock);
+ }
err:
bch2_trans_exit(&trans);
pagecache_block_put(&mapping->add_lock);
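
The comment inside the loop above describes trimming the delete key when the shifted copy overlaps it. A toy model of that trim, with made-up sector numbers (editorial sketch, not commit code):

#include <assert.h>
#include <stdint.h>

struct sk_range { uint64_t start, end; };	/* half-open, in sectors */

int main(void)
{
	uint64_t shift = 50;				/* finsert shift, upward */
	struct sk_range old  = { 100, 200 };		/* extent being moved */
	struct sk_range copy = { old.start + shift, old.end + shift };
	struct sk_range del  = old;			/* key deleting the old position */

	/* The shifted copy overlaps the delete key: cut the delete key back
	 * to the copy's start so the two updates stay disjoint. */
	if (copy.start < del.end)
		del.end = copy.start;

	assert(del.end == copy.start);			/* [100,150) and [150,250) */
	return 0;
}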
@@ -2889,8 +3025,11 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
return bch2_fpunch(inode, offset, len);
+ if (mode == FALLOC_FL_INSERT_RANGE)
+ return bch2_fcollapse_finsert(inode, offset, len, true);
+
if (mode == FALLOC_FL_COLLAPSE_RANGE)
- return bch2_fcollapse(inode, offset, len);
+ return bch2_fcollapse_finsert(inode, offset, len, false);
return -EOPNOTSUPP;
}
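
Collapse and insert are now the same walk with an opposite shift: collapse moves extents above offset+len down by len, insert moves extents at or above offset up by len, and i_size moves by the same signed amount. A hedged model of that bookkeeping (sk_setup is an invented helper, not a bcachefs function):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct sk_shift { int64_t shift; uint64_t src_start, new_size; };

static struct sk_shift sk_setup(bool insert, uint64_t offset, uint64_t len,
				uint64_t i_size)
{
	struct sk_shift s;

	s.shift     = insert ? (int64_t) len : -(int64_t) len;
	/* insert walks extents backwards from the end of the file;
	 * collapse walks forwards from the end of the collapsed range: */
	s.src_start = insert ? UINT64_MAX : offset + len;
	s.new_size  = i_size + s.shift;
	return s;
}

int main(void)
{
	struct sk_shift c = sk_setup(false, 4096, 8192, 65536);
	assert(c.shift == -8192 && c.new_size == 57344);

	struct sk_shift i = sk_setup(true, 4096, 8192, 65536);
	assert(i.shift == 8192 && i.new_size == 73728);
	return 0;
}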
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index e3738757..50a7d8c1 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -509,7 +509,7 @@ retry:
if (fsck_err_on(w.have_inode &&
!(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
k.k->type != KEY_TYPE_reservation &&
- k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c,
+ k.k->p.offset > round_up(w.inode.bi_size, block_bytes(c)) >> 9, c,
"extent type %u offset %llu past end of inode %llu, i_size %llu",
k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) {
bch2_trans_unlock(&trans);
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index e2ec5bea..ab8c2560 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -124,9 +124,8 @@ void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
{
struct bvec_iter_all iter;
struct bio_vec *bv;
- unsigned i;
- bio_for_each_segment_all(bv, bio, i, iter)
+ bio_for_each_segment_all(bv, bio, iter)
if (bv->bv_page != ZERO_PAGE(0))
mempool_free(bv->bv_page, &c->bio_bounce_pages);
bio->bi_vcnt = 0;
@@ -1210,10 +1209,15 @@ static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
return rbio;
}
+/*
+ * Only called on a top-level bch_read_bio to complete an entire read request,
+ * not a split:
+ */
static void bch2_rbio_done(struct bch_read_bio *rbio)
{
- bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
- rbio->start_time);
+ if (rbio->start_time)
+ bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
+ rbio->start_time);
bio_endio(&rbio->bio);
}
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 9595ba79..26a2c4fb 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -304,11 +304,10 @@ static void move_free(struct closure *cl)
struct moving_context *ctxt = io->write.ctxt;
struct bvec_iter_all iter;
struct bio_vec *bv;
- int i;
bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
- bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i, iter)
+ bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
if (bv->bv_page)
__free_page(bv->bv_page);
@@ -438,7 +437,8 @@ static int bch2_move_extent(struct bch_fs *c,
GFP_KERNEL))
goto err_free;
- io->rbio.opts = io_opts;
+ io->rbio.c = c;
+ io->rbio.opts = io_opts;
bio_init(&io->rbio.bio, io->bi_inline_vecs, pages);
io->rbio.bio.bi_vcnt = pages;
bio_set_prio(&io->rbio.bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
@@ -548,7 +548,7 @@ peek:
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break;
- if (!bkey_extent_is_data(k.k))
+ if (!bkey_extent_is_direct_data(k.k))
goto next_nondata;
if (cur_inum != k.k->p.inode) {
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 4797d620..84b3fb6e 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -42,9 +42,6 @@ void bch2_rebalance_add_key(struct bch_fs *c,
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
- if (!bkey_extent_is_data(k.k))
- return;
-
if (!io_opts->background_target &&
!io_opts->background_compression)
return;
@@ -72,30 +69,26 @@ static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
- switch (k.k->type) {
- case KEY_TYPE_extent: {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
-
- /* Make sure we have room to add a new pointer: */
- if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
- BKEY_EXTENT_VAL_U64s_MAX)
- return DATA_SKIP;
-
- extent_for_each_ptr_decode(e, p, entry)
- if (rebalance_ptr_pred(c, p, io_opts))
- goto found;
-
- return DATA_SKIP;
-found:
- data_opts->target = io_opts->background_target;
- data_opts->btree_insert_flags = 0;
- return DATA_ADD_REPLICAS;
- }
- default:
- return DATA_SKIP;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned nr_replicas = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ nr_replicas += !p.ptr.cached;
+
+ if (rebalance_ptr_pred(c, p, io_opts))
+ goto found;
}
+
+ if (nr_replicas < io_opts->data_replicas)
+ goto found;
+
+ return DATA_SKIP;
+found:
+ data_opts->target = io_opts->background_target;
+ data_opts->btree_insert_flags = 0;
+ return DATA_ADD_REPLICAS;
}
struct rebalance_work {
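
rebalance_pred() now iterates the pointers of any extent-like key rather than special-casing KEY_TYPE_extent, and additionally selects extents whose dirty-replica count has fallen below the data_replicas option. A simplified stand-in for that decision (the sk_* types are invented for illustration):

#include <assert.h>
#include <stdbool.h>

struct sk_ptr { bool cached; bool wants_move; };

static bool sk_needs_rebalance(const struct sk_ptr *ptrs, unsigned nr,
			       unsigned data_replicas)
{
	unsigned nr_replicas = 0, i;

	for (i = 0; i < nr; i++) {
		/* only dirty (non-cached) pointers count as replicas: */
		nr_replicas += !ptrs[i].cached;

		if (ptrs[i].wants_move)	/* rebalance_ptr_pred() said yes */
			return true;
	}

	/* under-replicated extents get picked up too: */
	return nr_replicas < data_replicas;
}

int main(void)
{
	struct sk_ptr p[] = { { .cached = true }, { .cached = false } };

	assert(sk_needs_rebalance(p, 2, 2));	/* one dirty copy, two wanted */
	assert(!sk_needs_rebalance(p, 2, 1));
	return 0;
}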
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index c9558ccb..98d9a143 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -281,8 +281,7 @@ retry:
if (ret)
goto err;
- ret = bch2_extent_atomic_end(&trans, split_iter,
- k, &atomic_end);
+ ret = bch2_extent_atomic_end(split_iter, k, &atomic_end);
if (ret)
goto err;
@@ -936,7 +935,9 @@ out:
ret = 0;
err:
fsck_err:
+ set_bit(BCH_FS_FSCK_DONE, &c->flags);
bch2_flush_fsck_errs(c);
+
journal_keys_free(&journal_keys);
journal_entries_free(&journal_entries);
kfree(clean);
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index d0602725..bb9da2bb 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -16,11 +16,16 @@ static inline int u8_cmp(u8 l, u8 r)
return cmp_int(l, r);
}
-static void verify_replicas_entry_sorted(struct bch_replicas_entry *e)
+static void verify_replicas_entry(struct bch_replicas_entry *e)
{
-#ifdef CONFIG_BCACHES_DEBUG
+#ifdef CONFIG_BCACHEFS_DEBUG
unsigned i;
+ BUG_ON(e->data_type >= BCH_DATA_NR);
+ BUG_ON(!e->nr_devs);
+ BUG_ON(e->nr_required > 1 &&
+ e->nr_required >= e->nr_devs);
+
for (i = 0; i + 1 < e->nr_devs; i++)
BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
@@ -158,7 +163,7 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
};
BUG_ON(!new_entry->data_type);
- verify_replicas_entry_sorted(new_entry);
+ verify_replicas_entry(new_entry);
new.entries = kcalloc(new.nr, new.entry_size, GFP_NOIO);
if (!new.entries)
@@ -185,7 +190,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
if (unlikely(entry_size > r->entry_size))
return -1;
- verify_replicas_entry_sorted(search);
+ verify_replicas_entry(search);
#define entry_cmp(_l, _r, size) memcmp(_l, _r, entry_size)
idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
@@ -216,7 +221,7 @@ static bool bch2_replicas_marked_locked(struct bch_fs *c,
if (!search->nr_devs)
return true;
- verify_replicas_entry_sorted(search);
+ verify_replicas_entry(search);
return __replicas_has_entry(&c->replicas, search) &&
(!check_gc_replicas ||
@@ -360,6 +365,8 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
struct bch_replicas_cpu new_r, new_gc;
int ret = -ENOMEM;
+ verify_replicas_entry(new_entry);
+
memset(&new_r, 0, sizeof(new_r));
memset(&new_gc, 0, sizeof(new_gc));
@@ -875,9 +882,8 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
goto err;
err = "invalid replicas entry: bad nr_required";
- if (!e->nr_required ||
- (e->nr_required > 1 &&
- e->nr_required >= e->nr_devs))
+ if (e->nr_required > 1 &&
+ e->nr_required >= e->nr_devs)
goto err;
err = "invalid replicas entry: invalid device";
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index 091bf7a8..ef30c73a 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -42,7 +42,6 @@ bch2_hash_info_init(struct bch_fs *c,
u8 digest[SHA256_DIGEST_SIZE];
desc->tfm = c->sha256;
- desc->flags = 0;
crypto_shash_digest(desc, (void *) &bi->bi_hash_seed,
sizeof(bi->bi_hash_seed), digest);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index bd4b3188..4145832f 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -494,6 +494,7 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_fs_ec_exit(c);
bch2_fs_encryption_exit(c);
bch2_fs_io_exit(c);
+ bch2_fs_btree_iter_exit(c);
bch2_fs_btree_cache_exit(c);
bch2_fs_journal_exit(&c->journal);
bch2_io_clock_exit(&c->io_clock[WRITE]);
@@ -505,7 +506,6 @@ static void bch2_fs_free(struct bch_fs *c)
free_percpu(c->usage[0]);
kfree(c->usage_base);
free_percpu(c->pcpu);
- mempool_exit(&c->btree_iters_pool);
mempool_exit(&c->btree_bounce_pool);
bioset_exit(&c->btree_bio);
mempool_exit(&c->btree_interior_update_pool);
@@ -758,15 +758,12 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
!(c->pcpu = alloc_percpu(struct bch_fs_pcpu)) ||
mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
btree_bytes(c)) ||
- mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
- sizeof(struct btree_iter) * BTREE_ITER_MAX +
- sizeof(struct btree_insert_entry) *
- (BTREE_ITER_MAX + 4)) ||
bch2_io_clock_init(&c->io_clock[READ]) ||
bch2_io_clock_init(&c->io_clock[WRITE]) ||
bch2_fs_journal_init(&c->journal) ||
bch2_fs_replicas_init(c) ||
bch2_fs_btree_cache_init(c) ||
+ bch2_fs_btree_iter_init(c) ||
bch2_fs_io_init(c) ||
bch2_fs_encryption_init(c) ||
bch2_fs_compress_init(c) ||