summary | refs | log | tree | commit | diff
path: root/libbcachefs
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2021-03-29 00:21:38 -0400
committer Kent Overstreet <kent.overstreet@gmail.com> 2021-03-29 00:22:49 -0400
commit 40e14938eeebf74830c870a067cca0e8c73feed7 (patch)
tree 3f39fa26d17146bbd3e5cdecc5943019c8b4ad3e /libbcachefs
parent a2094890a90a2f865e49f94e8448deca7e5852ef (diff)
Update bcachefs sources to 9922afc8b6 bcachefs: Add repair code for out of order keys in a btree node.
Diffstat (limited to 'libbcachefs')
-rw-r--r-- libbcachefs/btree_io.c 36
-rw-r--r-- libbcachefs/btree_key_cache.h 9
-rw-r--r-- libbcachefs/btree_update_leaf.c 2
-rw-r--r-- libbcachefs/fs-common.c 6
-rw-r--r-- libbcachefs/fsck.c 1
-rw-r--r-- libbcachefs/inode.c 78
-rw-r--r-- libbcachefs/inode.h 2
-rw-r--r-- libbcachefs/journal_reclaim.c 2
-rw-r--r-- libbcachefs/move.c 42
9 files changed, 123 insertions, 55 deletions
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index b43d4468..7fbacd9e 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -578,6 +578,10 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
mutex_unlock(&c->sb_lock);
}
+ btree_err_on(BSET_SEPARATE_WHITEOUTS(i),
+ BTREE_ERR_FATAL, c, ca, b, i,
+ "BSET_SEPARATE_WHITEOUTS no longer supported");
+
if (btree_err_on(b->written + sectors > c->opts.btree_node_size,
BTREE_ERR_FIXABLE, c, ca, b, i,
"bset past end of btree node")) {
@@ -660,14 +664,8 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
{
unsigned version = le16_to_cpu(i->version);
struct bkey_packed *k, *prev = NULL;
- bool seen_non_whiteout = false;
int ret = 0;
- if (!BSET_SEPARATE_WHITEOUTS(i)) {
- seen_non_whiteout = true;
- *whiteout_u64s = 0;
- }
-
for (k = i->start;
k != vstruct_last(i);) {
struct bkey_s u;
@@ -719,18 +717,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
BSET_BIG_ENDIAN(i), write,
&b->format, k);
- /*
- * with the separate whiteouts thing (used for extents), the
- * second set of keys actually can have whiteouts too, so we
- * can't solely go off bkey_deleted()...
- */
-
- if (!seen_non_whiteout &&
- (!bkey_deleted(k) ||
- (prev && bkey_iter_cmp(b, prev, k) > 0))) {
- *whiteout_u64s = k->_data - i->_data;
- seen_non_whiteout = true;
- } else if (prev && bkey_iter_cmp(b, prev, k) > 0) {
+ if (prev && bkey_iter_cmp(b, prev, k) > 0) {
char buf1[80];
char buf2[80];
struct bkey up = bkey_unpack_key(b, prev);
@@ -739,10 +726,15 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
bch2_bkey_to_text(&PBUF(buf2), u.k);
bch2_dump_bset(c, b, i, 0);
- btree_err(BTREE_ERR_FATAL, c, NULL, b, i,
- "keys out of order: %s > %s",
- buf1, buf2);
- /* XXX: repair this */
+
+ if (btree_err(BTREE_ERR_FIXABLE, c, NULL, b, i,
+ "keys out of order: %s > %s",
+ buf1, buf2)) {
+ i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s);
+ memmove_u64s_down(k, bkey_next(k),
+ (u64 *) vstruct_end(i) - (u64 *) k);
+ continue;
+ }
}
prev = k;
diff --git a/libbcachefs/btree_key_cache.h b/libbcachefs/btree_key_cache.h
index 02715cd2..4e1e5a9c 100644
--- a/libbcachefs/btree_key_cache.h
+++ b/libbcachefs/btree_key_cache.h
@@ -1,6 +1,15 @@
#ifndef _BCACHEFS_BTREE_KEY_CACHE_H
#define _BCACHEFS_BTREE_KEY_CACHE_H
+static inline size_t bch2_nr_btree_keys_want_flush(struct bch_fs *c)
+{
+ size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
+ size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys);
+ size_t max_dirty = nr_keys / 4;
+
+ return max_t(ssize_t, 0, nr_dirty - max_dirty);
+}
+
static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c)
{
size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty);
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 67a2c65b..221a6004 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -1188,7 +1188,7 @@ retry:
goto retry;
}
- bch2_trans_iter_put(trans, iter);
+ bch2_trans_iter_free(trans, iter);
return ret;
}
diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c
index 83c2168c..281a6135 100644
--- a/libbcachefs/fs-common.c
+++ b/libbcachefs/fs-common.c
@@ -36,7 +36,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
if (!name)
new_inode->bi_flags |= BCH_INODE_UNLINKED;
- inode_iter = bch2_inode_create(trans, new_inode);
+ inode_iter = bch2_inode_create(trans, new_inode, U32_MAX);
ret = PTR_ERR_OR_ZERO(inode_iter);
if (ret)
goto err;
@@ -80,6 +80,10 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
new_inode->bi_dir_offset = dir_offset;
}
+ /* XXX use bch2_btree_iter_set_snapshot() */
+ inode_iter->snapshot = U32_MAX;
+ bch2_btree_iter_set_pos(inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX));
+
ret = bch2_inode_write(trans, inode_iter, new_inode);
err:
bch2_trans_iter_put(trans, inode_iter);
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 62788ae1..acf128f0 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -1361,6 +1361,7 @@ static int check_inode(struct btree_trans *trans,
struct bkey_inode_buf p;
bch2_inode_pack(c, &p, &u);
+ p.inode.k.p = iter->pos;
ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index f1665ca8..d4c32839 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -471,12 +471,13 @@ static inline u32 bkey_generation(struct bkey_s_c k)
}
struct btree_iter *bch2_inode_create(struct btree_trans *trans,
- struct bch_inode_unpacked *inode_u)
+ struct bch_inode_unpacked *inode_u,
+ u32 snapshot)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter = NULL;
struct bkey_s_c k;
- u64 min, max, start, *hint;
+ u64 min, max, start, pos, *hint;
int ret;
u64 cpu = raw_smp_processor_id();
@@ -493,39 +494,70 @@ struct btree_iter *bch2_inode_create(struct btree_trans *trans,
if (start >= max || start < min)
start = min;
+
+ pos = start;
+ iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, pos),
+ BTREE_ITER_ALL_SNAPSHOTS|
+ BTREE_ITER_INTENT);
again:
- for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, start),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
- if (bkey_cmp(iter->pos, POS(0, max)) > 0)
- break;
+ while ((k = bch2_btree_iter_peek(iter)).k &&
+ !(ret = bkey_err(k)) &&
+ bkey_cmp(k.k->p, POS(0, max)) < 0) {
+ while (pos < iter->pos.offset) {
+ if (!bch2_btree_key_cache_find(c, BTREE_ID_inodes, POS(0, pos)))
+ goto found_slot;
+
+ pos++;
+ }
+
+ if (k.k->p.snapshot == snapshot &&
+ k.k->type != KEY_TYPE_inode &&
+ !bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, snapshot))) {
+ bch2_btree_iter_next(iter);
+ continue;
+ }
/*
- * There's a potential cache coherency issue with the btree key
- * cache code here - we're iterating over the btree, skipping
- * that cache. We should never see an empty slot that isn't
- * actually empty due to a pending update in the key cache
- * because the update that creates the inode isn't done with a
- * cached iterator, but - better safe than sorry, check the
- * cache before using a slot:
+ * We don't need to iterate over keys in every snapshot once
+ * we've found just one:
*/
- if (k.k->type != KEY_TYPE_inode &&
- !bch2_btree_key_cache_find(c, BTREE_ID_inodes, iter->pos))
+ pos = iter->pos.offset + 1;
+ bch2_btree_iter_set_pos(iter, POS(0, pos));
+ }
+
+ while (!ret && pos < max) {
+ if (!bch2_btree_key_cache_find(c, BTREE_ID_inodes, POS(0, pos)))
goto found_slot;
+
+ pos++;
}
- bch2_trans_iter_put(trans, iter);
+ if (!ret && start == min)
+ ret = -ENOSPC;
- if (ret)
+ if (ret) {
+ bch2_trans_iter_put(trans, iter);
return ERR_PTR(ret);
-
- if (start != min) {
- /* Retry from start */
- start = min;
- goto again;
}
- return ERR_PTR(-ENOSPC);
+ /* Retry from start */
+ pos = start = min;
+ bch2_btree_iter_set_pos(iter, POS(0, pos));
+ goto again;
found_slot:
+ bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret) {
+ bch2_trans_iter_put(trans, iter);
+ return ERR_PTR(ret);
+ }
+
+ /* We may have raced while the iterator wasn't pointing at pos: */
+ if (k.k->type == KEY_TYPE_inode ||
+ bch2_btree_key_cache_find(c, BTREE_ID_inodes, k.k->p))
+ goto again;
+
*hint = k.k->p.offset;
inode_u->bi_inum = k.k->p.offset;
inode_u->bi_generation = bkey_generation(k);
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 6bad6dfb..23c322d9 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -70,7 +70,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
struct bch_inode_unpacked *);
struct btree_iter *bch2_inode_create(struct btree_trans *,
- struct bch_inode_unpacked *);
+ struct bch_inode_unpacked *, u32);
int bch2_inode_rm(struct bch_fs *, u64, bool);
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index 4a5b50ed..93b5e07e 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -602,7 +602,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct)
if (fifo_free(&j->pin) <= 32)
min_nr = 1;
- min_nr = max(min_nr, bch2_nr_btree_keys_need_flush(c));
+ min_nr = max(min_nr, bch2_nr_btree_keys_want_flush(c));
trace_journal_reclaim_start(c,
min_nr,
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 732e2dbb..c9e18491 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -509,6 +509,32 @@ err:
return ret;
}
+static int lookup_inode(struct btree_trans *trans, struct bpos pos,
+ struct bch_inode_unpacked *inode)
+{
+ struct btree_iter *iter;
+ struct bkey_s_c k;
+ int ret;
+
+ iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, pos,
+ BTREE_ITER_ALL_SNAPSHOTS);
+ k = bch2_btree_iter_peek(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ ret = k.k->type == KEY_TYPE_inode ? 0 : -EIO;
+ if (ret)
+ goto err;
+
+ ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode);
+ if (ret)
+ goto err;
+err:
+ bch2_trans_iter_put(trans, iter);
+ return ret;
+}
+
static int __bch2_move_data(struct bch_fs *c,
struct moving_context *ctxt,
struct bch_ratelimit *rate,
@@ -566,7 +592,7 @@ static int __bch2_move_data(struct bch_fs *c,
try_to_freeze();
}
} while (delay);
-peek:
+
k = bch2_btree_iter_peek(iter);
stats->pos = iter->pos;
@@ -586,14 +612,18 @@ peek:
cur_inum != k.k->p.inode) {
struct bch_inode_unpacked inode;
- /* don't hold btree locks while looking up inode: */
- bch2_trans_unlock(&trans);
-
io_opts = bch2_opts_to_inode_opts(c->opts);
- if (!bch2_inode_find_by_inum(c, k.k->p.inode, &inode))
+
+ ret = lookup_inode(&trans,
+ SPOS(0, k.k->p.inode, k.k->p.snapshot),
+ &inode);
+ if (ret == -EINTR)
+ continue;
+
+ if (!ret)
bch2_io_opts_apply(&io_opts, bch2_inode_opts_get(&inode));
+
cur_inum = k.k->p.inode;
- goto peek;
}
switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) {