summary | refs | log | tree | commit | diff
path: root/libbcachefs
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2022-08-10 20:28:55 -0400
committer Kent Overstreet <kent.overstreet@gmail.com> 2022-08-11 17:34:34 -0400
commit ddac1641ee1e2686c2211a8d671ea723634dfc89 (patch)
tree 0596db2376fb5cbdcd83bf1642efdc7868ade480 /libbcachefs
parent a6128b5335a79cd68b9dbb6b083a835d94539d04 (diff)
Update bcachefs sources to 90a9c61e2b bcachefs: Switch bch2_btree_delete_range() to bch2_trans_run()
Diffstat (limited to 'libbcachefs')
-rw-r--r-- libbcachefs/acl.c 4
-rw-r--r-- libbcachefs/alloc_background.c 300
-rw-r--r-- libbcachefs/alloc_background.h 10
-rw-r--r-- libbcachefs/alloc_foreground.c 75
-rw-r--r-- libbcachefs/backpointers.c 39
-rw-r--r-- libbcachefs/bcachefs.h 10
-rw-r--r-- libbcachefs/btree_cache.c 70
-rw-r--r-- libbcachefs/btree_gc.c 287
-rw-r--r-- libbcachefs/btree_io.c 8
-rw-r--r-- libbcachefs/btree_iter.c 508
-rw-r--r-- libbcachefs/btree_iter.h 145
-rw-r--r-- libbcachefs/btree_key_cache.c 64
-rw-r--r-- libbcachefs/btree_locking.h 109
-rw-r--r-- libbcachefs/btree_types.h 36
-rw-r--r-- libbcachefs/btree_update.h 27
-rw-r--r-- libbcachefs/btree_update_interior.c 39
-rw-r--r-- libbcachefs/btree_update_leaf.c 104
-rw-r--r-- libbcachefs/buckets.c 35
-rw-r--r-- libbcachefs/checksum.c 31
-rw-r--r-- libbcachefs/data_update.c 4
-rw-r--r-- libbcachefs/debug.c 153
-rw-r--r-- libbcachefs/dirent.c 4
-rw-r--r-- libbcachefs/ec.c 138
-rw-r--r-- libbcachefs/errcode.c 51
-rw-r--r-- libbcachefs/errcode.h 73
-rw-r--r-- libbcachefs/error.c 13
-rw-r--r-- libbcachefs/error.h 27
-rw-r--r-- libbcachefs/fs-io.c 62
-rw-r--r-- libbcachefs/fs-io.h 6
-rw-r--r-- libbcachefs/fs.c 22
-rw-r--r-- libbcachefs/fsck.c 305
-rw-r--r-- libbcachefs/inode.c 4
-rw-r--r-- libbcachefs/io.c 28
-rw-r--r-- libbcachefs/journal.c 11
-rw-r--r-- libbcachefs/journal_io.c 18
-rw-r--r-- libbcachefs/journal_reclaim.c 13
-rw-r--r-- libbcachefs/journal_seq_blacklist.c 2
-rw-r--r-- libbcachefs/lru.c 27
-rw-r--r-- libbcachefs/migrate.c 121
-rw-r--r-- libbcachefs/move.c 23
-rw-r--r-- libbcachefs/movinggc.c 11
-rw-r--r-- libbcachefs/movinggc.h 1
-rw-r--r-- libbcachefs/opts.h 5
-rw-r--r-- libbcachefs/quota.c 49
-rw-r--r-- libbcachefs/rebalance.c 9
-rw-r--r-- libbcachefs/recovery.c 43
-rw-r--r-- libbcachefs/reflink.c 5
-rw-r--r-- libbcachefs/subvolume.c 132
-rw-r--r-- libbcachefs/super-io.c 11
-rw-r--r-- libbcachefs/super.c 48
-rw-r--r-- libbcachefs/tests.c 225
-rw-r--r-- libbcachefs/trace.c 6
-rw-r--r-- libbcachefs/util.c 22
-rw-r--r-- libbcachefs/xattr.c 2
54 files changed, 1903 insertions, 1672 deletions
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 5070caf8..5c6ccf68 100644
--- a/libbcachefs/acl.c
+++ b/libbcachefs/acl.c
@@ -236,7 +236,7 @@ retry:
&X_SEARCH(acl_to_xattr_type(type), "", 0),
0);
if (ret) {
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (ret != -ENOENT)
acl = ERR_PTR(ret);
@@ -335,7 +335,7 @@ retry:
btree_err:
bch2_trans_iter_exit(&trans, &inode_iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (unlikely(ret))
goto err;
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index ca1f45cc..2281b8d4 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -543,7 +543,7 @@ int bch2_alloc_read(struct bch_fs *c)
bch2_trans_exit(&trans);
if (ret)
- bch_err(c, "error reading alloc info: %i", ret);
+ bch_err(c, "error reading alloc info: %s", bch2_err_str(ret));
return ret;
}
@@ -794,7 +794,7 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter;
- struct bkey_s_c k, freespace_k;
+ struct bkey_s_c alloc_k;
struct bch_alloc_v4 a;
u64 genbits;
struct bpos pos;
@@ -804,14 +804,6 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;
int ret;
- freespace_k = bch2_btree_iter_peek(iter);
- if (!freespace_k.k)
- return 1;
-
- ret = bkey_err(freespace_k);
- if (ret)
- return ret;
-
pos = iter->pos;
pos.offset &= ~(~0ULL << 56);
genbits = iter->pos.offset & (~0ULL << 56);
@@ -823,18 +815,18 @@ static int bch2_check_discard_freespace_key(struct btree_trans *trans,
bch2_btree_ids[iter->btree_id], pos.inode, pos.offset))
goto delete;
- k = bch2_btree_iter_peek_slot(&alloc_iter);
- ret = bkey_err(k);
+ alloc_k = bch2_btree_iter_peek_slot(&alloc_iter);
+ ret = bkey_err(alloc_k);
if (ret)
goto err;
- bch2_alloc_to_v4(k, &a);
+ bch2_alloc_to_v4(alloc_k, &a);
if (fsck_err_on(a.data_type != state ||
(state == BCH_DATA_free &&
genbits != alloc_freespace_genbits(a)), c,
"%s\n incorrectly set in %s index (free %u, genbits %llu should be %llu)",
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
+ (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
bch2_btree_ids[iter->btree_id],
a.data_type == state,
genbits >> 56, alloc_freespace_genbits(a) >> 56))
@@ -855,6 +847,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_iter iter, discard_iter, freespace_iter;
+ struct bkey_s_c k;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
@@ -884,36 +877,16 @@ int bch2_check_alloc_info(struct bch_fs *c)
if (ret < 0)
goto err;
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_need_discard, POS_MIN,
- BTREE_ITER_PREFETCH);
- while (1) {
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_LAZY_RW,
- bch2_check_discard_freespace_key(&trans, &iter));
- if (ret)
- break;
-
- bch2_btree_iter_advance(&iter);
- }
- bch2_trans_iter_exit(&trans, &iter);
-
- if (ret < 0)
- goto err;
-
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_freespace, POS_MIN,
- BTREE_ITER_PREFETCH);
- while (1) {
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_LAZY_RW,
- bch2_check_discard_freespace_key(&trans, &iter));
- if (ret)
- break;
-
- bch2_btree_iter_advance(&iter);
- }
- bch2_trans_iter_exit(&trans, &iter);
+ ret = for_each_btree_key_commit(&trans, iter,
+ BTREE_ID_need_discard, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+ bch2_check_discard_freespace_key(&trans, &iter)) ?:
+ for_each_btree_key_commit(&trans, iter,
+ BTREE_ID_freespace, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+ bch2_check_discard_freespace_key(&trans, &iter));
err:
bch2_trans_exit(&trans);
return ret < 0 ? ret : 0;
@@ -1016,17 +989,44 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
return ret < 0 ? ret : 0;
}
-static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
- struct bch_dev *ca, bool *discard_done)
+static int bch2_discard_one_bucket(struct btree_trans *trans,
+ struct btree_iter *need_discard_iter,
+ struct bpos *discard_pos_done,
+ u64 *seen,
+ u64 *open,
+ u64 *need_journal_commit,
+ u64 *discarded)
{
struct bch_fs *c = trans->c;
- struct btree_iter iter;
+ struct bpos pos = need_discard_iter->pos;
+ struct btree_iter iter = { NULL };
struct bkey_s_c k;
+ struct bch_dev *ca;
struct bkey_i_alloc_v4 *a;
struct printbuf buf = PRINTBUF;
- int ret;
+ bool did_discard = false;
+ int ret = 0;
+
+ ca = bch_dev_bkey_exists(c, pos.inode);
+ if (!percpu_ref_tryget(&ca->io_ref)) {
+ bch2_btree_iter_set_pos(need_discard_iter, POS(pos.inode + 1, 0));
+ return 0;
+ }
+
+ if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
+ (*open)++;
+ goto out;
+ }
- bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, pos,
+ if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+ c->journal.flushed_seq_ondisk,
+ pos.inode, pos.offset)) {
+ (*need_journal_commit)++;
+ goto out;
+ }
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+ need_discard_iter->pos,
BTREE_ITER_CACHED);
k = bch2_btree_iter_peek_slot(&iter);
ret = bkey_err(k);
@@ -1062,7 +1062,8 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
goto out;
}
- if (!*discard_done && ca->mi.discard && !c->opts.nochanges) {
+ if (bkey_cmp(*discard_pos_done, iter.pos) &&
+ ca->mi.discard && !c->opts.nochanges) {
/*
* This works without any other locks because this is the only
* thread that removes items from the need_discard tree
@@ -1071,20 +1072,32 @@ static int bch2_clear_need_discard(struct btree_trans *trans, struct bpos pos,
blkdev_issue_discard(ca->disk_sb.bdev,
k.k->p.offset * ca->mi.bucket_size,
ca->mi.bucket_size,
- GFP_KERNEL, 0);
- *discard_done = true;
+ GFP_KERNEL);
- ret = bch2_trans_relock(trans) ? 0 : -EINTR;
+ ret = bch2_trans_relock(trans);
if (ret)
goto out;
}
+ *discard_pos_done = iter.pos;
+ did_discard = true;
+
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
a->v.data_type = alloc_data_type(a->v, a->v.data_type);
write:
- ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_USE_RESERVE|BTREE_INSERT_NOFAIL);
+ if (ret)
+ goto out;
+
+ if (did_discard) {
+ this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]);
+ (*discarded)++;
+ }
out:
bch2_trans_iter_exit(trans, &iter);
+ percpu_ref_put(&ca->io_ref);
printbuf_exit(&buf);
return ret;
}
@@ -1092,61 +1105,27 @@ out:
static void bch2_do_discards_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, discard_work);
- struct bch_dev *ca = NULL;
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
u64 seen = 0, open = 0, need_journal_commit = 0, discarded = 0;
+ struct bpos discard_pos_done = POS_MAX;
int ret;
bch2_trans_init(&trans, c, 0, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_need_discard,
- POS_MIN, 0, k, ret) {
- bool discard_done = false;
-
- if (ca && k.k->p.inode != ca->dev_idx) {
- percpu_ref_put(&ca->io_ref);
- ca = NULL;
- }
-
- if (!ca) {
- ca = bch_dev_bkey_exists(c, k.k->p.inode);
- if (!percpu_ref_tryget(&ca->io_ref)) {
- ca = NULL;
- bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0));
- continue;
- }
- }
-
- seen++;
-
- if (bch2_bucket_is_open_safe(c, k.k->p.inode, k.k->p.offset)) {
- open++;
- continue;
- }
-
- if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
- c->journal.flushed_seq_ondisk,
- k.k->p.inode, k.k->p.offset)) {
- need_journal_commit++;
- continue;
- }
-
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_NOFAIL,
- bch2_clear_need_discard(&trans, k.k->p, ca, &discard_done));
- if (ret)
- break;
-
- this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]);
- discarded++;
- }
- bch2_trans_iter_exit(&trans, &iter);
-
- if (ca)
- percpu_ref_put(&ca->io_ref);
+ /*
+ * We're doing the commit in bch2_discard_one_bucket instead of using
+ * for_each_btree_key_commit() so that we can increment counters after
+ * successful commit:
+ */
+ ret = for_each_btree_key2(&trans, iter,
+ BTREE_ID_need_discard, POS_MIN, 0, k,
+ bch2_discard_one_bucket(&trans, &iter, &discard_pos_done,
+ &seen,
+ &open,
+ &need_journal_commit,
+ &discarded));
bch2_trans_exit(&trans);
@@ -1155,7 +1134,8 @@ static void bch2_do_discards_work(struct work_struct *work)
percpu_ref_put(&c->writes);
- trace_discard_buckets(c, seen, open, need_journal_commit, discarded, ret);
+ trace_discard_buckets(c, seen, open, need_journal_commit, discarded,
+ bch2_err_str(ret));
}
void bch2_do_discards(struct bch_fs *c)
@@ -1165,29 +1145,20 @@ void bch2_do_discards(struct bch_fs *c)
percpu_ref_put(&c->writes);
}
-static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca,
- struct bpos *bucket_pos, unsigned *cached_sectors)
+static int invalidate_one_bucket(struct btree_trans *trans,
+ struct btree_iter *lru_iter, struct bkey_s_c k,
+ unsigned dev_idx, s64 *nr_to_invalidate)
{
struct bch_fs *c = trans->c;
- struct btree_iter lru_iter, alloc_iter = { NULL };
- struct bkey_s_c k;
+ struct btree_iter alloc_iter = { NULL };
struct bkey_i_alloc_v4 *a;
- u64 bucket, idx;
+ struct bpos bucket;
struct printbuf buf = PRINTBUF;
- int ret;
-
- bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru,
- POS(ca->dev_idx, 0), 0);
-next_lru:
- k = bch2_btree_iter_peek(&lru_iter);
- ret = bkey_err(k);
- if (ret)
- goto out;
+ unsigned cached_sectors;
+ int ret = 0;
- if (!k.k || k.k->p.inode != ca->dev_idx) {
- ret = 1;
- goto out;
- }
+ if (*nr_to_invalidate <= 0 || k.k->p.inode != dev_idx)
+ return 1;
if (k.k->type != KEY_TYPE_lru) {
prt_printf(&buf, "non lru key in lru btree:\n ");
@@ -1195,26 +1166,22 @@ next_lru:
if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
bch_err(c, "%s", buf.buf);
- bch2_btree_iter_advance(&lru_iter);
- goto next_lru;
} else {
bch2_trans_inconsistent(trans, "%s", buf.buf);
ret = -EINVAL;
- goto out;
}
- }
- idx = k.k->p.offset;
- bucket = le64_to_cpu(bkey_s_c_to_lru(k).v->idx);
+ goto out;
+ }
- *bucket_pos = POS(ca->dev_idx, bucket);
+ bucket = POS(dev_idx, le64_to_cpu(bkey_s_c_to_lru(k).v->idx));
- a = bch2_trans_start_alloc_update(trans, &alloc_iter, *bucket_pos);
+ a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
ret = PTR_ERR_OR_ZERO(a);
if (ret)
goto out;
- if (idx != alloc_lru_idx(a->v)) {
+ if (k.k->p.offset != alloc_lru_idx(a->v)) {
prt_printf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n ");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
prt_printf(&buf, "\n ");
@@ -1222,19 +1189,18 @@ next_lru:
if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
bch_err(c, "%s", buf.buf);
- bch2_btree_iter_advance(&lru_iter);
- goto next_lru;
} else {
bch2_trans_inconsistent(trans, "%s", buf.buf);
ret = -EINVAL;
- goto out;
}
+
+ goto out;
}
if (!a->v.cached_sectors)
bch_err(c, "invalidating empty bucket, confused");
- *cached_sectors = a->v.cached_sectors;
+ cached_sectors = a->v.cached_sectors;
SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
a->v.gen++;
@@ -1244,13 +1210,18 @@ next_lru:
a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now);
- ret = bch2_trans_update(trans, &alloc_iter, &a->k_i,
- BTREE_TRIGGER_BUCKET_INVALIDATE);
+ ret = bch2_trans_update(trans, &alloc_iter, &a->k_i,
+ BTREE_TRIGGER_BUCKET_INVALIDATE) ?:
+ bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_USE_RESERVE|BTREE_INSERT_NOFAIL);
if (ret)
goto out;
+
+ trace_invalidate_bucket(c, bucket.inode, bucket.offset, cached_sectors);
+ this_cpu_inc(c->counters[BCH_COUNTER_bucket_invalidate]);
+ --*nr_to_invalidate;
out:
bch2_trans_iter_exit(trans, &alloc_iter);
- bch2_trans_iter_exit(trans, &lru_iter);
printbuf_exit(&buf);
return ret;
}
@@ -1260,8 +1231,9 @@ static void bch2_do_invalidates_work(struct work_struct *work)
struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
struct bch_dev *ca;
struct btree_trans trans;
- struct bpos bucket;
- unsigned i, sectors;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ unsigned i;
int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
@@ -1270,17 +1242,13 @@ static void bch2_do_invalidates_work(struct work_struct *work)
s64 nr_to_invalidate =
should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
- while (nr_to_invalidate-- >= 0) {
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_NOFAIL,
- invalidate_one_bucket(&trans, ca, &bucket,
- &sectors));
- if (ret)
- break;
+ ret = for_each_btree_key2(&trans, iter, BTREE_ID_lru,
+ POS(ca->dev_idx, 0), BTREE_ITER_INTENT, k,
+ invalidate_one_bucket(&trans, &iter, k, ca->dev_idx, &nr_to_invalidate));
- trace_invalidate_bucket(c, bucket.inode, bucket.offset, sectors);
- this_cpu_inc(c->counters[BCH_COUNTER_bucket_invalidate]);
+ if (ret < 0) {
+ percpu_ref_put(&ca->ref);
+ break;
}
}
@@ -1295,16 +1263,13 @@ void bch2_do_invalidates(struct bch_fs *c)
percpu_ref_put(&c->writes);
}
-static int bucket_freespace_init(struct btree_trans *trans, struct btree_iter *iter)
+static int bucket_freespace_init(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k, struct bch_dev *ca)
{
struct bch_alloc_v4 a;
- struct bkey_s_c k;
- int ret;
- k = bch2_btree_iter_peek_slot(iter);
- ret = bkey_err(k);
- if (ret)
- return ret;
+ if (iter->pos.offset >= ca->mi.nbuckets)
+ return 1;
bch2_alloc_to_v4(k, &a);
return bch2_bucket_do_index(trans, k, &a, true);
@@ -1320,25 +1285,16 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
bch2_trans_init(&trans, c, 0, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_alloc,
- POS(ca->dev_idx, ca->mi.first_bucket),
- BTREE_ITER_SLOTS|
- BTREE_ITER_PREFETCH, k, ret) {
- if (iter.pos.offset >= ca->mi.nbuckets)
- break;
-
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_LAZY_RW,
- bucket_freespace_init(&trans, &iter));
- if (ret)
- break;
- }
- bch2_trans_iter_exit(&trans, &iter);
+ ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+ POS(ca->dev_idx, ca->mi.first_bucket),
+ BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BTREE_INSERT_LAZY_RW,
+ bucket_freespace_init(&trans, &iter, k, ca));
bch2_trans_exit(&trans);
- if (ret) {
- bch_err(ca, "error initializing free space: %i", ret);
+ if (ret < 0) {
+ bch_err(ca, "error initializing free space: %s", bch2_err_str(ret));
return ret;
}
@@ -1347,7 +1303,7 @@ static int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca)
SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
mutex_unlock(&c->sb_lock);
- return ret;
+ return 0;
}
int bch2_fs_freespace_init(struct bch_fs *c)
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 2ac6b504..044bc729 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -150,11 +150,13 @@ void bch2_do_discards(struct bch_fs *);
static inline u64 should_invalidate_buckets(struct bch_dev *ca,
struct bch_dev_usage u)
{
- u64 free = u.d[BCH_DATA_free].buckets +
- u.d[BCH_DATA_need_discard].buckets;
+ u64 want_free = ca->mi.nbuckets >> 7;
+ u64 free = max_t(s64, 0,
+ u.d[BCH_DATA_free].buckets
+ + u.d[BCH_DATA_need_discard].buckets
+ - bch2_dev_buckets_reserved(ca, RESERVE_none));
- return clamp_t(s64, (ca->mi.nbuckets >> 7) - free,
- 0, u.d[BCH_DATA_cached].buckets);
+ return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
}
void bch2_do_invalidates(struct bch_fs *);
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 7a878a69..0a9f1313 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -26,6 +26,7 @@
#include "error.h"
#include "io.h"
#include "journal.h"
+#include "movinggc.h"
#include <linux/math64.h>
#include <linux/rculist.h>
@@ -226,7 +227,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
c->blocked_allocate_open_bucket = local_clock();
spin_unlock(&c->freelist_lock);
- return ERR_PTR(-OPEN_BUCKETS_EMPTY);
+ return ERR_PTR(-BCH_ERR_open_buckets_empty);
}
/* Recheck under lock: */
@@ -339,6 +340,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
skipped_nouse,
cl);
err:
+ set_btree_iter_dontneed(&iter);
bch2_trans_iter_exit(trans, &iter);
printbuf_exit(&buf);
return ob;
@@ -395,7 +397,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans,
*cur_bucket = max_t(u64, *cur_bucket, ca->mi.first_bucket);
*cur_bucket = max_t(u64, *cur_bucket, ca->new_fs_bucket_idx);
- for_each_btree_key(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, *cur_bucket),
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, *cur_bucket),
BTREE_ITER_SLOTS, k, ret) {
struct bch_alloc_v4 a;
@@ -425,7 +427,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans,
*cur_bucket = iter.pos.offset;
- return ob ?: ERR_PTR(ret ?: -FREELIST_EMPTY);
+ return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
}
static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
@@ -454,6 +456,11 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
BUG_ON(ca->new_fs_bucket_idx);
+ /*
+ * XXX:
+ * On transaction restart, we'd like to restart from the bucket we were
+ * at previously
+ */
for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
POS(ca->dev_idx, *cur_bucket), 0, k, ret) {
if (k.k->p.inode != ca->dev_idx)
@@ -462,10 +469,9 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k));
*cur_bucket < k.k->p.offset && !ob;
(*cur_bucket)++) {
- if (btree_trans_too_many_iters(trans)) {
- ob = ERR_PTR(-EINTR);
+ ret = btree_trans_too_many_iters(trans);
+ if (ret)
break;
- }
(*buckets_seen)++;
@@ -476,7 +482,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
skipped_nouse,
k, cl);
}
- if (ob)
+
+ if (ob || ret)
break;
}
bch2_trans_iter_exit(trans, &iter);
@@ -496,8 +503,10 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
{
struct open_bucket *ob = NULL;
struct bch_dev_usage usage;
+ bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
+ u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor;
u64 avail;
- u64 cur_bucket = 0;
+ u64 cur_bucket = start;
u64 buckets_seen = 0;
u64 skipped_open = 0;
u64 skipped_need_journal_commit = 0;
@@ -506,7 +515,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
int ret;
again:
usage = bch2_dev_usage_read(ca);
- avail = dev_buckets_free(ca, usage,reserve);
+ avail = dev_buckets_free(ca, usage, reserve);
if (usage.d[BCH_DATA_need_discard].buckets > avail)
bch2_do_discards(c);
@@ -527,7 +536,7 @@ again:
if (!c->blocked_allocate)
c->blocked_allocate = local_clock();
- ob = ERR_PTR(-FREELIST_EMPTY);
+ ob = ERR_PTR(-BCH_ERR_freelist_empty);
goto err;
}
@@ -551,17 +560,30 @@ again:
if (skipped_need_journal_commit * 2 > avail)
bch2_journal_flush_async(&c->journal, NULL);
+
+ if (!ob && !ret && !freespace_initialized && start) {
+ start = cur_bucket = 0;
+ goto again;
+ }
+
+ if (!freespace_initialized)
+ ca->bucket_alloc_trans_early_cursor = cur_bucket;
err:
if (!ob)
- ob = ERR_PTR(ret ?: -FREELIST_EMPTY);
+ ob = ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
if (IS_ERR(ob)) {
- trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve], avail,
+ trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve],
+ usage.d[BCH_DATA_free].buckets,
+ avail,
+ bch2_copygc_wait_amount(c),
+ c->copygc_wait - atomic64_read(&c->io_clock[WRITE].now),
buckets_seen,
skipped_open,
skipped_need_journal_commit,
skipped_nouse,
- cl == NULL, PTR_ERR(ob));
+ cl == NULL,
+ bch2_err_str(PTR_ERR(ob)));
atomic_long_inc(&c->bucket_alloc_fail);
}
@@ -648,7 +670,7 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
bch2_dev_alloc_list(c, stripe, devs_may_alloc);
unsigned dev;
struct bch_dev *ca;
- int ret = -INSUFFICIENT_DEVICES;
+ int ret = -BCH_ERR_insufficient_devices;
unsigned i;
BUG_ON(*nr_effective >= nr_replicas);
@@ -846,8 +868,8 @@ static int open_bucket_add_buckets(struct bch_fs *c,
target, erasure_code,
nr_replicas, nr_effective,
have_cache, flags, _cl);
- if (ret == -FREELIST_EMPTY ||
- ret == -OPEN_BUCKETS_EMPTY)
+ if (bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
+ bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
return ret;
if (*nr_effective >= nr_replicas)
return 0;
@@ -868,7 +890,9 @@ retry_blocking:
ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs,
nr_replicas, nr_effective, have_cache,
reserve, flags, cl);
- if (ret && ret != -INSUFFICIENT_DEVICES && !cl && _cl) {
+ if (ret &&
+ !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
+ !cl && _cl) {
cl = _cl;
goto retry_blocking;
}
@@ -1111,7 +1135,7 @@ alloc_done:
if (erasure_code && !ec_open_bucket(c, &ptrs))
pr_debug("failed to get ec bucket: ret %u", ret);
- if (ret == -INSUFFICIENT_DEVICES &&
+ if (ret == -BCH_ERR_insufficient_devices &&
nr_effective >= nr_replicas_required)
ret = 0;
@@ -1142,19 +1166,18 @@ err:
mutex_unlock(&wp->lock);
- if (ret == -FREELIST_EMPTY &&
+ if (bch2_err_matches(ret, BCH_ERR_freelist_empty) &&
try_decrease_writepoints(c, write_points_nr))
goto retry;
- switch (ret) {
- case -OPEN_BUCKETS_EMPTY:
- case -FREELIST_EMPTY:
+ if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) ||
+ bch2_err_matches(ret, BCH_ERR_freelist_empty))
return cl ? ERR_PTR(-EAGAIN) : ERR_PTR(-ENOSPC);
- case -INSUFFICIENT_DEVICES:
+
+ if (bch2_err_matches(ret, BCH_ERR_insufficient_devices))
return ERR_PTR(-EROFS);
- default:
- return ERR_PTR(ret);
- }
+
+ return ERR_PTR(ret);
}
struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index 08d67958..5a46b25b 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -443,8 +443,8 @@ int bch2_get_next_backpointer(struct btree_trans *trans,
goto out;
}
- for_each_btree_key(trans, bp_iter, BTREE_ID_backpointers,
- bp_pos, 0, k, ret) {
+ for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
+ bp_pos, 0, k, ret) {
if (bpos_cmp(k.k->p, bp_end_pos) >= 0)
break;
@@ -569,22 +569,16 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
return NULL;
}
-static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter)
+static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
+ struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter = { NULL };
struct bch_dev *ca;
- struct bkey_s_c k, alloc_k;
+ struct bkey_s_c alloc_k;
struct printbuf buf = PRINTBUF;
int ret = 0;
- k = bch2_btree_iter_peek(bp_iter);
- ret = bkey_err(k);
- if (ret)
- return ret;
- if (!k.k)
- return 0;
-
if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c,
"backpointer for mising device:\n%s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
@@ -619,25 +613,14 @@ fsck_err:
/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
- struct btree_trans trans;
struct btree_iter iter;
- int ret = 0;
-
- bch2_trans_init(&trans, c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_backpointers, POS_MIN, 0);
-
- do {
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_LAZY_RW|
- BTREE_INSERT_NOFAIL,
- bch2_check_btree_backpointer(&trans, &iter));
- if (ret)
- break;
- } while (bch2_btree_iter_advance(&iter));
+ struct bkey_s_c k;
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
- return ret;
+ return bch2_trans_run(c,
+ for_each_btree_key_commit(&trans, iter,
+ BTREE_ID_backpointers, POS_MIN, 0, k,
+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+ bch2_check_btree_backpointer(&trans, &iter, k)));
}
static int check_bp_exists(struct btree_trans *trans,
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 31e387b1..8ffdb4de 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -319,6 +319,8 @@ BCH_DEBUG_PARAMS_DEBUG()
#undef BCH_DEBUG_PARAM
#endif
+#define BCH_LOCK_TIME_NR 128
+
#define BCH_TIME_STATS() \
x(btree_node_mem_alloc) \
x(btree_node_split) \
@@ -463,6 +465,7 @@ struct bch_dev {
/* Allocator: */
u64 new_fs_bucket_idx;
+ u64 bucket_alloc_trans_early_cursor;
unsigned nr_open_buckets;
unsigned nr_btree_reserve;
@@ -528,6 +531,11 @@ struct btree_debug {
unsigned id;
};
+struct lock_held_stats {
+ struct time_stats times[BCH_LOCK_TIME_NR];
+ const char *names[BCH_LOCK_TIME_NR];
+};
+
struct bch_fs_pcpu {
u64 sectors_available;
};
@@ -921,6 +929,8 @@ struct bch_fs {
bool promote_whole_extents;
struct time_stats times[BCH_TIME_STAT_NR];
+
+ struct lock_held_stats lock_held_stats;
};
static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 4d032ae3..5a6c93d1 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -7,6 +7,7 @@
#include "btree_iter.h"
#include "btree_locking.h"
#include "debug.h"
+#include "errcode.h"
#include "error.h"
#include <linux/prefetch.h>
@@ -700,20 +701,16 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
* been freed:
*/
if (trans && !bch2_btree_node_relock(trans, path, level + 1)) {
- trace_trans_restart_relock_parent_for_fill(trans->fn,
- _THIS_IP_, btree_id, &path->pos);
- btree_trans_restart(trans);
- return ERR_PTR(-EINTR);
+ trace_trans_restart_relock_parent_for_fill(trans, _THIS_IP_, path);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock));
}
b = bch2_btree_node_mem_alloc(c, level != 0);
if (trans && b == ERR_PTR(-ENOMEM)) {
trans->memory_allocation_failure = true;
- trace_trans_restart_memory_allocation_failure(trans->fn,
- _THIS_IP_, btree_id, &path->pos);
- btree_trans_restart(trans);
- return ERR_PTR(-EINTR);
+ trace_trans_restart_memory_allocation_failure(trans, _THIS_IP_, path);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail));
}
if (IS_ERR(b))
@@ -750,18 +747,19 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
if (!sync)
return NULL;
- if (trans &&
- (!bch2_trans_relock(trans) ||
- !bch2_btree_path_relock_intent(trans, path))) {
- BUG_ON(!trans->restarted);
- return ERR_PTR(-EINTR);
+ if (trans) {
+ int ret = bch2_trans_relock(trans) ?:
+ bch2_btree_path_relock_intent(trans, path);
+ if (ret) {
+ BUG_ON(!trans->restarted);
+ return ERR_PTR(ret);
+ }
}
if (!six_relock_type(&b->c.lock, lock_type, seq)) {
- trace_trans_restart_relock_after_fill(trans->fn, _THIS_IP_,
- btree_id, &path->pos);
- btree_trans_restart(trans);
- return ERR_PTR(-EINTR);
+ if (trans)
+ trace_trans_restart_relock_after_fill(trans, _THIS_IP_, path);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
}
return b;
@@ -772,7 +770,9 @@ static int lock_node_check_fn(struct six_lock *lock, void *p)
struct btree *b = container_of(lock, struct btree, c.lock);
const struct bkey_i *k = p;
- return b->hash_val == btree_ptr_hash_val(k) ? 0 : -1;
+ if (b->hash_val != btree_ptr_hash_val(k))
+ return BCH_ERR_lock_fail_node_reused;
+ return 0;
}
static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
@@ -831,6 +831,7 @@ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
struct bset_tree *t;
+ int ret;
EBUG_ON(level >= BTREE_MAX_DEPTH);
@@ -893,13 +894,16 @@ lock_node:
* was removed - and we'll bail out:
*/
if (btree_node_read_locked(path, level + 1))
- btree_node_unlock(path, level + 1);
+ btree_node_unlock(trans, path, level + 1);
- if (!btree_node_lock(trans, path, b, k->k.p, level, lock_type,
- lock_node_check_fn, (void *) k, trace_ip)) {
- if (!trans->restarted)
+ ret = btree_node_lock(trans, path, b, k->k.p, level, lock_type,
+ lock_node_check_fn, (void *) k, trace_ip);
+ if (unlikely(ret)) {
+ if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
goto retry;
- return ERR_PTR(-EINTR);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ERR_PTR(ret);
+ BUG();
}
if (unlikely(b->hash_val != btree_ptr_hash_val(k) ||
@@ -909,12 +913,8 @@ lock_node:
if (bch2_btree_node_relock(trans, path, level + 1))
goto retry;
- trace_trans_restart_btree_node_reused(trans->fn,
- trace_ip,
- path->btree_id,
- &path->pos);
- btree_trans_restart(trans);
- return ERR_PTR(-EINTR);
+ trace_trans_restart_btree_node_reused(trans, trace_ip, path);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused));
}
}
@@ -930,11 +930,13 @@ lock_node:
* should_be_locked is not set on this path yet, so we need to
* relock it specifically:
*/
- if (trans &&
- (!bch2_trans_relock(trans) ||
- !bch2_btree_path_relock_intent(trans, path))) {
- BUG_ON(!trans->restarted);
- return ERR_PTR(-EINTR);
+ if (trans) {
+ int ret = bch2_trans_relock(trans) ?:
+ bch2_btree_path_relock_intent(trans, path);
+ if (ret) {
+ BUG_ON(!trans->restarted);
+ return ERR_PTR(ret);
+ }
}
if (!six_relock_type(&b->c.lock, lock_type, seq))
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 214529b6..2f563365 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -98,7 +98,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
buf1.buf, buf2.buf) &&
!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = FSCK_ERR_START_TOPOLOGY_REPAIR;
+ ret = -BCH_ERR_need_topology_repair;
goto err;
} else {
set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
@@ -126,7 +126,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
buf1.buf, buf2.buf) &&
!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
bch_info(c, "Halting mark and sweep to start topology repair pass");
- ret = FSCK_ERR_START_TOPOLOGY_REPAIR;
+ ret = -BCH_ERR_need_topology_repair;
goto err;
} else {
set_bit(BCH_FS_INITIAL_GC_UNFIXED, &c->flags);
@@ -402,8 +402,8 @@ again:
}
if (ret) {
- bch_err(c, "%s: error %i getting btree node",
- __func__, ret);
+ bch_err(c, "%s: error getting btree node: %s",
+ __func__, bch2_err_str(ret));
break;
}
@@ -471,8 +471,8 @@ again:
ret = PTR_ERR_OR_ZERO(cur);
if (ret) {
- bch_err(c, "%s: error %i getting btree node",
- __func__, ret);
+ bch_err(c, "%s: error getting btree node: %s",
+ __func__, bch2_err_str(ret));
goto err;
}
@@ -537,7 +537,7 @@ static int bch2_repair_topology(struct bch_fs *c)
if (ret == DROP_THIS_NODE) {
bch_err(c, "empty btree root - repair unimplemented");
- ret = FSCK_ERR_EXIT;
+ ret = -BCH_ERR_fsck_repair_unimplemented;
}
}
@@ -804,7 +804,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
fsck_err:
err:
if (ret)
- bch_err(c, "%s: ret %i", __func__, ret);
+ bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
return ret;
}
@@ -910,7 +910,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level,
false, &k, true);
if (ret) {
- bch_err(c, "%s: error %i from bch2_gc_mark_key", __func__, ret);
+ bch_err(c, "%s: error from bch2_gc_mark_key: %s",
+ __func__, bch2_err_str(ret));
goto fsck_err;
}
@@ -959,7 +960,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(cur.k)), buf.buf)) &&
!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags)) {
- ret = FSCK_ERR_START_TOPOLOGY_REPAIR;
+ ret = -BCH_ERR_need_topology_repair;
bch_info(c, "Halting mark and sweep to start topology repair pass");
goto fsck_err;
} else {
@@ -970,8 +971,8 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
continue;
}
} else if (ret) {
- bch_err(c, "%s: error %i getting btree node",
- __func__, ret);
+ bch_err(c, "%s: error getting btree node: %s",
+ __func__, bch2_err_str(ret));
break;
}
@@ -1012,7 +1013,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans,
if (mustfix_fsck_err_on(bpos_cmp(b->data->min_key, POS_MIN), c,
"btree root with incorrect min_key: %s", buf.buf)) {
bch_err(c, "repair unimplemented");
- ret = FSCK_ERR_EXIT;
+ ret = -BCH_ERR_fsck_repair_unimplemented;
goto fsck_err;
}
@@ -1021,7 +1022,7 @@ static int bch2_gc_btree_init(struct btree_trans *trans,
if (mustfix_fsck_err_on(bpos_cmp(b->data->max_key, SPOS_MAX), c,
"btree root with incorrect max_key: %s", buf.buf)) {
bch_err(c, "repair unimplemented");
- ret = FSCK_ERR_EXIT;
+ ret = -BCH_ERR_fsck_repair_unimplemented;
goto fsck_err;
}
@@ -1038,7 +1039,7 @@ fsck_err:
six_unlock_read(&b->c.lock);
if (ret < 0)
- bch_err(c, "%s: ret %i", __func__, ret);
+ bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
printbuf_exit(&buf);
return ret;
}
@@ -1071,7 +1072,7 @@ static int bch2_gc_btrees(struct bch_fs *c, bool initial, bool metadata_only)
: bch2_gc_btree(&trans, ids[i], initial, metadata_only);
if (ret < 0)
- bch_err(c, "%s: ret %i", __func__, ret);
+ bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
bch2_trans_exit(&trans);
return ret;
@@ -1269,7 +1270,7 @@ fsck_err:
if (ca)
percpu_ref_put(&ca->ref);
if (ret)
- bch_err(c, "%s: ret %i", __func__, ret);
+ bch_err(c, "error from %s(): %s", __func__, bch2_err_str(ret));
percpu_up_write(&c->mark_lock);
printbuf_exit(&buf);
@@ -1324,21 +1325,19 @@ static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
static int bch2_alloc_write_key(struct btree_trans *trans,
struct btree_iter *iter,
+ struct bkey_s_c k,
bool metadata_only)
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, iter->pos.inode);
struct bucket gc, *b;
- struct bkey_s_c k;
struct bkey_i_alloc_v4 *a;
struct bch_alloc_v4 old, new;
enum bch_data_type type;
int ret;
- k = bch2_btree_iter_peek_slot(iter);
- ret = bkey_err(k);
- if (ret)
- return ret;
+ if (bkey_cmp(iter->pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
+ return 1;
bch2_alloc_to_v4(k, &old);
new = old;
@@ -1431,31 +1430,21 @@ static int bch2_gc_alloc_done(struct bch_fs *c, bool metadata_only)
bch2_trans_init(&trans, c, 0, 0);
for_each_member_device(ca, c, i) {
- for_each_btree_key(&trans, iter, BTREE_ID_alloc,
- POS(ca->dev_idx, ca->mi.first_bucket),
- BTREE_ITER_SLOTS|
- BTREE_ITER_PREFETCH, k, ret) {
- if (bkey_cmp(iter.pos, POS(ca->dev_idx, ca->mi.nbuckets)) >= 0)
- break;
-
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_LAZY_RW,
- bch2_alloc_write_key(&trans, &iter,
- metadata_only));
- if (ret)
- break;
- }
- bch2_trans_iter_exit(&trans, &iter);
-
- if (ret) {
- bch_err(c, "error writing alloc info: %i", ret);
+ ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_alloc,
+ POS(ca->dev_idx, ca->mi.first_bucket),
+ BTREE_ITER_SLOTS|BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BTREE_INSERT_LAZY_RW,
+ bch2_alloc_write_key(&trans, &iter, k, metadata_only));
+
+ if (ret < 0) {
+ bch_err(c, "error writing alloc info: %s", bch2_err_str(ret));
percpu_ref_put(&ca->ref);
break;
}
}
bch2_trans_exit(&trans);
- return ret;
+ return ret < 0 ? ret : 0;
}
static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
@@ -1512,7 +1501,7 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only)
bch2_trans_exit(&trans);
if (ret)
- bch_err(c, "error reading alloc info at gc start: %i", ret);
+ bch_err(c, "error reading alloc info at gc start: %s", bch2_err_str(ret));
return ret;
}
@@ -1539,72 +1528,79 @@ static void bch2_gc_alloc_reset(struct bch_fs *c, bool metadata_only)
};
}
-static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
+static int bch2_gc_write_reflink_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ size_t *idx)
{
- struct btree_trans trans;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct reflink_gc *r;
- size_t idx = 0;
+ struct bch_fs *c = trans->c;
+ const __le64 *refcount = bkey_refcount_c(k);
struct printbuf buf = PRINTBUF;
+ struct reflink_gc *r;
int ret = 0;
- if (metadata_only)
+ if (!refcount)
return 0;
- bch2_trans_init(&trans, c, 0, 0);
+ while ((r = genradix_ptr(&c->reflink_gc_table, *idx)) &&
+ r->offset < k.k->p.offset)
+ ++*idx;
- for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
- const __le64 *refcount = bkey_refcount_c(k);
+ if (!r ||
+ r->offset != k.k->p.offset ||
+ r->size != k.k->size) {
+ bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
+ return -EINVAL;
+ }
- if (!refcount)
- continue;
+ if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
+ "reflink key has wrong refcount:\n"
+ " %s\n"
+ " should be %u",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf),
+ r->refcount)) {
+ struct bkey_i *new;
- r = genradix_ptr(&c->reflink_gc_table, idx++);
- if (!r ||
- r->offset != k.k->p.offset ||
- r->size != k.k->size) {
- bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
- ret = -EINVAL;
- break;
- }
+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+ ret = PTR_ERR_OR_ZERO(new);
+ if (ret)
+ return ret;
- if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
- "reflink key has wrong refcount:\n"
- " %s\n"
- " should be %u",
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf),
- r->refcount)) {
- struct bkey_i *new;
+ bkey_reassemble(new, k);
- new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
- if (!new) {
- ret = -ENOMEM;
- break;
- }
+ if (!r->refcount)
+ new->k.type = KEY_TYPE_deleted;
+ else
+ *bkey_refcount(new) = cpu_to_le64(r->refcount);
- bkey_reassemble(new, k);
+ ret = bch2_trans_update(trans, iter, new, 0);
+ }
+fsck_err:
+ printbuf_exit(&buf);
+ return ret;
+}
- if (!r->refcount)
- new->k.type = KEY_TYPE_deleted;
- else
- *bkey_refcount(new) = cpu_to_le64(r->refcount);
+static int bch2_gc_reflink_done(struct bch_fs *c, bool metadata_only)
+{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ size_t idx = 0;
+ int ret = 0;
- ret = commit_do(&trans, NULL, NULL, 0,
- __bch2_btree_insert(&trans, BTREE_ID_reflink, new));
- kfree(new);
+ if (metadata_only)
+ return 0;
+
+ bch2_trans_init(&trans, c, 0, 0);
+
+ ret = for_each_btree_key_commit(&trans, iter,
+ BTREE_ID_reflink, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BTREE_INSERT_NOFAIL,
+ bch2_gc_write_reflink_key(&trans, &iter, k, &idx));
- if (ret)
- break;
- }
- }
-fsck_err:
- bch2_trans_iter_exit(&trans, &iter);
c->reflink_gc_nr = 0;
bch2_trans_exit(&trans);
- printbuf_exit(&buf);
return ret;
}
@@ -1656,66 +1652,73 @@ static void bch2_gc_reflink_reset(struct bch_fs *c, bool metadata_only)
r->refcount = 0;
}
-static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
+static int bch2_gc_write_stripes_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k)
{
- struct btree_trans trans;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct gc_stripe *m;
- const struct bch_stripe *s;
+ struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
+ const struct bch_stripe *s;
+ struct gc_stripe *m;
unsigned i;
int ret = 0;
- if (metadata_only)
+ if (k.k->type != KEY_TYPE_stripe)
return 0;
- bch2_trans_init(&trans, c, 0, 0);
+ s = bkey_s_c_to_stripe(k).v;
+ m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
- for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
- if (k.k->type != KEY_TYPE_stripe)
- continue;
-
- s = bkey_s_c_to_stripe(k).v;
- m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
-
- for (i = 0; i < s->nr_blocks; i++)
- if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
- goto inconsistent;
- continue;
+ for (i = 0; i < s->nr_blocks; i++)
+ if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
+ goto inconsistent;
+ return 0;
inconsistent:
- if (fsck_err_on(true, c,
- "stripe has wrong block sector count %u:\n"
- " %s\n"
- " should be %u", i,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf),
- m ? m->block_sectors[i] : 0)) {
- struct bkey_i_stripe *new;
-
- new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
- if (!new) {
- ret = -ENOMEM;
- break;
- }
+ if (fsck_err_on(true, c,
+ "stripe has wrong block sector count %u:\n"
+ " %s\n"
+ " should be %u", i,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf),
+ m ? m->block_sectors[i] : 0)) {
+ struct bkey_i_stripe *new;
+
+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+ ret = PTR_ERR_OR_ZERO(new);
+ if (ret)
+ return ret;
- bkey_reassemble(&new->k_i, k);
+ bkey_reassemble(&new->k_i, k);
- for (i = 0; i < new->v.nr_blocks; i++)
- stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
+ for (i = 0; i < new->v.nr_blocks; i++)
+ stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
- ret = commit_do(&trans, NULL, NULL, 0,
- __bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i));
- kfree(new);
- }
+ ret = bch2_trans_update(trans, iter, &new->k_i, 0);
}
fsck_err:
- bch2_trans_iter_exit(&trans, &iter);
+ printbuf_exit(&buf);
+ return ret;
+}
- bch2_trans_exit(&trans);
+static int bch2_gc_stripes_done(struct bch_fs *c, bool metadata_only)
+{
+ struct btree_trans trans;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
- printbuf_exit(&buf);
+ if (metadata_only)
+ return 0;
+
+ bch2_trans_init(&trans, c, 0, 0);
+
+ ret = for_each_btree_key_commit(&trans, iter,
+ BTREE_ID_stripes, POS_MIN,
+ BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BTREE_INSERT_NOFAIL,
+ bch2_gc_write_stripes_key(&trans, &iter, k));
+
+ bch2_trans_exit(&trans);
return ret;
}
@@ -1777,7 +1780,7 @@ again:
ret = bch2_gc_btrees(c, initial, metadata_only);
- if (ret == FSCK_ERR_START_TOPOLOGY_REPAIR &&
+ if (ret == -BCH_ERR_need_topology_repair &&
!test_bit(BCH_FS_TOPOLOGY_REPAIR_DONE, &c->flags) &&
!test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags)) {
set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
@@ -1785,8 +1788,8 @@ again:
ret = 0;
}
- if (ret == FSCK_ERR_START_TOPOLOGY_REPAIR)
- ret = FSCK_ERR_EXIT;
+ if (ret == -BCH_ERR_need_topology_repair)
+ ret = -BCH_ERR_fsck_errors_not_fixed;
if (ret)
goto out;
@@ -1969,7 +1972,7 @@ int bch2_gc_gens(struct bch_fs *c)
BTREE_INSERT_NOFAIL,
gc_btree_gens_key(&trans, &iter, k));
if (ret) {
- bch_err(c, "error recalculating oldest_gen: %i", ret);
+ bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret));
goto err;
}
}
@@ -1982,7 +1985,7 @@ int bch2_gc_gens(struct bch_fs *c)
BTREE_INSERT_NOFAIL,
bch2_alloc_write_oldest_gen(&trans, &iter, k));
if (ret) {
- bch_err(c, "error writing oldest_gen: %i", ret);
+ bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret));
goto err;
}
@@ -2054,7 +2057,7 @@ static int bch2_gc_thread(void *arg)
ret = bch2_gc_gens(c);
#endif
if (ret < 0)
- bch_err(c, "btree gc failed: %i", ret);
+ bch_err(c, "btree gc failed: %s", bch2_err_str(ret));
debug_check_no_locks_held();
}
@@ -2084,7 +2087,7 @@ int bch2_gc_thread_start(struct bch_fs *c)
p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
if (IS_ERR(p)) {
- bch_err(c, "error creating gc thread: %li", PTR_ERR(p));
+ bch_err(c, "error creating gc thread: %s", bch2_err_str(PTR_ERR(p)));
return PTR_ERR(p);
}
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 9bf3f77b..ae731b3a 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -543,7 +543,7 @@ enum btree_validate_ret {
struct printbuf out = PRINTBUF; \
\
btree_err_msg(&out, c, ca, b, i, b->written, write); \
- prt_printf(&out, ": " msg, ##__VA_ARGS__); \
+ prt_printf(&out, ": " msg, ##__VA_ARGS__); \
\
if (type == BTREE_ERR_FIXABLE && \
write == READ && \
@@ -558,7 +558,7 @@ enum btree_validate_ret {
\
switch (type) { \
case BTREE_ERR_FIXABLE: \
- ret = BCH_FSCK_ERRORS_NOT_FIXED; \
+ ret = -BCH_ERR_fsck_errors_not_fixed; \
goto fsck_err; \
case BTREE_ERR_WANT_RETRY: \
if (have_retry) { \
@@ -570,7 +570,7 @@ enum btree_validate_ret {
ret = BTREE_RETRY_READ; \
goto fsck_err; \
case BTREE_ERR_FATAL: \
- ret = BCH_FSCK_ERRORS_NOT_FIXED; \
+ ret = -BCH_ERR_fsck_errors_not_fixed; \
goto fsck_err; \
} \
break; \
@@ -578,7 +578,7 @@ enum btree_validate_ret {
bch_err(c, "corrupt metadata before write: %s", out.buf);\
\
if (bch2_fs_inconsistent(c)) { \
- ret = BCH_FSCK_ERRORS_NOT_FIXED; \
+ ret = -BCH_ERR_fsck_errors_not_fixed; \
goto fsck_err; \
} \
break; \
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 923381d8..946c462e 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -16,6 +16,7 @@
#include "replicas.h"
#include "subvolume.h"
+#include <linux/prandom.h>
#include <linux/prefetch.h>
#include <trace/events/bcachefs.h>
@@ -46,7 +47,7 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans)
if (need_resched() || race_fault()) {
bch2_trans_unlock(trans);
schedule();
- return bch2_trans_relock(trans) ? 0 : -EINTR;
+ return bch2_trans_relock(trans);
} else {
return 0;
}
@@ -99,12 +100,6 @@ static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos
return p;
}
-static inline bool is_btree_node(struct btree_path *path, unsigned l)
-{
- return l < BTREE_MAX_DEPTH &&
- (unsigned long) path->l[l].b >= 128;
-}
-
static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
{
struct bpos pos = iter->pos;
@@ -143,15 +138,37 @@ void bch2_btree_node_unlock_write(struct btree_trans *trans,
bch2_btree_node_unlock_write_inlined(trans, path, b);
}
-void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *trans,
+ struct btree_path *skip,
+ struct btree *b,
+ unsigned level)
{
- struct btree_path *linked;
- unsigned readers = 0;
+ struct btree_path *path;
+ struct six_lock_count ret = { 0, 0 };
+
+ if (IS_ERR_OR_NULL(b))
+ return ret;
+
+ trans_for_each_path(trans, path)
+ if (path != skip && path->l[level].b == b) {
+ ret.read += btree_node_read_locked(path, level);
+ ret.intent += btree_node_intent_locked(path, level);
+ }
+
+ return ret;
+}
- trans_for_each_path(trans, linked)
- if (linked->l[b->c.level].b == b &&
- btree_node_read_locked(linked, b->c.level))
- readers++;
+static inline void six_lock_readers_add(struct six_lock *lock, int nr)
+{
+ if (!lock->readers)
+ atomic64_add(__SIX_VAL(read_lock, nr), &lock->state.counter);
+ else
+ this_cpu_add(*lock->readers, nr);
+}
+
+void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
+{
+ int readers = bch2_btree_node_lock_counts(trans, NULL, b, b->c.level).read;
/*
* Must drop our read locks before calling six_lock_write() -
@@ -159,19 +176,9 @@ void __bch2_btree_node_lock_write(struct btree_trans *trans, struct btree *b)
* goes to 0, and it's safe because we have the node intent
* locked:
*/
- if (!b->c.lock.readers)
- atomic64_sub(__SIX_VAL(read_lock, readers),
- &b->c.lock.state.counter);
- else
- this_cpu_sub(*b->c.lock.readers, readers);
-
+ six_lock_readers_add(&b->c.lock, -readers);
six_lock_write(&b->c.lock, NULL, NULL);
-
- if (!b->c.lock.readers)
- atomic64_add(__SIX_VAL(read_lock, readers),
- &b->c.lock.state.counter);
- else
- this_cpu_add(*b->c.lock.readers, readers);
+ six_lock_readers_add(&b->c.lock, readers);
}
bool __bch2_btree_node_relock(struct btree_trans *trans,
@@ -193,14 +200,9 @@ bool __bch2_btree_node_relock(struct btree_trans *trans,
return true;
}
fail:
- if (b != BTREE_ITER_NO_NODE_CACHED &&
- b != BTREE_ITER_NO_NODE_INIT)
- trace_btree_node_relock_fail(trans->fn, _RET_IP_,
- path->btree_id,
- &path->pos,
- (unsigned long) b,
- path->l[level].lock_seq,
- is_btree_node(path, level) ? b->c.lock.state.seq : 0);
+ if (b != ERR_PTR(-BCH_ERR_no_btree_node_cached) &&
+ b != ERR_PTR(-BCH_ERR_no_btree_node_init))
+ trace_btree_node_relock_fail(trans, _RET_IP_, path, level);
return false;
}
@@ -236,10 +238,11 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
if (btree_node_lock_seq_matches(path, b, level) &&
btree_node_lock_increment(trans, b, level, BTREE_NODE_INTENT_LOCKED)) {
- btree_node_unlock(path, level);
+ btree_node_unlock(trans, path, level);
goto success;
}
+ trace_btree_node_upgrade_fail(trans, _RET_IP_, path, level);
return false;
success:
mark_btree_node_intent_locked(trans, path, level);
@@ -271,11 +274,13 @@ static inline bool btree_path_get_locks(struct btree_trans *trans,
* the node that we failed to relock:
*/
if (fail_idx >= 0) {
- __bch2_btree_path_unlock(path);
+ __bch2_btree_path_unlock(trans, path);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
do {
- path->l[fail_idx].b = BTREE_ITER_NO_NODE_GET_LOCKS;
+ path->l[fail_idx].b = upgrade
+ ? ERR_PTR(-BCH_ERR_no_btree_node_upgrade)
+ : ERR_PTR(-BCH_ERR_no_btree_node_relock);
--fail_idx;
} while (fail_idx >= 0);
}
@@ -297,13 +302,13 @@ static struct bpos btree_node_pos(struct btree_bkey_cached_common *_b,
}
/* Slowpath: */
-bool __bch2_btree_node_lock(struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b,
- struct bpos pos, unsigned level,
- enum six_lock_type type,
- six_lock_should_sleep_fn should_sleep_fn, void *p,
- unsigned long ip)
+int __bch2_btree_node_lock(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b,
+ struct bpos pos, unsigned level,
+ enum six_lock_type type,
+ six_lock_should_sleep_fn should_sleep_fn, void *p,
+ unsigned long ip)
{
struct btree_path *linked;
unsigned reason;
@@ -373,16 +378,8 @@ bool __bch2_btree_node_lock(struct btree_trans *trans,
return btree_node_lock_type(trans, path, b, pos, level,
type, should_sleep_fn, p);
deadlock:
- trace_trans_restart_would_deadlock(trans->fn, ip,
- trans->in_traverse_all, reason,
- linked->btree_id,
- linked->cached,
- &linked->pos,
- path->btree_id,
- path->cached,
- &pos);
- btree_trans_restart(trans);
- return false;
+ trace_trans_restart_would_deadlock(trans, ip, reason, linked, path, &pos);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
}
/* Btree iterator locking: */
@@ -420,8 +417,8 @@ static inline void bch2_btree_path_verify_locks(struct btree_path *path) {}
/*
* Only for btree_cache.c - only relocks intent locks
*/
-bool bch2_btree_path_relock_intent(struct btree_trans *trans,
- struct btree_path *path)
+int bch2_btree_path_relock_intent(struct btree_trans *trans,
+ struct btree_path *path)
{
unsigned l;
@@ -429,30 +426,32 @@ bool bch2_btree_path_relock_intent(struct btree_trans *trans,
l < path->locks_want && btree_path_node(path, l);
l++) {
if (!bch2_btree_node_relock(trans, path, l)) {
- __bch2_btree_path_unlock(path);
+ __bch2_btree_path_unlock(trans, path);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
- trace_trans_restart_relock_path_intent(trans->fn, _RET_IP_,
- path->btree_id, &path->pos);
- btree_trans_restart(trans);
- return false;
+ trace_trans_restart_relock_path_intent(trans, _RET_IP_, path);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path_intent);
}
}
- return true;
+ return 0;
}
__flatten
-static bool bch2_btree_path_relock(struct btree_trans *trans,
+static bool bch2_btree_path_relock_norestart(struct btree_trans *trans,
struct btree_path *path, unsigned long trace_ip)
{
- bool ret = btree_path_get_locks(trans, path, false);
+ return btree_path_get_locks(trans, path, false);
+}
- if (!ret) {
- trace_trans_restart_relock_path(trans->fn, trace_ip,
- path->btree_id, &path->pos);
- btree_trans_restart(trans);
+static int bch2_btree_path_relock(struct btree_trans *trans,
+ struct btree_path *path, unsigned long trace_ip)
+{
+ if (!bch2_btree_path_relock_norestart(trans, path, trace_ip)) {
+ trace_trans_restart_relock_path(trans, trace_ip, path);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_path);
}
- return ret;
+
+ return 0;
}
bool __bch2_btree_path_upgrade(struct btree_trans *trans,
@@ -500,7 +499,8 @@ bool __bch2_btree_path_upgrade(struct btree_trans *trans,
return false;
}
-void __bch2_btree_path_downgrade(struct btree_path *path,
+void __bch2_btree_path_downgrade(struct btree_trans *trans,
+ struct btree_path *path,
unsigned new_locks_want)
{
unsigned l;
@@ -512,7 +512,7 @@ void __bch2_btree_path_downgrade(struct btree_path *path,
while (path->nodes_locked &&
(l = __fls(path->nodes_locked)) >= path->locks_want) {
if (l > path->level) {
- btree_node_unlock(path, l);
+ btree_node_unlock(trans, path, l);
} else {
if (btree_node_intent_locked(path, l)) {
six_lock_downgrade(&path->l[l].b->c.lock);
@@ -530,27 +530,26 @@ void bch2_trans_downgrade(struct btree_trans *trans)
struct btree_path *path;
trans_for_each_path(trans, path)
- bch2_btree_path_downgrade(path);
+ bch2_btree_path_downgrade(trans, path);
}
/* Btree transaction locking: */
-bool bch2_trans_relock(struct btree_trans *trans)
+int bch2_trans_relock(struct btree_trans *trans)
{
struct btree_path *path;
if (unlikely(trans->restarted))
- return false;
+ return -BCH_ERR_transaction_restart_relock;
trans_for_each_path(trans, path)
if (path->should_be_locked &&
- !bch2_btree_path_relock(trans, path, _RET_IP_)) {
- trace_trans_restart_relock(trans->fn, _RET_IP_,
- path->btree_id, &path->pos);
+ bch2_btree_path_relock(trans, path, _RET_IP_)) {
+ trace_trans_restart_relock(trans, _RET_IP_, path);
BUG_ON(!trans->restarted);
- return false;
+ return -BCH_ERR_transaction_restart_relock;
}
- return true;
+ return 0;
}
void bch2_trans_unlock(struct btree_trans *trans)
@@ -558,7 +557,7 @@ void bch2_trans_unlock(struct btree_trans *trans)
struct btree_path *path;
trans_for_each_path(trans, path)
- __bch2_btree_path_unlock(path);
+ __bch2_btree_path_unlock(trans, path);
/*
* bch2_gc_btree_init_recurse() doesn't use btree iterators for walking
@@ -586,7 +585,7 @@ static void bch2_btree_path_verify_cached(struct btree_trans *trans,
bkey_cmp(ck->key.pos, path->pos));
if (!locked)
- btree_node_unlock(path, 0);
+ btree_node_unlock(trans, path, 0);
}
static void bch2_btree_path_verify_level(struct btree_trans *trans,
@@ -643,7 +642,7 @@ static void bch2_btree_path_verify_level(struct btree_trans *trans,
}
if (!locked)
- btree_node_unlock(path, level);
+ btree_node_unlock(trans, path, level);
return;
err:
bch2_bpos_to_text(&buf1, path->pos);
@@ -1020,27 +1019,29 @@ static inline struct bkey_s_c btree_path_level_peek_all(struct bch_fs *c,
bch2_btree_node_iter_peek_all(&l->iter, l->b));
}
-static inline struct bkey_s_c btree_path_level_peek(struct bch_fs *c,
+static inline struct bkey_s_c btree_path_level_peek(struct btree_trans *trans,
struct btree_path *path,
struct btree_path_level *l,
struct bkey *u)
{
- struct bkey_s_c k = __btree_iter_unpack(c, l, u,
+ struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
bch2_btree_node_iter_peek(&l->iter, l->b));
path->pos = k.k ? k.k->p : l->b->key.k.p;
+ bch2_btree_path_verify_level(trans, path, l - path->l);
return k;
}
-static inline struct bkey_s_c btree_path_level_prev(struct bch_fs *c,
+static inline struct bkey_s_c btree_path_level_prev(struct btree_trans *trans,
struct btree_path *path,
struct btree_path_level *l,
struct bkey *u)
{
- struct bkey_s_c k = __btree_iter_unpack(c, l, u,
+ struct bkey_s_c k = __btree_iter_unpack(trans->c, l, u,
bch2_btree_node_iter_prev(&l->iter, l->b));
path->pos = k.k ? k.k->p : l->b->data->min_key;
+ bch2_btree_path_verify_level(trans, path, l - path->l);
return k;
}
@@ -1115,7 +1116,7 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
}
if (!parent_locked)
- btree_node_unlock(path, plevel);
+ btree_node_unlock(trans, path, plevel);
}
static inline void __btree_path_level_init(struct btree_path *path,
@@ -1167,7 +1168,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
if (path->nodes_locked &&
t != BTREE_NODE_UNLOCKED) {
- btree_node_unlock(path, b->c.level);
+ btree_node_unlock(trans, path, b->c.level);
six_lock_increment(&b->c.lock, t);
mark_btree_node_locked(trans, path, b->c.level, t);
}
@@ -1195,7 +1196,9 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
struct btree *b = container_of(lock, struct btree, c.lock);
struct btree **rootp = p;
- return b == *rootp ? 0 : -1;
+ if (b != *rootp)
+ return BCH_ERR_lock_fail_root_changed;
+ return 0;
}
static inline int btree_path_lock_root(struct btree_trans *trans,
@@ -1207,6 +1210,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
struct btree *b, **rootp = &c->btree_roots[path->btree_id].b;
enum six_lock_type lock_type;
unsigned i;
+ int ret;
EBUG_ON(path->nodes_locked);
@@ -1228,20 +1232,23 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
}
lock_type = __btree_lock_want(path, path->level);
- if (unlikely(!btree_node_lock(trans, path, b, SPOS_MAX,
- path->level, lock_type,
- lock_root_check_fn, rootp,
- trace_ip))) {
- if (trans->restarted)
- return -EINTR;
- continue;
+ ret = btree_node_lock(trans, path, b, SPOS_MAX,
+ path->level, lock_type,
+ lock_root_check_fn, rootp,
+ trace_ip);
+ if (unlikely(ret)) {
+ if (bch2_err_matches(ret, BCH_ERR_lock_fail_root_changed))
+ continue;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ return ret;
+ BUG();
}
if (likely(b == READ_ONCE(*rootp) &&
b->c.level == path->level &&
!race_fault())) {
for (i = 0; i < path->level; i++)
- path->l[i].b = BTREE_ITER_NO_NODE_LOCK_ROOT;
+ path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_lock_root);
path->l[path->level].b = b;
for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
path->l[i].b = NULL;
@@ -1286,7 +1293,7 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat
}
if (!was_locked)
- btree_node_unlock(path, path->level);
+ btree_node_unlock(trans, path, path->level);
bch2_bkey_buf_exit(&tmp, c);
return ret;
@@ -1321,7 +1328,7 @@ static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *p
}
if (!was_locked)
- btree_node_unlock(path, path->level);
+ btree_node_unlock(trans, path, path->level);
bch2_bkey_buf_exit(&tmp, c);
return ret;
@@ -1346,7 +1353,7 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
bp->mem_ptr = (unsigned long)b;
if (!locked)
- btree_node_unlock(path, plevel);
+ btree_node_unlock(trans, path, plevel);
}
static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
@@ -1419,7 +1426,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
btree_node_mem_ptr_set(trans, path, level + 1, b);
if (btree_node_read_locked(path, level + 1))
- btree_node_unlock(path, level + 1);
+ btree_node_unlock(trans, path, level + 1);
path->level = level;
bch2_btree_path_verify_locks(path);
@@ -1439,11 +1446,11 @@ static int bch2_btree_path_traverse_all(struct btree_trans *trans)
int i, ret = 0;
if (trans->in_traverse_all)
- return -EINTR;
+ return -BCH_ERR_transaction_restart_in_traverse_all;
trans->in_traverse_all = true;
retry_all:
- trans->restarted = false;
+ trans->restarted = 0;
trans->traverse_all_idx = U8_MAX;
trans_for_each_path(trans, path)
@@ -1487,7 +1494,8 @@ retry_all:
*/
if (path->uptodate) {
ret = btree_path_traverse_one(trans, path, 0, _THIS_IP_);
- if (ret == -EINTR || ret == -ENOMEM)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ ret == -ENOMEM)
goto retry_all;
if (ret)
goto err;
@@ -1509,7 +1517,7 @@ err:
trans->in_traverse_all = false;
- trace_trans_traverse_all(trans->fn, trace_ip);
+ trace_trans_traverse_all(trans, trace_ip);
return ret;
}
@@ -1528,14 +1536,6 @@ static inline bool btree_path_good_node(struct btree_trans *trans,
return true;
}
-static void btree_path_set_level_up(struct btree_path *path)
-{
- btree_node_unlock(path, path->level);
- path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
- path->level++;
- btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
-}
-
static void btree_path_set_level_down(struct btree_trans *trans,
struct btree_path *path,
unsigned new_level)
@@ -1546,7 +1546,7 @@ static void btree_path_set_level_down(struct btree_trans *trans,
for (l = path->level + 1; l < BTREE_MAX_DEPTH; l++)
if (btree_lock_want(path, l) == BTREE_NODE_UNLOCKED)
- btree_node_unlock(path, l);
+ btree_node_unlock(trans, path, l);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
bch2_btree_path_verify(trans, path);
@@ -1559,22 +1559,16 @@ static inline unsigned btree_path_up_until_good_node(struct btree_trans *trans,
unsigned i, l = path->level;
while (btree_path_node(path, l) &&
- !btree_path_good_node(trans, path, l, check_pos)) {
- btree_node_unlock(path, l);
- path->l[l].b = BTREE_ITER_NO_NODE_UP;
- l++;
- }
+ !btree_path_good_node(trans, path, l, check_pos))
+ __btree_path_set_level_up(trans, path, l++);
/* If we need intent locks, take them too: */
for (i = l + 1;
i < path->locks_want && btree_path_node(path, i);
i++)
if (!bch2_btree_node_relock(trans, path, i))
- while (l <= i) {
- btree_node_unlock(path, l);
- path->l[l].b = BTREE_ITER_NO_NODE_UP;
- l++;
- }
+ while (l <= i)
+ __btree_path_set_level_up(trans, path, l++);
return l;
}
@@ -1594,19 +1588,17 @@ static int btree_path_traverse_one(struct btree_trans *trans,
unsigned long trace_ip)
{
unsigned depth_want = path->level;
- int ret = 0;
+ int ret = trans->restarted;
- if (unlikely(trans->restarted)) {
- ret = -EINTR;
+ if (unlikely(ret))
goto out;
- }
/*
* Ensure we obey path->should_be_locked: if it's set, we can't unlock
* and re-traverse the path without a transaction restart:
*/
if (path->should_be_locked) {
- ret = bch2_btree_path_relock(trans, path, trace_ip) ? 0 : -EINTR;
+ ret = bch2_btree_path_relock(trans, path, trace_ip);
goto out;
}
@@ -1640,22 +1632,16 @@ static int btree_path_traverse_one(struct btree_trans *trans,
goto out;
}
- __bch2_btree_path_unlock(path);
+ __bch2_btree_path_unlock(trans, path);
path->level = depth_want;
-
- if (ret == -EIO)
- path->l[path->level].b =
- BTREE_ITER_NO_NODE_ERROR;
- else
- path->l[path->level].b =
- BTREE_ITER_NO_NODE_DOWN;
+ path->l[path->level].b = ERR_PTR(ret);
goto out;
}
}
path->uptodate = BTREE_ITER_UPTODATE;
out:
- BUG_ON((ret == -EINTR) != !!trans->restarted);
+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
bch2_btree_path_verify(trans, path);
return ret;
}
@@ -1663,6 +1649,16 @@ out:
int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
struct btree_path *path, unsigned flags)
{
+ if (0 && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
+ unsigned restart_probability_bits = 4 << min(trans->restart_count, 32U);
+ u64 mask = ~(~0ULL << restart_probability_bits);
+
+ if ((prandom_u32() & mask) == mask) {
+ trace_transaction_restart_injected(trans, _RET_IP_);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_fault_inject);
+ }
+ }
+
if (path->uptodate < BTREE_ITER_NEED_RELOCK)
return 0;
@@ -1737,8 +1733,8 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
bch2_btree_path_check_sort(trans, path, cmp);
if (unlikely(path->cached)) {
- btree_node_unlock(path, 0);
- path->l[0].b = BTREE_ITER_NO_NODE_CACHED;
+ btree_node_unlock(trans, path, 0);
+ path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
goto out;
}
@@ -1760,7 +1756,7 @@ bch2_btree_path_set_pos(struct btree_trans *trans,
if (l != path->level) {
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
- __bch2_btree_path_unlock(path);
+ __bch2_btree_path_unlock(trans, path);
}
out:
bch2_btree_path_verify(trans, path);
@@ -1771,37 +1767,37 @@ out:
/*
 * Check the two paths returned by prev_btree_path() and next_btree_path()
 * for @path, in that order, and return the first one for which
 * btree_path_cmp() against @path is zero; NULL if neither qualifies.
 *
 * The removed/added line pairs below only rename the local "next" to "sib".
 */
static struct btree_path *have_path_at_pos(struct btree_trans *trans, struct btree_path *path)
{
- struct btree_path *next;
+ struct btree_path *sib;
- next = prev_btree_path(trans, path);
- if (next && !btree_path_cmp(next, path))
- return next;
+ sib = prev_btree_path(trans, path);
+ if (sib && !btree_path_cmp(sib, path))
+ return sib;
- next = next_btree_path(trans, path);
- if (next && !btree_path_cmp(next, path))
- return next;
+ sib = next_btree_path(trans, path);
+ if (sib && !btree_path_cmp(sib, path))
+ return sib;
return NULL;
}
/*
 * Check the paths returned by prev_btree_path() and next_btree_path() for
 * @path, in that order. A candidate qualifies when it is at the same level
 * as @path and path_l() of both paths yields the same ->b pointer.
 * Returns the first qualifying candidate, or NULL.
 *
 * The removed/added line pairs below only rename the local "next" to "sib".
 */
static struct btree_path *have_node_at_pos(struct btree_trans *trans, struct btree_path *path)
{
- struct btree_path *next;
+ struct btree_path *sib;
- next = prev_btree_path(trans, path);
- if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
- return next;
+ sib = prev_btree_path(trans, path);
+ if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
+ return sib;
- next = next_btree_path(trans, path);
- if (next && next->level == path->level && path_l(next)->b == path_l(path)->b)
- return next;
+ sib = next_btree_path(trans, path);
+ if (sib && sib->level == path->level && path_l(sib)->b == path_l(path)->b)
+ return sib;
return NULL;
}
/*
 * Release @path from @trans: unlock it with __bch2_btree_path_unlock(),
 * call btree_path_list_remove() on it, and clear its bit (path->idx) in
 * trans->paths_allocated.
 */
static inline void __bch2_path_free(struct btree_trans *trans, struct btree_path *path)
{
- __bch2_btree_path_unlock(path);
+ __bch2_btree_path_unlock(trans, path);
btree_path_list_remove(trans, path);
trans->paths_allocated &= ~(1ULL << path->idx);
}
@@ -1816,26 +1812,23 @@ void bch2_path_put(struct btree_trans *trans, struct btree_path *path, bool inte
if (!__btree_path_put(path, intent))
return;
- /*
- * Perhaps instead we should check for duplicate paths in traverse_all:
- */
- if (path->preserve &&
- (dup = have_path_at_pos(trans, path))) {
- dup->preserve = true;
- path->preserve = false;
- goto free;
- }
+ dup = path->preserve
+ ? have_path_at_pos(trans, path)
+ : have_node_at_pos(trans, path);
+
+ if (!dup && !(!path->preserve && !is_btree_node(path, path->level)))
+ return;
- if (!path->preserve &&
- (dup = have_node_at_pos(trans, path)))
- goto free;
- return;
-free:
if (path->should_be_locked &&
- !btree_node_locked(dup, path->level))
+ !trans->restarted &&
+ (!dup || !bch2_btree_path_relock_norestart(trans, dup, _THIS_IP_)))
return;
- dup->should_be_locked |= path->should_be_locked;
+ if (dup) {
+ dup->preserve |= path->preserve;
+ dup->should_be_locked |= path->should_be_locked;
+ }
+
__bch2_path_free(trans, path);
}
@@ -1891,10 +1884,10 @@ void bch2_dump_trans_paths_updates(struct btree_trans *trans)
bch2_bpos_to_text(&buf, path->pos);
- printk(KERN_ERR "path: idx %u ref %u:%u%s%s btree=%s l=%u pos %s locks %u %pS\n",
+ printk(KERN_ERR "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos %s locks %u %pS\n",
path->idx, path->ref, path->intent_ref,
- path->should_be_locked ? " S" : "",
- path->preserve ? " P" : "",
+ path->preserve ? 'P' : ' ',
+ path->should_be_locked ? 'S' : ' ',
bch2_btree_ids[path->btree_id],
path->level,
buf.buf,
@@ -1947,6 +1940,7 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
struct btree_path *path, *path_pos = NULL;
bool cached = flags & BTREE_ITER_CACHED;
bool intent = flags & BTREE_ITER_INTENT;
+ bool have_dup = false;
int i;
BUG_ON(trans->restarted);
@@ -1954,14 +1948,24 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
bch2_trans_verify_locks(trans);
trans_for_each_path_inorder(trans, path, i) {
- if (__btree_path_cmp(path,
- btree_id,
- cached,
- pos,
- level) > 0)
+ int cmp = __btree_path_cmp(path,
+ btree_id,
+ cached,
+ pos,
+ level);
+ if (cmp > 0)
break;
path_pos = path;
+
+ if (cmp == 0) {
+ if (path->ref || path->preserve) {
+ path->preserve = true;
+ have_dup = true;
+ } else {
+ break;
+ }
+ }
}
if (path_pos &&
@@ -1985,14 +1989,14 @@ struct btree_path *bch2_path_get(struct btree_trans *trans,
path->nodes_locked = 0;
path->nodes_intent_locked = 0;
for (i = 0; i < ARRAY_SIZE(path->l); i++)
- path->l[i].b = BTREE_ITER_NO_NODE_INIT;
+ path->l[i].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
#ifdef CONFIG_BCACHEFS_DEBUG
path->ip_allocated = ip;
#endif
btree_trans_verify_sorted(trans);
}
- if (!(flags & BTREE_ITER_NOPRESERVE))
+ if (!(flags & BTREE_ITER_NOPRESERVE) && !have_dup)
path->preserve = true;
if (path->intent_ref)
@@ -2039,11 +2043,7 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
EBUG_ON(ck &&
(path->btree_id != ck->key.btree_id ||
bkey_cmp(path->pos, ck->key.pos)));
-
- /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? */
- if (unlikely(!ck || !ck->valid))
- return bkey_s_c_null;
-
+ EBUG_ON(!ck || !ck->valid);
EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
*u = ck->k->k;
@@ -2079,7 +2079,7 @@ bch2_btree_iter_traverse(struct btree_iter *iter)
if (ret)
return ret;
- iter->path->should_be_locked = true;
+ btree_path_set_should_be_locked(iter->path);
return 0;
}
@@ -2110,8 +2110,7 @@ struct btree *bch2_btree_iter_peek_node(struct btree_iter *iter)
iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
iter->flags & BTREE_ITER_INTENT,
btree_iter_ip_allocated(iter));
- iter->path->should_be_locked = true;
- BUG_ON(iter->path->uptodate);
+ btree_path_set_should_be_locked(iter->path);
out:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
@@ -2139,28 +2138,24 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
/* got to end? */
if (!btree_path_node(path, path->level + 1)) {
- btree_path_set_level_up(path);
+ btree_path_set_level_up(trans, path);
return NULL;
}
if (!bch2_btree_node_relock(trans, path, path->level + 1)) {
- __bch2_btree_path_unlock(path);
- path->l[path->level].b = BTREE_ITER_NO_NODE_GET_LOCKS;
- path->l[path->level + 1].b = BTREE_ITER_NO_NODE_GET_LOCKS;
+ __bch2_btree_path_unlock(trans, path);
+ path->l[path->level].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
+ path->l[path->level + 1].b = ERR_PTR(-BCH_ERR_no_btree_node_relock);
btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
- trace_trans_restart_relock_next_node(trans->fn, _THIS_IP_,
- path->btree_id, &path->pos);
- btree_trans_restart(trans);
- ret = -EINTR;
+ trace_trans_restart_relock_next_node(trans, _THIS_IP_, path);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_relock);
goto err;
}
b = btree_path_node(path, path->level + 1);
if (!bpos_cmp(iter->pos, b->key.k.p)) {
- btree_node_unlock(path, path->level);
- path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
- path->level++;
+ __btree_path_set_level_up(trans, path, path->level++);
} else {
/*
* Haven't gotten to the end of the parent node: go back down to
@@ -2186,7 +2181,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
iter->path = bch2_btree_path_set_pos(trans, iter->path, b->key.k.p,
iter->flags & BTREE_ITER_INTENT,
btree_iter_ip_allocated(iter));
- iter->path->should_be_locked = true;
+ btree_path_set_should_be_locked(iter->path);
BUG_ON(iter->path->uptodate);
out:
bch2_btree_iter_verify_entry_exit(iter);
@@ -2328,7 +2323,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
if (unlikely(ret))
return bkey_s_c_err(ret);
- iter->key_cache_path->should_be_locked = true;
+ btree_path_set_should_be_locked(iter->key_cache_path);
return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
}
@@ -2356,7 +2351,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
goto out;
}
- iter->path->should_be_locked = true;
+ btree_path_set_should_be_locked(iter->path);
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
@@ -2444,7 +2439,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
while (1) {
k = __bch2_btree_iter_peek(iter, search_key);
if (!k.k || bkey_err(k))
- goto out;
+ goto out_no_locked;
/*
* iter->pos should be mononotically increasing, and always be
@@ -2461,7 +2456,7 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
if (bkey_cmp(iter_pos, end) > 0) {
bch2_btree_iter_set_pos(iter, end);
k = bkey_s_c_null;
- goto out;
+ goto out_no_locked;
}
if (iter->update_path &&
@@ -2523,18 +2518,16 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
iter->path = bch2_btree_path_set_pos(trans, iter->path, k.k->p,
iter->flags & BTREE_ITER_INTENT,
btree_iter_ip_allocated(iter));
- BUG_ON(!iter->path->nodes_locked);
-out:
+
+ btree_path_set_should_be_locked(iter->path);
+out_no_locked:
if (iter->update_path) {
if (iter->update_path->uptodate &&
- !bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_)) {
- k = bkey_s_c_err(-EINTR);
- } else {
- BUG_ON(!(iter->update_path->nodes_locked & 1));
- iter->update_path->should_be_locked = true;
- }
+ (ret = bch2_btree_path_relock(trans, iter->update_path, _THIS_IP_)))
+ k = bkey_s_c_err(ret);
+ else
+ btree_path_set_should_be_locked(iter->update_path);
}
- iter->path->should_be_locked = true;
if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
iter->pos.snapshot = iter->snapshot;
@@ -2578,13 +2571,13 @@ struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos(iter, iter->pos);
k = bkey_s_c_err(ret);
- goto out;
+ goto out_no_locked;
}
/* Already at end? */
if (!btree_path_node(iter->path, iter->path->level)) {
k = bkey_s_c_null;
- goto out;
+ goto out_no_locked;
}
k = btree_path_level_peek_all(trans->c,
@@ -2595,7 +2588,7 @@ struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
(iter->advanced &&
!bpos_cmp(path_l(iter->path)->b->key.k.p, iter->pos))) {
iter->pos = path_l(iter->path)->b->key.k.p;
- btree_path_set_level_up(iter->path);
+ btree_path_set_level_up(trans, iter->path);
iter->advanced = false;
continue;
}
@@ -2637,8 +2630,8 @@ struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
}
iter->pos = k.k->p;
-out:
- iter->path->should_be_locked = true;
+ btree_path_set_should_be_locked(iter->path);
+out_no_locked:
bch2_btree_iter_verify(iter);
return k;
@@ -2692,16 +2685,16 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
/* ensure that iter->k is consistent with iter->pos: */
bch2_btree_iter_set_pos(iter, iter->pos);
k = bkey_s_c_err(ret);
- goto out;
+ goto out_no_locked;
}
- k = btree_path_level_peek(trans->c, iter->path,
+ k = btree_path_level_peek(trans, iter->path,
&iter->path->l[0], &iter->k);
if (!k.k ||
((iter->flags & BTREE_ITER_IS_EXTENTS)
? bpos_cmp(bkey_start_pos(k.k), search_key) >= 0
: bpos_cmp(k.k->p, search_key) > 0))
- k = btree_path_level_prev(trans->c, iter->path,
+ k = btree_path_level_prev(trans, iter->path,
&iter->path->l[0], &iter->k);
bch2_btree_path_check_sort(trans, iter->path, 0);
@@ -2758,7 +2751,7 @@ got_key:
/* Start of btree: */
bch2_btree_iter_set_pos(iter, POS_MIN);
k = bkey_s_c_null;
- goto out;
+ goto out_no_locked;
}
}
@@ -2770,10 +2763,11 @@ got_key:
if (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)
iter->pos.snapshot = iter->snapshot;
-out:
+
+ btree_path_set_should_be_locked(iter->path);
+out_no_locked:
if (saved_path)
bch2_path_put(trans, saved_path, iter->flags & BTREE_ITER_INTENT);
- iter->path->should_be_locked = true;
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
@@ -2846,9 +2840,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
(k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
- if (!bkey_err(k))
+ if (bkey_err(k)) {
+ goto out_no_locked;
+ } else {
iter->k = *k.k;
- goto out;
+ goto out;
+ }
}
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
@@ -2902,8 +2899,8 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
}
}
out:
- iter->path->should_be_locked = true;
-
+ btree_path_set_should_be_locked(iter->path);
+out_no_locked:
bch2_btree_iter_verify_entry_exit(iter);
bch2_btree_iter_verify(iter);
ret = bch2_btree_iter_verify_ret(iter, k);
@@ -3184,9 +3181,8 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
trans->mem_bytes = new_bytes;
if (old_bytes) {
- trace_trans_restart_mem_realloced(trans->fn, _RET_IP_, new_bytes);
- btree_trans_restart(trans);
- return ERR_PTR(-EINTR);
+ trace_trans_restart_mem_realloced(trans, _RET_IP_, new_bytes);
+ return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
}
}
@@ -3200,11 +3196,11 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
* bch2_trans_begin() - reset a transaction after a interrupted attempt
* @trans: transaction to reset
*
- * While iterating over nodes or updating nodes a attempt to lock a btree
- * node may return EINTR when the trylock fails. When this occurs
- * bch2_trans_begin() should be called and the transaction retried.
+ * While iterating over nodes or updating nodes an attempt to lock a btree node
+ * may return BCH_ERR_transaction_restart when the trylock fails. When this
+ * occurs bch2_trans_begin() should be called and the transaction retried.
*/
-void bch2_trans_begin(struct btree_trans *trans)
+u32 bch2_trans_begin(struct btree_trans *trans)
{
struct btree_path *path;
@@ -3250,11 +3246,20 @@ void bch2_trans_begin(struct btree_trans *trans)
bch2_trans_relock(trans);
}
+ trans->last_restarted_ip = _RET_IP_;
if (trans->restarted)
bch2_btree_path_traverse_all(trans);
- trans->restarted = false;
trans->last_begin_time = ktime_get_ns();
+ return trans->restart_count;
+}
+
/*
 * Invoke bch2_trans_inconsistent_on() with the condition that
 * trans_was_restarted() reports trans->restart_count differs from the
 * caller-supplied @restart_count. The message carries both counters and
 * trans->last_restarted_ip.
 */
+void bch2_trans_verify_not_restarted(struct btree_trans *trans, u32 restart_count)
+{
+ bch2_trans_inconsistent_on(trans_was_restarted(trans, restart_count), trans,
+ "trans->restart_count %u, should be %u, last restarted by %ps\n",
+ trans->restart_count, restart_count,
+ (void *) trans->last_restarted_ip);
}
static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
@@ -3291,6 +3296,15 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
trans->last_begin_time = ktime_get_ns();
trans->task = current;
+	/*
+	 * Find this caller's slot in lock_held_stats.names[]: either the entry
+	 * already holding @fn or the first empty one. The index is bounded
+	 * before every read so a completely full table is never read past its
+	 * end (assumes names[] has BCH_LOCK_TIME_NR entries, as the range
+	 * check below implies).
+	 */
+	while (trans->lock_name_idx < BCH_LOCK_TIME_NR &&
+	       c->lock_held_stats.names[trans->lock_name_idx] != fn &&
+	       c->lock_held_stats.names[trans->lock_name_idx] != 0)
+		trans->lock_name_idx++;
+
+	/* No free slot left: warn once and leave the table untouched. */
+	if (trans->lock_name_idx >= BCH_LOCK_TIME_NR)
+		pr_warn_once("lock_times array not big enough!");
+	else
+		c->lock_held_stats.names[trans->lock_name_idx] = fn;
+
bch2_trans_alloc_paths(trans, c);
if (expected_mem_bytes) {
@@ -3393,18 +3407,18 @@ void bch2_trans_exit(struct btree_trans *trans)
/*
 * Append a short description of a locked node to @out: the level and btree
 * name read from @b, followed by the position btree_node_pos(b, cached)
 * rendered by bch2_bpos_to_text().
 *
 * The removed/added line pairs below only rename the parameter "_b" to "b".
 */
static void __maybe_unused
bch2_btree_path_node_to_text(struct printbuf *out,
- struct btree_bkey_cached_common *_b,
+ struct btree_bkey_cached_common *b,
bool cached)
{
prt_printf(out, " l=%u %s:",
- _b->level, bch2_btree_ids[_b->btree_id]);
- bch2_bpos_to_text(out, btree_node_pos(_b, cached));
+ b->level, bch2_btree_ids[b->btree_id]);
+ bch2_bpos_to_text(out, btree_node_pos(b, cached));
}
void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
{
struct btree_path *path;
- struct btree *b;
+ struct btree_bkey_cached_common *b;
static char lock_types[] = { 'r', 'i', 'w' };
unsigned l;
@@ -3423,12 +3437,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
prt_printf(out, "\n");
for (l = 0; l < BTREE_MAX_DEPTH; l++) {
- if (btree_node_locked(path, l)) {
+ if (btree_node_locked(path, l) &&
+ !IS_ERR_OR_NULL(b = (void *) READ_ONCE(path->l[l].b))) {
prt_printf(out, " %s l=%u ",
btree_node_intent_locked(path, l) ? "i" : "r", l);
- bch2_btree_path_node_to_text(out,
- (void *) path->l[l].b,
- path->cached);
+ bch2_btree_path_node_to_text(out, b, path->cached);
prt_printf(out, "\n");
}
}
@@ -3446,8 +3459,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
bch2_bpos_to_text(out, trans->locking_pos);
prt_printf(out, " node ");
- bch2_btree_path_node_to_text(out,
- (void *) b, path->cached);
+ bch2_btree_path_node_to_text(out, b, path->cached);
prt_printf(out, "\n");
}
}
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 4b9d03b8..f38fd25b 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -5,6 +5,8 @@
#include "bset.h"
#include "btree_types.h"
+#include <trace/events/bcachefs.h>
+
static inline void __btree_path_get(struct btree_path *path, bool intent)
{
path->ref++;
@@ -159,19 +161,36 @@ void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *,
struct btree *, struct btree_node_iter *,
struct bkey_packed *, unsigned, unsigned);
-bool bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *);
+int bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *);
void bch2_path_put(struct btree_trans *, struct btree_path *, bool);
-bool bch2_trans_relock(struct btree_trans *);
+int bch2_trans_relock(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);
+/*
+ * Report whether trans->restart_count has moved away from @restart_count,
+ * the value the caller sampled earlier.
+ */
+static inline bool trans_was_restarted(struct btree_trans *trans, u32 restart_count)
+{
+	u32 current_count = trans->restart_count;
+
+	return current_count != restart_count;
+}
+
+void bch2_trans_verify_not_restarted(struct btree_trans *, u32);
+
+/*
+ * Flag @trans as restarted with reason @err; this helper itself does not
+ * call any unlock function.
+ *
+ * @err must be a positive code in the BCH_ERR_transaction_restart class;
+ * both conditions are enforced with BUG_ON(). The code is stored in
+ * trans->restarted, trans->restart_count is incremented, and the negated
+ * code is returned.
+ */
+__always_inline
+static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int err)
+{
+	BUG_ON(!(err > 0));
+	BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart));
+
+	trans->restart_count += 1;
+	trans->restarted = err;
+
+	return -err;
+}
+
/*
 * Mark @trans as restarted with reason @err and return -@err.
 *
 * After this hunk the body only forwards to btree_trans_restart_nounlock();
 * the bch2_trans_unlock() call made by the removed lines is no longer issued
 * here, and the fixed -EINTR return is replaced by the negated @err.
 */
__always_inline
-static inline int btree_trans_restart(struct btree_trans *trans)
+static inline int btree_trans_restart(struct btree_trans *trans, int err)
{
- trans->restarted = true;
- bch2_trans_unlock(trans);
- return -EINTR;
+ btree_trans_restart_nounlock(trans, err);
+ return -err;
}
bool bch2_btree_node_upgrade(struct btree_trans *,
@@ -191,14 +210,15 @@ static inline bool bch2_btree_path_upgrade(struct btree_trans *trans,
: path->uptodate == BTREE_ITER_UPTODATE;
}
-void __bch2_btree_path_downgrade(struct btree_path *, unsigned);
+void __bch2_btree_path_downgrade(struct btree_trans *, struct btree_path *, unsigned);
/*
 * Compute new_locks_want = path->level + (1 if the path has any intent
 * references, else 0) and, when path->locks_want is currently higher, hand
 * that value to __bch2_btree_path_downgrade(). The hunk adds @trans to the
 * signature and forwards it to the helper.
 */
-static inline void bch2_btree_path_downgrade(struct btree_path *path)
+static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
+ struct btree_path *path)
{
unsigned new_locks_want = path->level + !!path->intent_ref;
if (path->locks_want > new_locks_want)
- __bch2_btree_path_downgrade(path, new_locks_want);
+ __bch2_btree_path_downgrade(trans, path, new_locks_want);
}
void bch2_trans_downgrade(struct btree_trans *);
@@ -279,11 +299,12 @@ void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
/*
 * Clear the preserve flag on the iterator's path. With the added check the
 * flag is left alone while iter->trans->restarted is non-zero.
 */
static inline void set_btree_iter_dontneed(struct btree_iter *iter)
{
- iter->path->preserve = false;
+ if (!iter->trans->restarted)
+ iter->path->preserve = false;
}
void *bch2_trans_kmalloc(struct btree_trans *, size_t);
-void bch2_trans_begin(struct btree_trans *);
+u32 bch2_trans_begin(struct btree_trans *);
static inline struct btree *
__btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter *iter)
@@ -291,7 +312,7 @@ __btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter
struct btree *b;
while (b = bch2_btree_iter_peek_node(iter),
- PTR_ERR_OR_ZERO(b) == -EINTR)
+ bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
bch2_trans_begin(trans);
return b;
@@ -315,6 +336,15 @@ static inline int bkey_err(struct bkey_s_c k)
return PTR_ERR_OR_ZERO(k.k);
}
+/*
+ * Peek helper used by the reverse key-iteration macro: dispatches on @flags
+ * to bch2_btree_iter_peek_slot() when BTREE_ITER_SLOTS is set, otherwise to
+ * bch2_btree_iter_peek_prev(). BTREE_ITER_ALL_LEVELS is rejected by BUG_ON().
+ */
+static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter,
+							     unsigned flags)
+{
+	BUG_ON(flags & BTREE_ITER_ALL_LEVELS);
+
+	if (flags & BTREE_ITER_SLOTS)
+		return bch2_btree_iter_peek_slot(iter);
+
+	return bch2_btree_iter_peek_prev(iter);
+}
+
static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
unsigned flags)
{
@@ -338,8 +368,12 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *
/*
 * Restart the transaction when it has allocated too many btree paths.
 *
 * Returns 0 while the number of bits set in trans->paths_allocated is at most
 * BTREE_ITER_MAX / 2; otherwise emits the too_many_iters tracepoint and
 * returns the result of btree_trans_restart().
 */
static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
- return hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2
- ? -EINTR : 0;
+	/*
+	 * Keep the pre-existing BTREE_ITER_MAX / 2 threshold: comparing
+	 * against BTREE_ITER_MAX itself could only trip once the limit had
+	 * already been exceeded, leaving the caller no headroom.
+	 */
+	if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2) {
+		trace_trans_restart_too_many_iters(trans, _THIS_IP_);
+		return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
+	}
+
+	return 0;
}
static inline struct bkey_s_c
@@ -350,12 +384,52 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
while (btree_trans_too_many_iters(trans) ||
(k = bch2_btree_iter_peek_type(iter, flags),
- bkey_err(k) == -EINTR))
+ bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
bch2_trans_begin(trans);
return k;
}
+/*
+ * lockrestart_do(): call bch2_trans_begin() and then evaluate @_do, repeating
+ * both for as long as the result matches BCH_ERR_transaction_restart.
+ * Evaluates to the first result that is not a transaction restart.
+ */
+#define lockrestart_do(_trans, _do)					\
+({									\
+	int _ret;							\
+									\
+	for (;;) {							\
+		bch2_trans_begin(_trans);				\
+		_ret = (_do);						\
+		if (!bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+			break;						\
+	}								\
+									\
+	_ret;								\
+})
+
+/*
+ * nested_lockrestart_do(), nested_commit_do():
+ *
+ * Variants of lockrestart_do() and commit_do() that differ in two ways:
+ *
+ * - bch2_trans_begin() is only called after a transaction restart, not
+ *   before the first attempt
+ * - if the operation succeeds but the transaction was restarted along the
+ *   way, -BCH_ERR_transaction_restart_nested is returned instead of 0
+ */
+#define nested_lockrestart_do(_trans, _do)				\
+({									\
+	u32 _orig_restart_count = (_trans)->restart_count;		\
+	u32 _restart_count = _orig_restart_count;			\
+	int _ret;							\
+									\
+	for (;;) {							\
+		_ret = (_do);						\
+		if (!bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+			break;						\
+		_restart_count = bch2_trans_begin(_trans);		\
+	}								\
+									\
+	if (!_ret) {							\
+		bch2_trans_verify_not_restarted(_trans, _restart_count);\
+		if (trans_was_restarted(_trans, _orig_restart_count))	\
+			_ret = -BCH_ERR_transaction_restart_nested;	\
+	}								\
+									\
+	_ret;								\
+})
+
#define for_each_btree_key2(_trans, _iter, _btree_id, \
_start, _flags, _k, _do) \
({ \
@@ -364,7 +438,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
(_start), (_flags)); \
\
- do { \
+ while (1) { \
bch2_trans_begin(_trans); \
(_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \
if (!(_k).k) { \
@@ -373,9 +447,42 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
} \
\
_ret = bkey_err(_k) ?: (_do); \
- if (!_ret) \
- bch2_btree_iter_advance(&(_iter)); \
- } while (_ret == 0 || _ret == -EINTR); \
+ if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+ continue; \
+ if (_ret) \
+ break; \
+ if (!bch2_btree_iter_advance(&(_iter))) \
+ break; \
+ } \
+ \
+ bch2_trans_iter_exit((_trans), &(_iter)); \
+ _ret; \
+})
+
+#define for_each_btree_key_reverse(_trans, _iter, _btree_id, \
+ _start, _flags, _k, _do) \
+({ \
+ int _ret = 0; \
+ \
+ bch2_trans_iter_init((_trans), &(_iter), (_btree_id), \
+ (_start), (_flags)); \
+ \
+ while (1) { \
+ bch2_trans_begin(_trans); \
+ (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\
+ if (!(_k).k) { \
+ _ret = 0; \
+ break; \
+ } \
+ \
+ _ret = bkey_err(_k) ?: (_do); \
+ if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
+ continue; \
+ if (_ret) \
+ break; \
+ if (!bch2_btree_iter_rewind(&(_iter))) \
+ break; \
+ } \
\
bch2_trans_iter_exit((_trans), &(_iter)); \
_ret; \
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index a5b0a956..fa90581f 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -5,6 +5,7 @@
#include "btree_key_cache.h"
#include "btree_locking.h"
#include "btree_update.h"
+#include "errcode.h"
#include "error.h"
#include "journal.h"
#include "journal_reclaim.h"
@@ -290,9 +291,8 @@ static int btree_key_cache_fill(struct btree_trans *trans,
k = bch2_btree_path_peek_slot(path, &u);
if (!bch2_btree_node_relock(trans, ck_path, 0)) {
- trace_trans_restart_relock_key_cache_fill(trans->fn,
- _THIS_IP_, ck_path->btree_id, &ck_path->pos);
- ret = btree_trans_restart(trans);
+ trace_trans_restart_relock_key_cache_fill(trans, _THIS_IP_, ck_path);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
goto err;
}
@@ -347,8 +347,10 @@ static int bkey_cached_check_fn(struct six_lock *lock, void *p)
/*
 * Lock check callback: @lock is embedded in a struct bkey_cached and @p is the
 * btree_path being traversed. Returns 0 when the cached key still matches the
 * path's btree id and position, BCH_ERR_lock_fail_node_reused otherwise.
 */
struct bkey_cached *ck = container_of(lock, struct bkey_cached, c.lock);
const struct btree_path *path = p;
- return ck->key.btree_id == path->btree_id &&
- !bpos_cmp(ck->key.pos, path->pos) ? 0 : -1;
+	/*
+	 * Fail if EITHER the btree id or the position differs - the exact
+	 * negation of the removed "id matches && pos matches ? 0 : -1" test.
+	 * Joining the two mismatches with && would accept an entry that
+	 * differs in only one of them.
+	 */
+	if (ck->key.btree_id != path->btree_id ||
+	    bpos_cmp(ck->key.pos, path->pos))
+		return BCH_ERR_lock_fail_node_reused;
+	return 0;
}
__flatten
@@ -370,11 +372,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path
retry:
ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
if (!ck) {
- if (flags & BTREE_ITER_CACHED_NOCREATE) {
- path->l[0].b = NULL;
- return 0;
- }
-
ck = btree_key_cache_create(c, path->btree_id, path->pos);
ret = PTR_ERR_OR_ZERO(ck);
if (ret)
@@ -387,14 +384,15 @@ retry:
} else {
enum six_lock_type lock_want = __btree_lock_want(path, 0);
- if (!btree_node_lock(trans, path, (void *) ck, path->pos, 0,
- lock_want,
- bkey_cached_check_fn, path, _THIS_IP_)) {
- if (!trans->restarted)
+ ret = btree_node_lock(trans, path, (void *) ck, path->pos, 0,
+ lock_want,
+ bkey_cached_check_fn, path, _THIS_IP_);
+ if (ret) {
+ if (bch2_err_matches(ret, BCH_ERR_lock_fail_node_reused))
goto retry;
-
- ret = -EINTR;
- goto err;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ goto err;
+ BUG();
}
if (ck->key.btree_id != path->btree_id ||
@@ -409,11 +407,15 @@ retry:
path->l[0].lock_seq = ck->c.lock.state.seq;
path->l[0].b = (void *) ck;
fill:
- if (!ck->valid && !(flags & BTREE_ITER_CACHED_NOFILL)) {
+ if (!ck->valid) {
+ /*
+ * Using the underscore version because we haven't set
+ * path->uptodate yet:
+ */
if (!path->locks_want &&
!__bch2_btree_path_upgrade(trans, path, 1)) {
- trace_transaction_restart_ip(trans->fn, _THIS_IP_);
- ret = btree_trans_restart(trans);
+ trace_transaction_restart_key_cache_upgrade(trans, _THIS_IP_);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_upgrade);
goto err;
}
@@ -426,13 +428,14 @@ fill:
set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
path->uptodate = BTREE_ITER_UPTODATE;
+ BUG_ON(!ck->valid);
BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
return ret;
err:
- if (ret != -EINTR) {
- btree_node_unlock(path, 0);
- path->l[0].b = BTREE_ITER_NO_NODE_ERROR;
+ if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+ btree_node_unlock(trans, path, 0);
+ path->l[0].b = ERR_PTR(ret);
}
return ret;
}
@@ -455,8 +458,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
BTREE_ITER_ALL_SNAPSHOTS);
bch2_trans_iter_init(trans, &c_iter, key.btree_id, key.pos,
BTREE_ITER_CACHED|
- BTREE_ITER_CACHED_NOFILL|
- BTREE_ITER_CACHED_NOCREATE|
BTREE_ITER_INTENT);
b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE;
@@ -497,13 +498,14 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
? JOURNAL_WATERMARK_reserved
: 0)|
commit_flags);
- if (ret) {
- bch2_fs_fatal_err_on(ret != -EINTR &&
- ret != -EAGAIN &&
- !bch2_journal_error(j), c,
- "error flushing key cache: %i", ret);
+
+ bch2_fs_fatal_err_on(ret &&
+ !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
+ !bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
+ !bch2_journal_error(j), c,
+ "error flushing key cache: %s", bch2_err_str(ret));
+ if (ret)
goto out;
- }
bch2_journal_pin_drop(j, &ck->journal);
bch2_journal_preres_put(j, &ck->res);
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index 67c970d7..c3f3cb87 100644
--- a/libbcachefs/btree_locking.h
+++ b/libbcachefs/btree_locking.h
@@ -14,6 +14,11 @@
#include "btree_iter.h"
+/*
+ * True when level @l is a valid index (< BTREE_MAX_DEPTH) and path->l[l].b
+ * is neither NULL nor an ERR_PTR() value.
+ */
+static inline bool is_btree_node(struct btree_path *path, unsigned l)
+{
+	if (l >= BTREE_MAX_DEPTH)
+		return false;
+
+	return !IS_ERR_OR_NULL(path->l[l].b);
+}
+
/* matches six lock types */
enum btree_node_locked_type {
BTREE_NODE_UNLOCKED = -1,
@@ -58,7 +63,7 @@ static inline void mark_btree_node_unlocked(struct btree_path *path,
path->nodes_intent_locked &= ~(1 << level);
}
-static inline void mark_btree_node_locked(struct btree_trans *trans,
+static inline void mark_btree_node_locked_noreset(struct btree_trans *trans,
struct btree_path *path,
unsigned level,
enum six_lock_type type)
@@ -73,11 +78,22 @@ static inline void mark_btree_node_locked(struct btree_trans *trans,
path->nodes_intent_locked |= type << level;
}
+/*
+ * Record that @path holds a lock of @type at @level by delegating to
+ * mark_btree_node_locked_noreset(), and - when
+ * CONFIG_BCACHEFS_LOCK_TIME_STATS is enabled - stamp the level's
+ * lock_taken_time with the current ktime_get_ns() value.
+ */
+static inline void mark_btree_node_locked(struct btree_trans *trans,
+					  struct btree_path *path,
+					  unsigned level,
+					  enum six_lock_type type)
+{
+	mark_btree_node_locked_noreset(trans, path, level, type);
+
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+	/* start of the hold interval that btree_node_unlock() reports */
+	path->l[level].lock_taken_time = ktime_get_ns();
+#endif
+}
+
/*
 * Record a SIX_LOCK_intent lock on @path at @level. After this hunk the call
 * goes to mark_btree_node_locked_noreset(), so the lock_taken_time update
 * performed by mark_btree_node_locked() is not done on this route.
 */
static inline void mark_btree_node_intent_locked(struct btree_trans *trans,
struct btree_path *path,
unsigned level)
{
- mark_btree_node_locked(trans, path, level, SIX_LOCK_intent);
+ mark_btree_node_locked_noreset(trans, path, level, SIX_LOCK_intent);
}
static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
@@ -99,23 +115,35 @@ btree_lock_want(struct btree_path *path, int level)
return BTREE_NODE_UNLOCKED;
}
/*
 * Drop whatever lock @path holds at @level (if any) and mark the level
 * unlocked.
 *
 * With the added lines, when CONFIG_BCACHEFS_LOCK_TIME_STATS is set and
 * trans->lock_name_idx is below BCH_LOCK_TIME_NR, the interval from
 * path->l[level].lock_taken_time to the current ktime_get_ns() value is passed
 * to __bch2_time_stats_update() for
 * c->lock_held_stats.times[trans->lock_name_idx].
 */
-static inline void btree_node_unlock(struct btree_path *path, unsigned level)
+static inline void btree_node_unlock(struct btree_trans *trans,
+ struct btree_path *path, unsigned level)
{
int lock_type = btree_node_locked_type(path, level);
EBUG_ON(level >= BTREE_MAX_DEPTH);
- if (lock_type != BTREE_NODE_UNLOCKED)
+ if (lock_type != BTREE_NODE_UNLOCKED) {
six_unlock_type(&path->l[level].b->c.lock, lock_type);
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ if (trans->lock_name_idx < BCH_LOCK_TIME_NR) {
+ struct bch_fs *c = trans->c;
+
+ __bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx],
+ path->l[level].lock_taken_time,
+ ktime_get_ns());
+ }
+#endif
+ }
mark_btree_node_unlocked(path, level);
}
/*
 * Flag @path as BTREE_ITER_NEED_RELOCK through btree_path_set_dirty(), then
 * call btree_node_unlock() for each bit set in path->nodes_locked, picking
 * the level with __ffs() on every pass, until the mask is empty. The hunk
 * adds @trans so it can be forwarded to btree_node_unlock().
 */
-static inline void __bch2_btree_path_unlock(struct btree_path *path)
+static inline void __bch2_btree_path_unlock(struct btree_trans *trans,
+ struct btree_path *path)
{
btree_path_set_dirty(path, BTREE_ITER_NEED_RELOCK);
while (path->nodes_locked)
- btree_node_unlock(path, __ffs(path->nodes_locked));
+ btree_node_unlock(trans, path, __ffs(path->nodes_locked));
}
static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type)
@@ -132,7 +160,7 @@ static inline enum bch_time_stats lock_to_time_stat(enum six_lock_type type)
}
}
-static inline bool btree_node_lock_type(struct btree_trans *trans,
+static inline int btree_node_lock_type(struct btree_trans *trans,
struct btree_path *path,
struct btree *b,
struct bpos pos, unsigned level,
@@ -141,10 +169,10 @@ static inline bool btree_node_lock_type(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
u64 start_time;
- bool ret;
+ int ret;
if (six_trylock_type(&b->c.lock, type))
- return true;
+ return 0;
start_time = local_clock();
@@ -153,14 +181,15 @@ static inline bool btree_node_lock_type(struct btree_trans *trans,
trans->locking_btree_id = path->btree_id;
trans->locking_level = level;
trans->locking_lock_type = type;
- trans->locking = b;
- ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p) == 0;
+ trans->locking = &b->c;
+ ret = six_lock_type(&b->c.lock, type, should_sleep_fn, p);
trans->locking = NULL;
if (ret)
- bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
+ return ret;
- return ret;
+ bch2_time_stats_update(&c->times[lock_to_time_stat(type)], start_time);
+ return 0;
}
/*
@@ -183,26 +212,34 @@ static inline bool btree_node_lock_increment(struct btree_trans *trans,
return false;
}
-bool __bch2_btree_node_lock(struct btree_trans *, struct btree_path *,
- struct btree *, struct bpos, unsigned,
- enum six_lock_type,
- six_lock_should_sleep_fn, void *,
- unsigned long);
+int __bch2_btree_node_lock(struct btree_trans *, struct btree_path *,
+ struct btree *, struct bpos, unsigned,
+ enum six_lock_type,
+ six_lock_should_sleep_fn, void *,
+ unsigned long);
-static inline bool btree_node_lock(struct btree_trans *trans,
+static inline int btree_node_lock(struct btree_trans *trans,
struct btree_path *path,
struct btree *b, struct bpos pos, unsigned level,
enum six_lock_type type,
six_lock_should_sleep_fn should_sleep_fn, void *p,
unsigned long ip)
{
+ int ret = 0;
+
EBUG_ON(level >= BTREE_MAX_DEPTH);
EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
- return likely(six_trylock_type(&b->c.lock, type)) ||
- btree_node_lock_increment(trans, b, level, type) ||
- __bch2_btree_node_lock(trans, path, b, pos, level, type,
- should_sleep_fn, p, ip);
+ if (likely(six_trylock_type(&b->c.lock, type)) ||
+ btree_node_lock_increment(trans, b, level, type) ||
+ !(ret = __bch2_btree_node_lock(trans, path, b, pos, level, type,
+ should_sleep_fn, p, ip))) {
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ path->l[b->c.level].lock_taken_time = ktime_get_ns();
+#endif
+ }
+
+ return ret;
}
bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned);
@@ -254,6 +291,30 @@ static inline void bch2_btree_node_lock_write(struct btree_trans *trans,
__bch2_btree_node_lock_write(trans, b);
}
-#endif /* _BCACHEFS_BTREE_LOCKING_H */
+/*
+ * Set path->should_be_locked on @path.
+ *
+ * The EBUG_ON() checks assert the preconditions: the node at path->level is
+ * locked, and path->uptodate is zero (BTREE_ITER_UPTODATE).
+ */
+static inline void btree_path_set_should_be_locked(struct btree_path *path)
+{
+ EBUG_ON(!btree_node_locked(path, path->level));
+ EBUG_ON(path->uptodate);
+ path->should_be_locked = true;
+}
+/*
+ * Drop level @l of @path: release the lock held on that level (if any) with
+ * btree_node_unlock(), then overwrite the node pointer with an ERR_PTR
+ * carrying -BCH_ERR_no_btree_node_up.
+ *
+ * Does not touch path->level or the path's uptodate state.
+ */
+static inline void __btree_path_set_level_up(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned l)
+{
+ btree_node_unlock(trans, path, l);
+ path->l[l].b = ERR_PTR(-BCH_ERR_no_btree_node_up);
+}
+
+/*
+ * Move @path up one level: drop the current level with
+ * __btree_path_set_level_up(), increment path->level, and mark the path
+ * BTREE_ITER_NEED_TRAVERSE.
+ */
+static inline void btree_path_set_level_up(struct btree_trans *trans,
+ struct btree_path *path)
+{
+ /* post-increment: the old level is the one dropped, then path->level is bumped */
+ __btree_path_set_level_up(trans, path, path->level++);
+ btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE);
+}
+
+struct six_lock_count bch2_btree_node_lock_counts(struct btree_trans *,
+ struct btree_path *, struct btree *, unsigned);
+
+#endif /* _BCACHEFS_BTREE_LOCKING_H */
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index be12c9ff..1ff99917 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -199,15 +199,13 @@ struct btree_node_iter {
#define BTREE_ITER_IS_EXTENTS (1 << 4)
#define BTREE_ITER_NOT_EXTENTS (1 << 5)
#define BTREE_ITER_CACHED (1 << 6)
-#define BTREE_ITER_CACHED_NOFILL (1 << 7)
-#define BTREE_ITER_CACHED_NOCREATE (1 << 8)
-#define BTREE_ITER_WITH_KEY_CACHE (1 << 9)
-#define BTREE_ITER_WITH_UPDATES (1 << 10)
-#define BTREE_ITER_WITH_JOURNAL (1 << 11)
-#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
-#define BTREE_ITER_ALL_SNAPSHOTS (1 << 13)
-#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 14)
-#define BTREE_ITER_NOPRESERVE (1 << 15)
+#define BTREE_ITER_WITH_KEY_CACHE (1 << 7)
+#define BTREE_ITER_WITH_UPDATES (1 << 8)
+#define BTREE_ITER_WITH_JOURNAL (1 << 9)
+#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 10)
+#define BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
+#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 12)
+#define BTREE_ITER_NOPRESERVE (1 << 13)
enum btree_path_uptodate {
BTREE_ITER_UPTODATE = 0,
@@ -215,15 +213,6 @@ enum btree_path_uptodate {
BTREE_ITER_NEED_TRAVERSE = 2,
};
-#define BTREE_ITER_NO_NODE_GET_LOCKS ((struct btree *) 1)
-#define BTREE_ITER_NO_NODE_DROP ((struct btree *) 2)
-#define BTREE_ITER_NO_NODE_LOCK_ROOT ((struct btree *) 3)
-#define BTREE_ITER_NO_NODE_UP ((struct btree *) 4)
-#define BTREE_ITER_NO_NODE_DOWN ((struct btree *) 5)
-#define BTREE_ITER_NO_NODE_INIT ((struct btree *) 6)
-#define BTREE_ITER_NO_NODE_ERROR ((struct btree *) 7)
-#define BTREE_ITER_NO_NODE_CACHED ((struct btree *) 8)
-
struct btree_path {
u8 idx;
u8 sorted_idx;
@@ -251,6 +240,9 @@ struct btree_path {
struct btree *b;
struct btree_node_iter iter;
u32 lock_seq;
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ u64 lock_taken_time;
+#endif
} l[BTREE_MAX_DEPTH];
#ifdef CONFIG_BCACHEFS_DEBUG
unsigned long ip_allocated;
@@ -391,7 +383,7 @@ struct btree_trans {
const char *fn;
struct list_head list;
u64 last_begin_time;
- struct btree *locking;
+ struct btree_bkey_cached_common *locking;
unsigned locking_path_idx;
struct bpos locking_pos;
u8 locking_btree_id;
@@ -405,9 +397,12 @@ struct btree_trans {
u8 traverse_all_idx;
bool used_mempool:1;
bool in_traverse_all:1;
- bool restarted:1;
bool memory_allocation_failure:1;
bool is_initial_gc:1;
+ enum bch_errcode restarted:16;
+ u32 restart_count;
+ unsigned long last_restarted_ip;
+
/*
* For when bch2_trans_update notices we'll be splitting a compressed
* extent:
@@ -437,6 +432,7 @@ struct btree_trans {
unsigned journal_u64s;
unsigned journal_preres_u64s;
struct replicas_delta_list *fs_usage_deltas;
+ int lock_name_idx;
};
#define BTREE_FLAGS() \
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index e9127dbf..89941fb8 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -90,7 +90,6 @@ int bch2_trans_log_msg(struct btree_trans *, const char *);
* This is main entry point for btree updates.
*
* Return values:
- * -EINTR: locking changed, this function should be called again.
* -EROFS: filesystem read only
* -EIO: journal or btree node IO error
*/
@@ -106,29 +105,33 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
return __bch2_trans_commit(trans);
}
-#define lockrestart_do(_trans, _do) \
+#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \
+ lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
+ (_journal_seq), (_flags)))
+
+#define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \
+ nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
+ (_journal_seq), (_flags)))
+
+#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
({ \
+ struct btree_trans trans; \
int _ret; \
\
- do { \
- bch2_trans_begin(_trans); \
- _ret = (_do); \
- } while (_ret == -EINTR); \
+ bch2_trans_init(&trans, (_c), 0, 0); \
+ _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \
+ bch2_trans_exit(&trans); \
\
_ret; \
})
-#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \
- lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
- (_journal_seq), (_flags)))
-
-#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
+#define bch2_trans_run(_c, _do) \
({ \
struct btree_trans trans; \
int _ret; \
\
bch2_trans_init(&trans, (_c), 0, 0); \
- _ret = commit_do(&trans, _disk_res, _journal_seq, _flags, _do); \
+ _ret = (_do); \
bch2_trans_exit(&trans); \
\
_ret; \
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index c3ef2387..e4138614 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -1005,9 +1005,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
nr_nodes[1] += 1;
if (!bch2_btree_path_upgrade(trans, path, U8_MAX)) {
- trace_trans_restart_iter_upgrade(trans->fn, _RET_IP_,
- path->btree_id, &path->pos);
- ret = btree_trans_restart(trans);
+ trace_trans_restart_iter_upgrade(trans, _RET_IP_, path);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
return ERR_PTR(ret);
}
@@ -1016,9 +1015,10 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
else if (!down_read_trylock(&c->gc_lock)) {
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
- if (!bch2_trans_relock(trans)) {
+ ret = bch2_trans_relock(trans);
+ if (ret) {
up_read(&c->gc_lock);
- return ERR_PTR(-EINTR);
+ return ERR_PTR(ret);
}
}
@@ -1060,8 +1060,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
journal_flags);
if (ret) {
bch2_btree_update_free(as);
- trace_trans_restart_journal_preres_get(trans->fn, _RET_IP_);
- btree_trans_restart(trans);
+ trace_trans_restart_journal_preres_get(trans, _RET_IP_);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
return ERR_PTR(ret);
}
@@ -1076,10 +1076,9 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (ret)
goto err;
- if (!bch2_trans_relock(trans)) {
- ret = -EINTR;
+ ret = bch2_trans_relock(trans);
+ if (ret)
goto err;
- }
return as;
err:
@@ -1650,7 +1649,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
if (ret)
goto err;
- sib_path->should_be_locked = true;
+ btree_path_set_should_be_locked(sib_path);
m = sib_path->l[level].b;
@@ -1830,7 +1829,7 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
bch2_btree_update_done(as);
out:
- bch2_btree_path_downgrade(iter->path);
+ bch2_btree_path_downgrade(trans, iter->path);
return ret;
}
@@ -1943,10 +1942,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
BUG_ON(iter2.path->level != b->c.level);
BUG_ON(bpos_cmp(iter2.path->pos, new_key->k.p));
- btree_node_unlock(iter2.path, iter2.path->level);
- path_l(iter2.path)->b = BTREE_ITER_NO_NODE_UP;
- iter2.path->level++;
- btree_path_set_dirty(iter2.path, BTREE_ITER_NEED_TRAVERSE);
+ btree_path_set_level_up(trans, iter2.path);
bch2_btree_path_check_sort(trans, iter2.path, 0);
@@ -2017,10 +2013,8 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
int ret = 0;
if (!btree_node_intent_locked(path, b->c.level) &&
- !bch2_btree_path_upgrade(trans, path, b->c.level + 1)) {
- btree_trans_restart(trans);
- return -EINTR;
- }
+ !bch2_btree_path_upgrade(trans, path, b->c.level + 1))
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
closure_init_stack(&cl);
@@ -2033,8 +2027,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite
if (ret) {
bch2_trans_unlock(trans);
closure_sync(&cl);
- if (!bch2_trans_relock(trans))
- return -EINTR;
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ return ret;
}
new_hash = bch2_btree_node_mem_alloc(c, false);
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index aed26b57..dd832f12 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -10,6 +10,7 @@
#include "btree_locking.h"
#include "buckets.h"
#include "debug.h"
+#include "errcode.h"
#include "error.h"
#include "extent_update.h"
#include "journal.h"
@@ -282,9 +283,10 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
if (ret)
return ret;
- if (!bch2_trans_relock(trans)) {
- trace_trans_restart_journal_preres_get(trans->fn, trace_ip);
- return -EINTR;
+ ret = bch2_trans_relock(trans);
+ if (ret) {
+ trace_trans_restart_journal_preres_get(trans, trace_ip);
+ return ret;
}
return 0;
@@ -373,15 +375,8 @@ btree_key_can_insert_cached(struct btree_trans *trans,
* Keys returned by peek() are no longer valid pointers, so we need a
* transaction restart:
*/
- trace_trans_restart_key_cache_key_realloced(trans->fn, _RET_IP_,
- path->btree_id, &path->pos,
- old_u64s, new_u64s);
- /*
- * Not using btree_trans_restart() because we can't unlock here, we have
- * write locks held:
- */
- trans->restarted = true;
- return -EINTR;
+ trace_trans_restart_key_cache_key_realloced(trans, _RET_IP_, path, old_u64s, new_u64s);
+ return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_key_cache_realloced);
}
/* Triggers: */
@@ -572,9 +567,8 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
int ret;
if (race_fault()) {
- trace_trans_restart_fault_inject(trans->fn, trace_ip);
- trans->restarted = true;
- return -EINTR;
+ trace_trans_restart_fault_inject(trans, trace_ip);
+ return btree_trans_restart_nounlock(trans, BCH_ERR_transaction_restart_fault_inject);
}
/*
@@ -726,8 +720,10 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
btree_insert_key_leaf(trans, i);
else if (!i->key_cache_already_flushed)
bch2_btree_insert_key_cached(trans, i->path, i->k);
- else
+ else {
bch2_btree_key_cache_drop(trans, i->path);
+ btree_path_set_dirty(i->path, BTREE_ITER_NEED_TRAVERSE);
+ }
}
return ret;
@@ -806,6 +802,7 @@ static inline bool have_conflicting_read_lock(struct btree_trans *trans, struct
static inline int trans_lock_write(struct btree_trans *trans)
{
struct btree_insert_entry *i;
+ int ret;
trans_for_each_update(trans, i) {
if (same_leaf_as_prev(trans, i))
@@ -815,10 +812,11 @@ static inline int trans_lock_write(struct btree_trans *trans)
if (have_conflicting_read_lock(trans, i->path))
goto fail;
- btree_node_lock_type(trans, i->path,
+ ret = btree_node_lock_type(trans, i->path,
insert_l(i)->b,
i->path->pos, i->level,
SIX_LOCK_write, NULL, NULL);
+ BUG_ON(ret);
}
bch2_btree_node_prep_for_write(trans, i->path, insert_l(i)->b);
@@ -833,8 +831,8 @@ fail:
bch2_btree_node_unlock_write_inlined(trans, i->path, insert_l(i)->b);
}
- trace_trans_restart_would_deadlock_write(trans->fn);
- return btree_trans_restart(trans);
+ trace_trans_restart_would_deadlock_write(trans);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock_write);
}
static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans)
@@ -965,12 +963,8 @@ int bch2_trans_commit_error(struct btree_trans *trans,
switch (ret) {
case BTREE_INSERT_BTREE_NODE_FULL:
ret = bch2_btree_split_leaf(trans, i->path, trans->flags);
- if (!ret)
- return 0;
-
- if (ret == -EINTR)
- trace_trans_restart_btree_node_split(trans->fn, trace_ip,
- i->btree_id, &i->path->pos);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ trace_trans_restart_btree_node_split(trans, trace_ip, i->path);
break;
case BTREE_INSERT_NEED_MARK_REPLICAS:
bch2_trans_unlock(trans);
@@ -979,19 +973,16 @@ int bch2_trans_commit_error(struct btree_trans *trans,
if (ret)
break;
- if (bch2_trans_relock(trans))
- return 0;
-
- trace_trans_restart_mark_replicas(trans->fn, trace_ip);
- ret = -EINTR;
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ trace_trans_restart_mark_replicas(trans, trace_ip);
break;
case BTREE_INSERT_NEED_JOURNAL_RES:
bch2_trans_unlock(trans);
if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
!(trans->flags & JOURNAL_WATERMARK_reserved)) {
- trans->restarted = true;
- ret = -EAGAIN;
+ ret = -BCH_ERR_journal_reclaim_would_deadlock;
break;
}
@@ -999,34 +990,30 @@ int bch2_trans_commit_error(struct btree_trans *trans,
if (ret)
break;
- if (bch2_trans_relock(trans))
- return 0;
-
- trace_trans_restart_journal_res_get(trans->fn, trace_ip);
- ret = -EINTR;
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ trace_trans_restart_journal_res_get(trans, trace_ip);
break;
case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
bch2_trans_unlock(trans);
- trace_trans_blocked_journal_reclaim(trans->fn, trace_ip);
+ trace_trans_blocked_journal_reclaim(trans, trace_ip);
wait_event_freezable(c->journal.reclaim_wait,
(ret = journal_reclaim_wait_done(c)));
if (ret < 0)
break;
- if (bch2_trans_relock(trans))
- return 0;
-
- trace_trans_restart_journal_reclaim(trans->fn, trace_ip);
- ret = -EINTR;
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ trace_trans_restart_journal_reclaim(trans, trace_ip);
break;
default:
BUG_ON(ret >= 0);
break;
}
- BUG_ON((ret == EINTR || ret == -EAGAIN) && !trans->restarted);
+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted);
BUG_ON(ret == -ENOSPC &&
!(trans->flags & BTREE_INSERT_NOWAIT) &&
(trans->flags & BTREE_INSERT_NOFAIL));
@@ -1046,13 +1033,11 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
bch2_trans_unlock(trans);
- ret = bch2_fs_read_write_early(c);
+ ret = bch2_fs_read_write_early(c) ?:
+ bch2_trans_relock(trans);
if (ret)
return ret;
- if (!bch2_trans_relock(trans))
- return -EINTR;
-
percpu_ref_get(&c->writes);
return 0;
}
@@ -1122,9 +1107,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
BUG_ON(!i->path->should_be_locked);
if (unlikely(!bch2_btree_path_upgrade(trans, i->path, i->level + 1))) {
- trace_trans_restart_upgrade(trans->fn, _RET_IP_,
- i->btree_id, &i->path->pos);
- ret = btree_trans_restart(trans);
+ trace_trans_restart_upgrade(trans, _RET_IP_, i->path);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_upgrade);
goto out;
}
@@ -1164,7 +1148,7 @@ retry:
if (ret)
goto err;
- trace_transaction_commit(trans->fn, _RET_IP_);
+ trace_transaction_commit(trans, _RET_IP_);
out:
bch2_journal_preres_put(&c->journal, &trans->journal_preres);
@@ -1567,7 +1551,7 @@ bch2_trans_update_by_path_trace(struct btree_trans *trans, struct btree_path *pa
if (ret)
goto err;
- btree_path->should_be_locked = true;
+ btree_path_set_should_be_locked(btree_path);
ret = bch2_trans_update_by_path_trace(trans, btree_path, k, flags, ip);
err:
bch2_path_put(trans, btree_path, true);
@@ -1633,12 +1617,11 @@ int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter
ck = (void *) iter->key_cache_path->l[0].b;
if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
- trace_trans_restart_key_cache_raced(trans->fn, _RET_IP_);
- btree_trans_restart(trans);
- return -EINTR;
+ trace_trans_restart_key_cache_raced(trans, _RET_IP_);
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
}
- iter->key_cache_path->should_be_locked = true;
+ btree_path_set_should_be_locked(iter->key_cache_path);
}
path = iter->key_cache_path;
@@ -1763,7 +1746,7 @@ retry:
break;
}
- if (ret == -EINTR) {
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
goto retry;
}
@@ -1782,9 +1765,8 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
unsigned update_flags,
u64 *journal_seq)
{
- return bch2_trans_do(c, NULL, journal_seq, 0,
- bch2_btree_delete_range_trans(&trans, id, start, end,
- update_flags, journal_seq));
+ return bch2_trans_run(c,
+ bch2_btree_delete_range_trans(&trans, id, start, end, update_flags, journal_seq));
}
int bch2_trans_log_msg(struct btree_trans *trans, const char *msg)
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index fe2cd730..b4be2122 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -544,22 +544,6 @@ int bch2_mark_alloc(struct btree_trans *trans,
}
}
- if (new_a.data_type == BCH_DATA_free &&
- (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
- closure_wake_up(&c->freelist_wait);
-
- if (new_a.data_type == BCH_DATA_need_discard &&
- (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
- bch2_do_discards(c);
-
- if (old_a.data_type != BCH_DATA_cached &&
- new_a.data_type == BCH_DATA_cached &&
- should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
- bch2_do_invalidates(c);
-
- if (new_a.data_type == BCH_DATA_need_gc_gens)
- bch2_do_gc_gens(c);
-
percpu_down_read(&c->mark_lock);
if (!gc && new_a.gen != old_a.gen)
*bucket_gen(ca, new.k->p.offset) = new_a.gen;
@@ -599,6 +583,22 @@ int bch2_mark_alloc(struct btree_trans *trans,
}
}
+ if (new_a.data_type == BCH_DATA_free &&
+ (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+ closure_wake_up(&c->freelist_wait);
+
+ if (new_a.data_type == BCH_DATA_need_discard &&
+ (!new_a.journal_seq || new_a.journal_seq < c->journal.flushed_seq_ondisk))
+ bch2_do_discards(c);
+
+ if (old_a.data_type != BCH_DATA_cached &&
+ new_a.data_type == BCH_DATA_cached &&
+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
+ bch2_do_invalidates(c);
+
+ if (new_a.data_type == BCH_DATA_need_gc_gens)
+ bch2_do_gc_gens(c);
+
return 0;
}
@@ -1939,8 +1939,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca)
{
- return bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
- __bch2_trans_mark_dev_sb(&trans, ca));
+ return bch2_trans_run(c, __bch2_trans_mark_dev_sb(&trans, ca));
}
/* Disk reservations: */
diff --git a/libbcachefs/checksum.c b/libbcachefs/checksum.c
index 7c2af675..b5850a76 100644
--- a/libbcachefs/checksum.c
+++ b/libbcachefs/checksum.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "checksum.h"
+#include "errcode.h"
#include "super.h"
#include "super-io.h"
@@ -527,7 +528,7 @@ int bch2_decrypt_sb_key(struct bch_fs *c,
ret = bch2_request_key(c->disk_sb.sb, &user_key);
if (ret) {
- bch_err(c, "error requesting encryption key: %i", ret);
+ bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret));
goto err;
}
@@ -552,20 +553,24 @@ err:
static int bch2_alloc_ciphers(struct bch_fs *c)
{
+ int ret;
+
if (!c->chacha20)
c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
- if (IS_ERR(c->chacha20)) {
- bch_err(c, "error requesting chacha20 module: %li",
- PTR_ERR(c->chacha20));
- return PTR_ERR(c->chacha20);
+ ret = PTR_ERR_OR_ZERO(c->chacha20);
+
+ if (ret) {
+ bch_err(c, "error requesting chacha20 module: %s", bch2_err_str(ret));
+ return ret;
}
if (!c->poly1305)
c->poly1305 = crypto_alloc_shash("poly1305", 0, 0);
- if (IS_ERR(c->poly1305)) {
- bch_err(c, "error requesting poly1305 module: %li",
- PTR_ERR(c->poly1305));
- return PTR_ERR(c->poly1305);
+ ret = PTR_ERR_OR_ZERO(c->poly1305);
+
+ if (ret) {
+ bch_err(c, "error requesting poly1305 module: %s", bch2_err_str(ret));
+ return ret;
}
return 0;
@@ -626,7 +631,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
if (keyed) {
ret = bch2_request_key(c->disk_sb.sb, &user_key);
if (ret) {
- bch_err(c, "error requesting encryption key: %i", ret);
+ bch_err(c, "error requesting encryption key: %s", bch2_err_str(ret));
goto err;
}
@@ -678,9 +683,9 @@ int bch2_fs_encryption_init(struct bch_fs *c)
pr_verbose_init(c->opts, "");
c->sha256 = crypto_alloc_shash("sha256", 0, 0);
- if (IS_ERR(c->sha256)) {
- bch_err(c, "error requesting sha256 module");
- ret = PTR_ERR(c->sha256);
+ ret = PTR_ERR_OR_ZERO(c->sha256);
+ if (ret) {
+ bch_err(c, "error requesting sha256 module: %s", bch2_err_str(ret));
goto out;
}
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index c181dba6..3b442b01 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -236,7 +236,7 @@ static int bch2_data_update_index_update(struct bch_write_op *op)
bch2_ob_add_backpointer(c, ec_ob, &insert->k);
}
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ret = 0;
if (ret)
break;
@@ -272,7 +272,7 @@ out:
bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&_insert, c);
bch2_bkey_buf_exit(&_new, c);
- BUG_ON(ret == -EINTR);
+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
return ret;
}
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index 05cae0ed..cd37a101 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -189,6 +189,7 @@ struct dump_iter {
struct bch_fs *c;
enum btree_id id;
struct bpos from;
+ struct bpos prev_node;
u64 iter;
struct printbuf buf;
@@ -258,39 +259,30 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
i->size = size;
i->ret = 0;
- err = flush_buf(i);
- if (err)
- return err;
-
- if (!i->size)
- return i->ret;
-
bch2_trans_init(&trans, i->c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, i->id, i->from,
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS);
- k = bch2_btree_iter_peek(&iter);
-
- while (k.k && !(err = bkey_err(k))) {
- bch2_bkey_val_to_text(&i->buf, i->c, k);
- prt_char(&i->buf, '\n');
-
- k = bch2_btree_iter_next(&iter);
- i->from = iter.pos;
-
+ err = for_each_btree_key2(&trans, iter, i->id, i->from,
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
err = flush_buf(i);
if (err)
break;
if (!i->size)
break;
- }
- bch2_trans_iter_exit(&trans, &iter);
+
+ bch2_bkey_val_to_text(&i->buf, i->c, k);
+ prt_newline(&i->buf);
+ 0;
+ }));
+ i->from = iter.pos;
+
+ if (!err)
+ err = flush_buf(i);
bch2_trans_exit(&trans);
- return err < 0 ? err : i->ret;
+ return err ?: i->ret;
}
static const struct file_operations btree_debug_ops = {
@@ -360,7 +352,6 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
- struct btree *prev_node = NULL;
int err;
i->ubuf = buf;
@@ -376,44 +367,36 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
bch2_trans_init(&trans, i->c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, i->id, i->from,
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS);
-
- while ((k = bch2_btree_iter_peek(&iter)).k &&
- !(err = bkey_err(k))) {
+ err = for_each_btree_key2(&trans, iter, i->id, i->from,
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS, k, ({
struct btree_path_level *l = &iter.path->l[0];
struct bkey_packed *_k =
bch2_btree_node_iter_peek(&l->iter, l->b);
- if (l->b != prev_node) {
- bch2_btree_node_to_text(&i->buf, i->c, l->b);
- err = flush_buf(i);
- if (err)
- break;
- }
- prev_node = l->b;
-
- bch2_bfloat_to_text(&i->buf, l->b, _k);
- err = flush_buf(i);
- if (err)
- break;
-
- bch2_btree_iter_advance(&iter);
- i->from = iter.pos;
-
err = flush_buf(i);
if (err)
break;
if (!i->size)
break;
- }
- bch2_trans_iter_exit(&trans, &iter);
+
+ if (bpos_cmp(l->b->key.k.p, i->prev_node) > 0) {
+ bch2_btree_node_to_text(&i->buf, i->c, l->b);
+ i->prev_node = l->b->key.k.p;
+ }
+
+ bch2_bfloat_to_text(&i->buf, l->b, _k);
+ 0;
+ }));
+ i->from = iter.pos;
+
+ if (!err)
+ err = flush_buf(i);
bch2_trans_exit(&trans);
- return err < 0 ? err : i->ret;
+ return err ?: i->ret;
}
static const struct file_operations bfloat_failed_debug_ops = {
@@ -636,6 +619,75 @@ static const struct file_operations journal_pins_ops = {
.read = bch2_journal_pins_read,
};
+/*
+ * open() handler for the "lock_held_stats" debugfs file.
+ *
+ * Allocates a zeroed dump_iter, points it at the bch_fs stored in
+ * inode->i_private, gives it an empty printbuf and stashes it in
+ * file->private_data for the read and release handlers.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static int lock_held_stats_open(struct inode *inode, struct file *file)
+{
+	struct dump_iter *state = kzalloc(sizeof(*state), GFP_KERNEL);
+
+	if (!state)
+		return -ENOMEM;
+
+	state->c	= inode->i_private;
+	state->iter	= 0;
+	state->buf	= PRINTBUF;
+
+	file->private_data = state;
+	return 0;
+}
+
+/*
+ * release() handler for the "lock_held_stats" debugfs file: tears down the
+ * printbuf owned by the dump_iter in file->private_data and frees the
+ * dump_iter itself. Always returns 0.
+ */
+static int lock_held_stats_release(struct inode *inode, struct file *file)
+{
+	struct dump_iter *state = file->private_data;
+
+	printbuf_exit(&state->buf);
+	kfree(state);
+	return 0;
+}
+
+/*
+ * read() handler for the "lock_held_stats" debugfs file.
+ *
+ * For each named entry in c->lock_held_stats, formats "<name>:" followed by
+ * that entry's time stats (indented by 8) into the dump_iter's printbuf, and
+ * hands the text to flush_buf() for copying to @buf. i->iter records the next
+ * entry to emit, so output continues across successive read() calls; the
+ * table ends at BCH_LOCK_TIME_NR entries or at the first NULL name.
+ *
+ * Returns i->ret (expected to be the byte count accumulated by flush_buf(),
+ * which is defined outside this hunk), the error returned by flush_buf(), or
+ * -ENOMEM if the printbuf hit an allocation failure.
+ */
+static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
+				    size_t size, loff_t *ppos)
+{
+	struct dump_iter *i = file->private_data;
+	struct lock_held_stats *lhs = &i->c->lock_held_stats;
+	int err;
+
+	i->ubuf = buf;
+	i->size = size;
+	i->ret = 0;
+
+	while (1) {
+		/*
+		 * Flush before testing for the end of the table, so that text
+		 * formatted for the final entry (or left over from an earlier
+		 * short read) is still copied out:
+		 */
+		err = flush_buf(i);
+		if (err)
+			return err;
+
+		if (!i->size)
+			break;
+
+		/* Bounds check first, so names[] is never indexed at BCH_LOCK_TIME_NR: */
+		if (i->iter >= BCH_LOCK_TIME_NR || !lhs->names[i->iter])
+			break;
+
+		prt_printf(&i->buf, "%s:", lhs->names[i->iter]);
+		prt_newline(&i->buf);
+		printbuf_indent_add(&i->buf, 8);
+		bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]);
+		printbuf_indent_sub(&i->buf, 8);
+		prt_newline(&i->buf);
+		i->iter++;
+	}
+
+	if (i->buf.allocation_failure)
+		return -ENOMEM;
+
+	return i->ret;
+}
+
+/*
+ * File operations for the "lock_held_stats" debugfs file: open allocates the
+ * per-open dump_iter, read emits the stats text, release frees the dump_iter.
+ */
+static const struct file_operations lock_held_stats_op = {
+ .owner = THIS_MODULE,
+ .open = lock_held_stats_open,
+ .release = lock_held_stats_release,
+ .read = lock_held_stats_read,
+};
+
void bch2_fs_debug_exit(struct bch_fs *c)
{
if (!IS_ERR_OR_NULL(c->fs_debug_dir))
@@ -664,6 +716,11 @@ void bch2_fs_debug_init(struct bch_fs *c)
debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);
+ if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
+ debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir,
+ c, &lock_held_stats_op);
+ }
+
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
if (IS_ERR_OR_NULL(c->btree_debug_dir))
return;
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index 0cbb765c..4d942d22 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -471,7 +471,7 @@ retry:
ret = __bch2_dirent_lookup_trans(&trans, &iter, dir, hash_info,
name, inum, 0);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (!ret)
bch2_trans_iter_exit(&trans, &iter);
@@ -556,7 +556,7 @@ retry:
}
bch2_trans_iter_exit(&trans, &iter);
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 6ce352c5..f33acf1a 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -572,18 +572,14 @@ static int ec_stripe_mem_alloc(struct btree_trans *trans,
struct btree_iter *iter)
{
size_t idx = iter->pos.offset;
- int ret = 0;
if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_NOWAIT|__GFP_NOWARN))
- return ret;
+ return 0;
bch2_trans_unlock(trans);
- ret = -EINTR;
- if (!__ec_stripe_mem_alloc(trans->c, idx, GFP_KERNEL))
- return ret;
-
- return -ENOMEM;
+ return __ec_stripe_mem_alloc(trans->c, idx, GFP_KERNEL) ?:
+ bch2_trans_relock(trans);
}
static ssize_t stripe_idx_to_delete(struct bch_fs *c)
@@ -726,7 +722,7 @@ static int ec_stripe_bkey_insert(struct btree_trans *trans,
struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
int ret;
- for_each_btree_key(trans, iter, BTREE_ID_stripes, start_pos,
+ for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) {
if (start_pos.offset) {
@@ -740,12 +736,13 @@ static int ec_stripe_bkey_insert(struct btree_trans *trans,
}
if (bkey_deleted(k.k))
- goto found_slot;
+ break;
}
- goto err;
-found_slot:
- start_pos = iter.pos;
+ c->ec_stripe_hint = iter.pos.offset;
+
+ if (ret)
+ goto err;
ret = ec_stripe_mem_alloc(trans, &iter);
if (ret)
@@ -754,8 +751,6 @@ found_slot:
stripe->k.p = iter.pos;
ret = bch2_trans_update(trans, &iter, &stripe->k_i, 0);
-
- c->ec_stripe_hint = start_pos.offset;
err:
bch2_trans_iter_exit(trans, &iter);
@@ -822,80 +817,62 @@ static void extent_stripe_ptr_add(struct bkey_s_extent e,
};
}
-static int ec_stripe_update_ptrs(struct bch_fs *c,
- struct ec_stripe_buf *s,
- struct bkey *pos)
+static int ec_stripe_update_extent(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct ec_stripe_buf *s,
+ struct bpos end)
{
- struct btree_trans trans;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_s_extent e;
- struct bkey_buf sk;
- struct bpos next_pos;
- int ret = 0, dev, block;
-
- bch2_bkey_buf_init(&sk);
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-
- /* XXX this doesn't support the reflink btree */
-
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
- bkey_start_pos(pos),
- BTREE_ITER_INTENT);
-retry:
- while (bch2_trans_begin(&trans),
- (k = bch2_btree_iter_peek(&iter)).k &&
- !(ret = bkey_err(k)) &&
- bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
- const struct bch_extent_ptr *ptr_c;
- struct bch_extent_ptr *ptr, *ec_ptr = NULL;
-
- if (extent_has_stripe_ptr(k, s->key.k.p.offset)) {
- bch2_btree_iter_advance(&iter);
- continue;
- }
+ const struct bch_extent_ptr *ptr_c;
+ struct bch_extent_ptr *ptr, *ec_ptr = NULL;
+ struct bkey_i *n;
+ int ret, dev, block;
- ptr_c = bkey_matches_stripe(&s->key.v, k, &block);
- /*
- * It doesn't generally make sense to erasure code cached ptrs:
- * XXX: should we be incrementing a counter?
- */
- if (!ptr_c || ptr_c->cached) {
- bch2_btree_iter_advance(&iter);
- continue;
- }
+ if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
+ return 1;
- dev = s->key.v.ptrs[block].dev;
+ if (extent_has_stripe_ptr(k, s->key.k.p.offset))
+ return 0;
- bch2_bkey_buf_reassemble(&sk, c, k);
- e = bkey_i_to_s_extent(sk.k);
+ ptr_c = bkey_matches_stripe(&s->key.v, k, &block);
+ /*
+ * It doesn't generally make sense to erasure code cached ptrs:
+ * XXX: should we be incrementing a counter?
+ */
+ if (!ptr_c || ptr_c->cached)
+ return 0;
- bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev);
- ec_ptr = (void *) bch2_bkey_has_device(e.s_c, dev);
- BUG_ON(!ec_ptr);
+ dev = s->key.v.ptrs[block].dev;
- extent_stripe_ptr_add(e, s, ec_ptr, block);
+ n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
- bch2_btree_iter_set_pos(&iter, bkey_start_pos(&sk.k->k));
- next_pos = sk.k->k.p;
+ bkey_reassemble(n, k);
- ret = bch2_btree_iter_traverse(&iter) ?:
- bch2_trans_update(&trans, &iter, sk.k, 0) ?:
- bch2_trans_commit(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL);
- if (!ret)
- bch2_btree_iter_set_pos(&iter, next_pos);
- if (ret)
- break;
- }
- if (ret == -EINTR)
- goto retry;
- bch2_trans_iter_exit(&trans, &iter);
+ bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, ptr->dev != dev);
+ ec_ptr = (void *) bch2_bkey_has_device(bkey_i_to_s_c(n), dev);
+ BUG_ON(!ec_ptr);
- bch2_trans_exit(&trans);
- bch2_bkey_buf_exit(&sk, c);
+ extent_stripe_ptr_add(bkey_i_to_s_extent(n), s, ec_ptr, block);
- return ret;
+ return bch2_trans_update(trans, iter, n, 0);
+}
+
+static int ec_stripe_update_extents(struct bch_fs *c,
+ struct ec_stripe_buf *s,
+ struct bkey *pos)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+
+ return bch2_trans_run(c,
+ for_each_btree_key_commit(&trans, iter,
+ BTREE_ID_extents, bkey_start_pos(pos),
+ BTREE_ITER_NOT_EXTENTS|BTREE_ITER_INTENT, k,
+ NULL, NULL, BTREE_INSERT_NOFAIL,
+ ec_stripe_update_extent(&trans, &iter, k, s, pos->p)));
}
/*
@@ -966,9 +943,10 @@ static void ec_stripe_create(struct ec_stripe_new *s)
}
for_each_keylist_key(&s->keys, k) {
- ret = ec_stripe_update_ptrs(c, &s->new_stripe, &k->k);
+ ret = ec_stripe_update_extents(c, &s->new_stripe, &k->k);
if (ret) {
- bch_err(c, "error creating stripe: error %i updating pointers", ret);
+ bch_err(c, "error creating stripe: error updating pointers: %s",
+ bch2_err_str(ret));
break;
}
}
diff --git a/libbcachefs/errcode.c b/libbcachefs/errcode.c
new file mode 100644
index 00000000..9da8a597
--- /dev/null
+++ b/libbcachefs/errcode.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "errcode.h"
+
+#include <linux/errname.h>
+
+static const char * const bch2_errcode_strs[] = {
+#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = #err,
+ BCH_ERRCODES()
+#undef x
+ NULL
+};
+
+#define BCH_ERR_0 0
+
+static unsigned bch2_errcode_parents[] = {
+#define x(class, err) [BCH_ERR_##err - BCH_ERR_START] = BCH_ERR_##class,
+ BCH_ERRCODES()
+#undef x
+};
+
+const char *bch2_err_str(int err)
+{
+ const char *errstr;
+ err = abs(err);
+
+ BUG_ON(err >= BCH_ERR_MAX);
+
+ if (err >= BCH_ERR_START)
+ errstr = bch2_errcode_strs[err - BCH_ERR_START];
+ else if (err)
+ errstr = errname(err);
+ else
+ errstr = "(No error)";
+ return errstr ?: "(Invalid error)";
+}
+
+bool __bch2_err_matches(int err, int class)
+{
+ err = abs(err);
+ class = abs(class);
+
+ BUG_ON(err >= BCH_ERR_MAX);
+ BUG_ON(class >= BCH_ERR_MAX);
+
+ while (err >= BCH_ERR_START && err != class)
+ err = bch2_errcode_parents[err - BCH_ERR_START];
+
+ return err == class;
+}
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 0581f3c7..15a1be2f 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -2,12 +2,73 @@
#ifndef _BCACHEFS_ERRCODE_H
#define _BCACHEFS_ERRCODE_H
-enum {
- /* Bucket allocator: */
- OPEN_BUCKETS_EMPTY = 2048,
- FREELIST_EMPTY, /* Allocator thread not keeping up */
- INSUFFICIENT_DEVICES,
- NEED_SNAPSHOT_CLEANUP,
+#define BCH_ERRCODES() \
+ x(0, open_buckets_empty) \
+ x(0, freelist_empty) \
+ x(freelist_empty, no_buckets_found) \
+ x(0, insufficient_devices) \
+ x(0, transaction_restart) \
+ x(transaction_restart, transaction_restart_fault_inject) \
+ x(transaction_restart, transaction_restart_relock) \
+ x(transaction_restart, transaction_restart_relock_path) \
+ x(transaction_restart, transaction_restart_relock_path_intent) \
+ x(transaction_restart, transaction_restart_relock_after_fill) \
+ x(transaction_restart, transaction_restart_too_many_iters) \
+ x(transaction_restart, transaction_restart_lock_node_reused) \
+ x(transaction_restart, transaction_restart_fill_relock) \
+ x(transaction_restart, transaction_restart_fill_mem_alloc_fail)\
+ x(transaction_restart, transaction_restart_mem_realloced) \
+ x(transaction_restart, transaction_restart_in_traverse_all) \
+ x(transaction_restart, transaction_restart_would_deadlock) \
+ x(transaction_restart, transaction_restart_would_deadlock_write)\
+ x(transaction_restart, transaction_restart_upgrade) \
+ x(transaction_restart, transaction_restart_key_cache_upgrade) \
+ x(transaction_restart, transaction_restart_key_cache_fill) \
+ x(transaction_restart, transaction_restart_key_cache_raced) \
+ x(transaction_restart, transaction_restart_key_cache_realloced)\
+ x(transaction_restart, transaction_restart_journal_preres_get) \
+ x(transaction_restart, transaction_restart_nested) \
+ x(0, no_btree_node) \
+ x(no_btree_node, no_btree_node_relock) \
+ x(no_btree_node, no_btree_node_upgrade) \
+ x(no_btree_node, no_btree_node_drop) \
+ x(no_btree_node, no_btree_node_lock_root) \
+ x(no_btree_node, no_btree_node_up) \
+ x(no_btree_node, no_btree_node_down) \
+ x(no_btree_node, no_btree_node_init) \
+ x(no_btree_node, no_btree_node_cached) \
+ x(0, lock_fail_node_reused) \
+ x(0, lock_fail_root_changed) \
+ x(0, journal_reclaim_would_deadlock) \
+ x(0, fsck) \
+ x(fsck, fsck_fix) \
+ x(fsck, fsck_ignore) \
+ x(fsck, fsck_errors_not_fixed) \
+ x(fsck, fsck_repair_unimplemented) \
+ x(fsck, fsck_repair_impossible) \
+ x(0, need_snapshot_cleanup) \
+ x(0, need_topology_repair)
+
+enum bch_errcode {
+ BCH_ERR_START = 2048,
+#define x(class, err) BCH_ERR_##err,
+ BCH_ERRCODES()
+#undef x
+ BCH_ERR_MAX
};
+const char *bch2_err_str(int);
+bool __bch2_err_matches(int, int);
+
+static inline bool _bch2_err_matches(int err, int class)
+{
+ return err && __bch2_err_matches(err, class);
+}
+
+#define bch2_err_matches(_err, _class) \
+({ \
+ BUILD_BUG_ON(!__builtin_constant_p(_class)); \
+ _bch2_err_matches(_err, _class); \
+})
+
#endif /* _BCACHFES_ERRCODE_H */
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index 8279a9ba..f6a895b2 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -68,8 +68,7 @@ void bch2_io_error(struct bch_dev *ca)
#include "tools-util.h"
#endif
-enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
- const char *fmt, ...)
+int bch2_fsck_err(struct bch_fs *c, unsigned flags, const char *fmt, ...)
{
struct fsck_err_state *s = NULL;
va_list args;
@@ -83,10 +82,10 @@ enum fsck_err_ret bch2_fsck_err(struct bch_fs *c, unsigned flags,
if (c->opts.errors == BCH_ON_ERROR_continue) {
bch_err(c, "fixing");
- return FSCK_ERR_FIX;
+ return -BCH_ERR_fsck_fix;
} else {
bch2_inconsistent_error(c);
- return FSCK_ERR_EXIT;
+ return -BCH_ERR_fsck_errors_not_fixed;
}
}
@@ -156,14 +155,14 @@ print:
if (fix) {
set_bit(BCH_FS_ERRORS_FIXED, &c->flags);
- return FSCK_ERR_FIX;
+ return -BCH_ERR_fsck_fix;
} else {
set_bit(BCH_FS_ERRORS_NOT_FIXED, &c->flags);
set_bit(BCH_FS_ERROR, &c->flags);
return c->opts.fix_errors == FSCK_OPT_EXIT ||
!(flags & FSCK_CAN_IGNORE)
- ? FSCK_ERR_EXIT
- : FSCK_ERR_IGNORE;
+ ? -BCH_ERR_fsck_errors_not_fixed
+ : -BCH_ERR_fsck_ignore;
}
}
diff --git a/libbcachefs/error.h b/libbcachefs/error.h
index 6e63c381..b603d738 100644
--- a/libbcachefs/error.h
+++ b/libbcachefs/error.h
@@ -91,14 +91,6 @@ do { \
* be able to repair:
*/
-enum {
- BCH_FSCK_OK = 0,
- BCH_FSCK_ERRORS_NOT_FIXED = 1,
- BCH_FSCK_REPAIR_UNIMPLEMENTED = 2,
- BCH_FSCK_REPAIR_IMPOSSIBLE = 3,
- BCH_FSCK_UNKNOWN_VERSION = 4,
-};
-
enum fsck_err_opts {
FSCK_OPT_EXIT,
FSCK_OPT_YES,
@@ -106,13 +98,6 @@ enum fsck_err_opts {
FSCK_OPT_ASK,
};
-enum fsck_err_ret {
- FSCK_ERR_IGNORE = 0,
- FSCK_ERR_FIX = 1,
- FSCK_ERR_EXIT = 2,
- FSCK_ERR_START_TOPOLOGY_REPAIR = 3,
-};
-
struct fsck_err_state {
struct list_head list;
const char *fmt;
@@ -127,21 +112,21 @@ struct fsck_err_state {
#define FSCK_NO_RATELIMIT (1 << 3)
__printf(3, 4) __cold
-enum fsck_err_ret bch2_fsck_err(struct bch_fs *,
- unsigned, const char *, ...);
+int bch2_fsck_err(struct bch_fs *, unsigned, const char *, ...);
void bch2_flush_fsck_errs(struct bch_fs *);
#define __fsck_err(c, _flags, msg, ...) \
({ \
- int _fix = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__);\
+ int _ret = bch2_fsck_err(c, _flags, msg, ##__VA_ARGS__); \
\
- if (_fix == FSCK_ERR_EXIT) { \
+ if (_ret != -BCH_ERR_fsck_fix && \
+ _ret != -BCH_ERR_fsck_ignore) { \
bch_err(c, "Unable to continue, halting"); \
- ret = BCH_FSCK_ERRORS_NOT_FIXED; \
+ ret = _ret; \
goto fsck_err; \
} \
\
- _fix; \
+ _ret == -BCH_ERR_fsck_fix; \
})
/* These macros return true if error should be fixed: */
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index bcfd9e5f..0a7f172f 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -409,7 +409,7 @@ retry:
offset = iter.pos.offset;
bch2_trans_iter_exit(&trans, &iter);
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
@@ -850,13 +850,13 @@ void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
bch2_clear_page_bits(&folio->page);
}
-int bch2_releasepage(struct page *page, gfp_t gfp_mask)
+bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
- if (PageDirty(page))
- return 0;
+ if (folio_test_dirty(folio) || folio_test_writeback(folio))
+ return false;
- bch2_clear_page_bits(page);
- return 1;
+ bch2_clear_page_bits(&folio->page);
+ return true;
}
#ifdef CONFIG_MIGRATION
@@ -1045,10 +1045,9 @@ retry:
* read_extent -> io_time_reset may cause a transaction restart
* without returning an error, we need to check for that here:
*/
- if (!bch2_trans_relock(trans)) {
- ret = -EINTR;
+ ret = bch2_trans_relock(trans);
+ if (ret)
break;
- }
bch2_btree_iter_set_pos(&iter,
POS(inum.inum, rbio->bio.bi_iter.bi_sector));
@@ -1101,7 +1100,7 @@ retry:
err:
bch2_trans_iter_exit(trans, &iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (ret) {
@@ -1175,20 +1174,6 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio,
bch2_trans_exit(&trans);
}
-int bch2_readpage(struct file *file, struct page *page)
-{
- struct bch_inode_info *inode = to_bch_ei(page->mapping->host);
- struct bch_fs *c = inode->v.i_sb->s_fs_info;
- struct bch_io_opts opts = io_opts(c, &inode->ei_inode);
- struct bch_read_bio *rbio;
-
- rbio = rbio_init(bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS, &c->bio_read), opts);
- rbio->bio.bi_end_io = bch2_readpages_end_io;
-
- __bchfs_readpage(c, rbio, inode_inum(inode), page);
- return 0;
-}
-
static void bch2_read_single_page_end_io(struct bio *bio)
{
complete(bio->bi_private);
@@ -1221,6 +1206,16 @@ static int bch2_read_single_page(struct page *page,
return 0;
}
+int bch2_read_folio(struct file *file, struct folio *folio)
+{
+ struct page *page = &folio->page;
+ int ret;
+
+ ret = bch2_read_single_page(page, page->mapping);
+ folio_unlock(folio);
+ return ret;
+}
+
/* writepages: */
struct bch_writepage_state {
@@ -1512,7 +1507,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
/* buffered writes: */
int bch2_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
+ loff_t pos, unsigned len,
struct page **pagep, void **fsdata)
{
struct bch_inode_info *inode = to_bch_ei(mapping->host);
@@ -1532,7 +1527,7 @@ int bch2_write_begin(struct file *file, struct address_space *mapping,
bch2_pagecache_add_get(&inode->ei_pagecache_lock);
- page = grab_cache_page_write_begin(mapping, index, flags);
+ page = grab_cache_page_write_begin(mapping, index);
if (!page)
goto err_unlock;
@@ -1663,7 +1658,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
bch2_page_reservation_init(c, inode, &res);
for (i = 0; i < nr_pages; i++) {
- pages[i] = grab_cache_page_write_begin(mapping, index + i, 0);
+ pages[i] = grab_cache_page_write_begin(mapping, index + i);
if (!pages[i]) {
nr_pages = i;
if (!i) {
@@ -2073,7 +2068,7 @@ retry:
offset = iter.pos.offset;
bch2_trans_iter_exit(&trans, &iter);
err:
- if (err == -EINTR)
+ if (bch2_err_matches(err, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
@@ -2449,7 +2444,7 @@ retry:
start = iter.pos;
bch2_trans_iter_exit(&trans, &iter);
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
@@ -2839,7 +2834,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
bch2_trans_copy_iter(&dst, &src);
bch2_trans_copy_iter(&del, &src);
- while (ret == 0 || ret == -EINTR) {
+ while (ret == 0 ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
struct bkey_i delete;
@@ -3041,7 +3037,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
bkey_err:
bch2_quota_reservation_put(c, inode, &quota_res);
bch2_disk_reservation_put(c, &disk_res);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
ret = 0;
}
@@ -3321,7 +3317,7 @@ retry:
}
bch2_trans_iter_exit(&trans, &iter);
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
@@ -3436,7 +3432,7 @@ retry:
}
bch2_trans_iter_exit(&trans, &iter);
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
diff --git a/libbcachefs/fs-io.h b/libbcachefs/fs-io.h
index 7f2d7f45..a22a4e95 100644
--- a/libbcachefs/fs-io.h
+++ b/libbcachefs/fs-io.h
@@ -15,13 +15,13 @@ int __must_check bch2_write_inode_size(struct bch_fs *,
struct bch_inode_info *,
loff_t, unsigned);
-int bch2_readpage(struct file *, struct page *);
+int bch2_read_folio(struct file *, struct folio *);
int bch2_writepages(struct address_space *, struct writeback_control *);
void bch2_readahead(struct readahead_control *);
int bch2_write_begin(struct file *, struct address_space *, loff_t,
- unsigned, unsigned, struct page **, void **);
+ unsigned, struct page **, void **);
int bch2_write_end(struct file *, struct address_space *, loff_t,
unsigned, unsigned, struct page *, void *);
@@ -42,7 +42,7 @@ loff_t bch2_llseek(struct file *, loff_t, int);
vm_fault_t bch2_page_fault(struct vm_fault *);
vm_fault_t bch2_page_mkwrite(struct vm_fault *);
void bch2_invalidate_folio(struct folio *, size_t, size_t);
-int bch2_releasepage(struct page *, gfp_t);
+bool bch2_release_folio(struct folio *, gfp_t);
int bch2_migrate_page(struct address_space *, struct page *,
struct page *, enum migrate_mode);
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 2354c989..3e2b6097 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -8,6 +8,7 @@
#include "buckets.h"
#include "chardev.h"
#include "dirent.h"
+#include "errcode.h"
#include "extents.h"
#include "fs.h"
#include "fs-common.h"
@@ -153,7 +154,7 @@ retry:
bch2_trans_iter_exit(&trans, &iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
@@ -323,7 +324,7 @@ retry:
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
KEY_TYPE_QUOTA_WARN);
err_before_quota:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
goto err_trans;
}
@@ -754,7 +755,7 @@ retry:
btree_err:
bch2_trans_iter_exit(&trans, &inode_iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (unlikely(ret))
goto err_trans;
@@ -985,7 +986,7 @@ retry:
start = iter.pos.offset;
bch2_trans_iter_exit(&trans, &iter);
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
if (!ret && have_extent)
@@ -1112,14 +1113,14 @@ static const struct inode_operations bch_special_inode_operations = {
};
static const struct address_space_operations bch_address_space_operations = {
- .readpage = bch2_readpage,
+ .read_folio = bch2_read_folio,
.writepages = bch2_writepages,
.readahead = bch2_readahead,
.dirty_folio = filemap_dirty_folio,
.write_begin = bch2_write_begin,
.write_end = bch2_write_end,
.invalidate_folio = bch2_invalidate_folio,
- .releasepage = bch2_releasepage,
+ .release_folio = bch2_release_folio,
.direct_IO = noop_direct_IO,
#ifdef CONFIG_MIGRATION
.migratepage = bch2_migrate_page,
@@ -1335,7 +1336,7 @@ found:
memcpy(name, d.v->d_name, name_len);
name[name_len] = '\0';
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_iter_exit(&trans, &iter1);
@@ -1870,10 +1871,9 @@ got_sb:
sb->s_shrink.seeks = 0;
vinode = bch2_vfs_inode_get(c, BCACHEFS_ROOT_SUBVOL_INUM);
- if (IS_ERR(vinode)) {
- bch_err(c, "error mounting: error getting root inode %i",
- (int) PTR_ERR(vinode));
- ret = PTR_ERR(vinode);
+ ret = PTR_ERR_OR_ZERO(vinode);
+ if (ret) {
+ bch_err(c, "error mounting: error getting root inode: %s", bch2_err_str(ret));
goto err_put_super;
}
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 6165878c..c93e177a 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -136,9 +136,9 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
ret = bch2_inode_unpack(k, inode);
err:
- if (ret && ret != -EINTR)
- bch_err(trans->c, "error %i fetching inode %llu",
- ret, inode_nr);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(trans->c, "error fetching inode %llu: %s",
+ inode_nr, bch2_err_str(ret));
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -164,9 +164,9 @@ static int __lookup_inode(struct btree_trans *trans, u64 inode_nr,
if (!ret)
*snapshot = iter.pos.snapshot;
err:
- if (ret && ret != -EINTR)
- bch_err(trans->c, "error %i fetching inode %llu:%u",
- ret, inode_nr, *snapshot);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(trans->c, "error fetching inode %llu:%u: %s",
+ inode_nr, *snapshot, bch2_err_str(ret));
bch2_trans_iter_exit(trans, &iter);
return ret;
}
@@ -225,7 +225,8 @@ static int write_inode(struct btree_trans *trans,
BTREE_INSERT_LAZY_RW,
__write_inode(trans, inode, snapshot));
if (ret)
- bch_err(trans->c, "error in fsck: error %i updating inode", ret);
+ bch_err(trans->c, "error in fsck: error updating inode: %s",
+ bch2_err_str(ret));
return ret;
}
@@ -286,7 +287,7 @@ retry:
BTREE_INSERT_NOFAIL);
err:
bch2_trans_iter_exit(trans, &iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
return ret;
@@ -313,8 +314,8 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
bch2_trans_iter_exit(trans, &iter);
err:
- if (ret && ret != -EINTR)
- bch_err(c, "error %i from __remove_dirent()", ret);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(c, "error from __remove_dirent(): %s", bch2_err_str(ret));
return ret;
}
@@ -349,8 +350,8 @@ static int lookup_lostfound(struct btree_trans *trans, u32 subvol,
goto create_lostfound;
}
- if (ret && ret != -EINTR)
- bch_err(c, "error looking up lost+found: %i", ret);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(c, "error looking up lost+found: %s", bch2_err_str(ret));
if (ret)
return ret;
@@ -372,8 +373,8 @@ create_lostfound:
lostfound, &lostfound_str,
0, 0, S_IFDIR|0700, 0, NULL, NULL,
(subvol_inum) { }, 0);
- if (ret && ret != -EINTR)
- bch_err(c, "error creating lost+found: %i", ret);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(c, "error creating lost+found: %s", bch2_err_str(ret));
return ret;
}
@@ -437,8 +438,8 @@ static int reattach_inode(struct btree_trans *trans,
BTREE_INSERT_NOFAIL,
__reattach_inode(trans, inode, inode_snapshot));
if (ret) {
- bch_err(trans->c, "error %i reattaching inode %llu",
- ret, inode->bi_inum);
+ bch_err(trans->c, "error reattaching inode %llu: %s",
+ inode->bi_inum, bch2_err_str(ret));
return ret;
}
@@ -518,7 +519,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
.id = pos.snapshot,
.equiv = bch2_snapshot_equiv(c, pos.snapshot),
};
- int ret;
+ int ret = 0;
if (bkey_cmp(s->pos, pos))
s->ids.nr = 0;
@@ -528,14 +529,13 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
darray_for_each(s->ids, i)
if (i->equiv == n.equiv) {
- if (i->id != n.id) {
- bch_err(c, "snapshot deletion did not run correctly:\n"
+ if (fsck_err_on(i->id != n.id, c,
+ "snapshot deletion did not run correctly:\n"
" duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n",
bch2_btree_ids[btree_id],
pos.inode, pos.offset,
- i->id, n.id, n.equiv);
- return -NEED_SNAPSHOT_CLEANUP;
- }
+ i->id, n.id, n.equiv))
+ return -BCH_ERR_need_snapshot_cleanup;
return 0;
}
@@ -544,6 +544,7 @@ static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
if (ret)
bch_err(c, "error reallocating snapshots_seen table (size %zu)",
s->ids.size);
+fsck_err:
return ret;
}
@@ -649,6 +650,7 @@ static int __walk_inode(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_s_c k;
+ u32 restart_count = trans->restart_count;
unsigned i;
int ret;
@@ -676,6 +678,10 @@ static int __walk_inode(struct btree_trans *trans,
w->cur_inum = pos.inode;
w->first_this_inode = true;
+
+ if (trans_was_restarted(trans, restart_count))
+ return -BCH_ERR_transaction_restart_nested;
+
lookup_snapshot:
for (i = 0; i < w->inodes.nr; i++)
if (bch2_snapshot_is_ancestor(c, pos.snapshot, w->inodes.data[i].snapshot))
@@ -837,15 +843,14 @@ bad_hash:
"hashed to %llu\n%s",
bch2_btree_ids[desc.btree_id], hash_k.k->p.inode, hash_k.k->p.offset, hash,
(printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf)) == FSCK_ERR_IGNORE)
- return 0;
-
- ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
- if (ret) {
- bch_err(c, "hash_redo_key err %i", ret);
- return ret;
+ bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
+ ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
+ if (ret) {
+ bch_err(c, "hash_redo_key err %s", bch2_err_str(ret));
+ return ret;
+ }
+ ret = -BCH_ERR_transaction_restart_nested;
}
- ret = -EINTR;
fsck_err:
goto out;
}
@@ -910,7 +915,8 @@ static int check_inode(struct btree_trans *trans,
ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
if (ret)
- bch_err(c, "error in fsck: error %i while deleting inode", ret);
+ bch_err(c, "error in fsck: error while deleting inode: %s",
+ bch2_err_str(ret));
return ret;
}
@@ -933,7 +939,8 @@ static int check_inode(struct btree_trans *trans,
POS(u.bi_inum, U64_MAX),
0, NULL);
if (ret) {
- bch_err(c, "error in fsck: error %i truncating inode", ret);
+ bch_err(c, "error in fsck: error truncating inode: %s",
+ bch2_err_str(ret));
return ret;
}
@@ -958,8 +965,8 @@ static int check_inode(struct btree_trans *trans,
sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
if (sectors < 0) {
- bch_err(c, "error in fsck: error %i recounting inode sectors",
- (int) sectors);
+ bch_err(c, "error in fsck: error recounting inode sectors: %s",
+ bch2_err_str(sectors));
return sectors;
}
@@ -978,13 +985,13 @@ static int check_inode(struct btree_trans *trans,
if (do_update) {
ret = __write_inode(trans, &u, iter->pos.snapshot);
if (ret)
- bch_err(c, "error in fsck: error %i "
- "updating inode", ret);
+ bch_err(c, "error in fsck: error updating inode: %s",
+ bch2_err_str(ret));
}
err:
fsck_err:
if (ret)
- bch_err(c, "error %i from check_inode()", ret);
+ bch_err(c, "error from check_inode(): %s", bch2_err_str(ret));
return ret;
}
@@ -1003,16 +1010,14 @@ static int check_inodes(struct bch_fs *c, bool full)
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
POS_MIN,
- BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS,
- k,
- NULL, NULL,
- BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_inode(&trans, &iter, k, &prev, &s, full));
bch2_trans_exit(&trans);
snapshots_seen_exit(&s);
if (ret)
- bch_err(c, "error %i from check_inodes()", ret);
+ bch_err(c, "error from check_inodes(): %s", bch2_err_str(ret));
return ret;
}
@@ -1115,15 +1120,15 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
{
struct bch_fs *c = trans->c;
struct inode_walker_entry *i;
- int ret = 0, ret2 = 0;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
s64 count2;
darray_for_each(w->inodes, i) {
if (i->inode.bi_sectors == i->count)
continue;
- count2 = lockrestart_do(trans,
- bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot));
+ count2 = bch2_count_inode_sectors(trans, w->cur_inum, i->snapshot);
if (i->count != count2) {
bch_err(c, "fsck counted i_sectors wrong: got %llu should be %llu",
@@ -1136,19 +1141,21 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SECTORS_DIRTY), c,
"inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
w->cur_inum, i->snapshot,
- i->inode.bi_sectors, i->count) == FSCK_ERR_IGNORE)
- continue;
-
- i->inode.bi_sectors = i->count;
- ret = write_inode(trans, &i->inode, i->snapshot);
- if (ret)
- break;
- ret2 = -EINTR;
+ i->inode.bi_sectors, i->count)) {
+ i->inode.bi_sectors = i->count;
+ ret = write_inode(trans, &i->inode, i->snapshot);
+ if (ret)
+ break;
+ }
}
fsck_err:
- if (ret)
- bch_err(c, "error %i from check_i_sectors()", ret);
- return ret ?: ret2;
+ if (ret) {
+ bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret));
+ return ret;
+ }
+ if (trans_was_restarted(trans, restart_count))
+ return -BCH_ERR_transaction_restart_nested;
+ return 0;
}
static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
@@ -1184,14 +1191,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
goto err;
}
- if (!iter->path->should_be_locked) {
- /*
- * hack: check_i_sectors may have handled a transaction restart,
- * it shouldn't be but we need to fix the new i_sectors check
- * code and delete the old bch2_count_inode_sectors() first
- */
- return -EINTR;
- }
+ BUG_ON(!iter->path->should_be_locked);
#if 0
if (bkey_cmp(prev.k->k.p, bkey_start_pos(k.k)) > 0) {
char buf1[200];
@@ -1201,7 +1201,8 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
bch2_bkey_val_to_text(&PBUF(buf2), c, k);
if (fsck_err(c, "overlapping extents:\n%s\n%s", buf1, buf2)) {
- ret = fix_overlapping_extent(trans, k, prev.k->k.p) ?: -EINTR;
+ ret = fix_overlapping_extent(trans, k, prev.k->k.p)
+ ?: -BCH_ERR_transaction_restart_nested;
goto out;
}
}
@@ -1286,8 +1287,8 @@ err:
fsck_err:
printbuf_exit(&buf);
- if (ret && ret != -EINTR)
- bch_err(c, "error %i from check_extent()", ret);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(c, "error from check_extent(): %s", bch2_err_str(ret));
return ret;
}
@@ -1329,7 +1330,7 @@ static int check_extents(struct bch_fs *c)
snapshots_seen_exit(&s);
if (ret)
- bch_err(c, "error %i from check_extents()", ret);
+ bch_err(c, "error from check_extents(): %s", bch2_err_str(ret));
return ret;
}
@@ -1337,7 +1338,8 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
{
struct bch_fs *c = trans->c;
struct inode_walker_entry *i;
- int ret = 0, ret2 = 0;
+ u32 restart_count = trans->restart_count;
+ int ret = 0;
s64 count2;
darray_for_each(w->inodes, i) {
@@ -1363,13 +1365,16 @@ static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
ret = write_inode(trans, &i->inode, i->snapshot);
if (ret)
break;
- ret2 = -EINTR;
}
}
fsck_err:
- if (ret)
- bch_err(c, "error %i from check_subdir_count()", ret);
- return ret ?: ret2;
+ if (ret) {
+ bch_err(c, "error from check_subdir_count(): %s", bch2_err_str(ret));
+ return ret;
+ }
+ if (trans_was_restarted(trans, restart_count))
+ return -BCH_ERR_transaction_restart_nested;
+ return 0;
}
static int check_dirent_target(struct btree_trans *trans,
@@ -1486,8 +1491,8 @@ err:
fsck_err:
printbuf_exit(&buf);
- if (ret && ret != -EINTR)
- bch_err(c, "error %i from check_target()", ret);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(c, "error from check_target(): %s", bch2_err_str(ret));
return ret;
}
@@ -1527,10 +1532,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
goto err;
}
- if (!iter->path->should_be_locked) {
- /* hack: see check_extent() */
- return -EINTR;
- }
+ BUG_ON(!iter->path->should_be_locked);
ret = __walk_inode(trans, dir, equiv);
if (ret < 0)
@@ -1659,8 +1661,8 @@ err:
fsck_err:
printbuf_exit(&buf);
- if (ret && ret != -EINTR)
- bch_err(c, "error %i from check_dirent()", ret);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(c, "error from check_dirent(): %s", bch2_err_str(ret));
return ret;
}
@@ -1699,7 +1701,7 @@ static int check_dirents(struct bch_fs *c)
inode_walker_exit(&target);
if (ret)
- bch_err(c, "error %i from check_dirents()", ret);
+ bch_err(c, "error from check_dirents(): %s", bch2_err_str(ret));
return ret;
}
@@ -1734,8 +1736,8 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
fsck_err:
- if (ret && ret != -EINTR)
- bch_err(c, "error %i from check_xattr()", ret);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(c, "error from check_xattr(): %s", bch2_err_str(ret));
return ret;
}
@@ -1767,7 +1769,7 @@ static int check_xattrs(struct bch_fs *c)
bch2_trans_exit(&trans);
if (ret)
- bch_err(c, "error %i from check_xattrs()", ret);
+ bch_err(c, "error from check_xattrs(): %s", bch2_err_str(ret));
return ret;
}
@@ -1799,7 +1801,7 @@ static int check_root_trans(struct btree_trans *trans)
BTREE_INSERT_LAZY_RW,
__bch2_btree_insert(trans, BTREE_ID_subvolumes, &root_subvol.k_i));
if (ret) {
- bch_err(c, "error writing root subvol: %i", ret);
+ bch_err(c, "error writing root subvol: %s", bch2_err_str(ret));
goto err;
}
@@ -1818,7 +1820,7 @@ static int check_root_trans(struct btree_trans *trans)
ret = __write_inode(trans, &root_inode, snapshot);
if (ret)
- bch_err(c, "error writing root inode: %i", ret);
+ bch_err(c, "error writing root inode: %s", bch2_err_str(ret));
}
err:
fsck_err:
@@ -1971,7 +1973,7 @@ static int check_path(struct btree_trans *trans,
}
fsck_err:
if (ret)
- bch_err(c, "%s: err %i", __func__, ret);
+ bch_err(c, "%s: err %s", __func__, bch2_err_str(ret));
return ret;
}
@@ -2015,8 +2017,6 @@ static int check_directory_structure(struct bch_fs *c)
}
bch2_trans_iter_exit(&trans, &iter);
- BUG_ON(ret == -EINTR);
-
darray_exit(&path);
bch2_trans_exit(&trans);
@@ -2194,6 +2194,47 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
return ret;
}
+/*
+ * Per-key callback used by check_nlinks_update_hardlinks(): compares the link
+ * count stored in one inode key against the count recorded in @links and
+ * rewrites the inode when they differ.
+ *
+ * @idx is the caller's cursor into links->d; it is advanced here and keeps its
+ * value between calls.
+ *
+ * Returns 1 when @k is at or beyond @range_end, 0 when the key is skipped or
+ * needs no change, otherwise ret as left by fsck_err_on()/__write_inode().
+ */
+static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct nlink_table *links,
+ size_t *idx, u64 range_end)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_inode_unpacked u;
+ struct nlink *link = &links->d[*idx];
+ int ret = 0;
+
+ /* positive return: the caller only treats ret < 0 as an error */
+ if (k.k->p.offset >= range_end)
+ return 1;
+
+ if (!bkey_is_inode(k.k))
+ return 0;
+
+ BUG_ON(bch2_inode_unpack(k, &u));
+
+ /* directories are not checked by this pass */
+ if (S_ISDIR(le16_to_cpu(u.bi_mode)))
+ return 0;
+
+ if (!u.bi_nlink)
+ return 0;
+
+ /*
+ * Step the cursor forward while the current table entry orders before
+ * this key's (inode number, snapshot).
+ */
+ while ((cmp_int(link->inum, k.k->p.offset) ?:
+ cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
+ /*
+ * Assert before stepping, so that link never points at
+ * links->d[links->nr] when the loop condition dereferences it.
+ * No side effect inside BUG_ON().
+ */
+ BUG_ON(*idx + 1 >= links->nr);
+ link = &links->d[++*idx];
+ }
+
+ if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
+ "inode %llu type %s has wrong i_nlink (%u, should be %u)",
+ u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
+ bch2_inode_nlink_get(&u), link->count)) {
+ bch2_inode_nlink_set(&u, link->count);
+ ret = __write_inode(trans, &u, k.k->p.snapshot);
+ }
+fsck_err:
+ return ret;
+}
+
noinline_for_stack
static int check_nlinks_update_hardlinks(struct bch_fs *c,
struct nlink_table *links,
@@ -2202,56 +2243,25 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
- struct bch_inode_unpacked u;
- struct nlink *link = links->d;
+ size_t idx = 0;
int ret = 0;
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_inodes,
- POS(0, range_start),
- BTREE_ITER_INTENT|
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
- if (k.k->p.offset >= range_end)
- break;
-
- if (!bkey_is_inode(k.k))
- continue;
-
- BUG_ON(bch2_inode_unpack(k, &u));
-
- if (S_ISDIR(le16_to_cpu(u.bi_mode)))
- continue;
-
- if (!u.bi_nlink)
- continue;
-
- while ((cmp_int(link->inum, k.k->p.offset) ?:
- cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
- link++;
- BUG_ON(link >= links->d + links->nr);
- }
-
- if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, c,
- "inode %llu type %s has wrong i_nlink (%u, should be %u)",
- u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
- bch2_inode_nlink_get(&u), link->count)) {
- bch2_inode_nlink_set(&u, link->count);
+ ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_inodes,
+ POS(0, range_start),
+ BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+ check_nlinks_update_inode(&trans, &iter, k, links, &idx, range_end));
- ret = write_inode(&trans, &u, k.k->p.snapshot);
- if (ret)
- bch_err(c, "error in fsck: error %i updating inode", ret);
- }
- }
-fsck_err:
- bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
- if (ret)
+ if (ret < 0) {
bch_err(c, "error in fsck: btree error %i while walking inodes", ret);
+ return ret;
+ }
- return ret;
+ return 0;
}
noinline_for_stack
@@ -2291,21 +2301,13 @@ static int check_nlinks(struct bch_fs *c)
return ret;
}
-static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
+static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter,
+ struct bkey_s_c k)
{
- struct bkey_s_c k;
struct bkey_s_c_reflink_p p;
struct bkey_i_reflink_p *u;
int ret;
- k = bch2_btree_iter_peek(iter);
- if (!k.k)
- return 0;
-
- ret = bkey_err(k);
- if (ret)
- return ret;
-
if (k.k->type != KEY_TYPE_reflink_p)
return 0;
@@ -2341,20 +2343,11 @@ static int fix_reflink_p(struct bch_fs *c)
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_extents, POS_MIN,
- BTREE_ITER_INTENT|
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
- if (k.k->type == KEY_TYPE_reflink_p) {
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_LAZY_RW,
- fix_reflink_p_key(&trans, &iter));
- if (ret)
- break;
- }
- }
- bch2_trans_iter_exit(&trans, &iter);
+ ret = for_each_btree_key_commit(&trans, iter,
+ BTREE_ID_extents, POS_MIN,
+ BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+ fix_reflink_p_key(&trans, &iter, k));
bch2_trans_exit(&trans);
return ret;
@@ -2380,7 +2373,7 @@ again:
check_nlinks(c) ?:
fix_reflink_p(c);
- if (ret == -NEED_SNAPSHOT_CLEANUP) {
+ if (bch2_err_matches(ret, BCH_ERR_need_snapshot_cleanup)) {
set_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
goto again;
}
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 6a2b9490..08310600 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -639,7 +639,7 @@ static int bch2_inode_delete_keys(struct btree_trans *trans,
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL);
err:
- if (ret && ret != -EINTR)
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
break;
}
@@ -710,7 +710,7 @@ retry:
BTREE_INSERT_NOFAIL);
err:
bch2_trans_iter_exit(&trans, &iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 50fa5723..93771f83 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -312,7 +312,7 @@ int bch2_extent_update(struct btree_trans *trans,
}
/*
- * Returns -EINTR if we had to drop locks:
+ * Returns -BCH_ERR_transaction_restart if we had to drop locks:
*/
int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
subvol_inum inum, u64 end,
@@ -325,7 +325,8 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
int ret = 0, ret2 = 0;
u32 snapshot;
- while (!ret || ret == -EINTR) {
+ while (!ret ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
struct bkey_i delete;
@@ -384,7 +385,10 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
- return ret == -EINTR ? 0 : ret;
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ ret = 0;
+
+ return ret;
}
int bch2_write_index_default(struct bch_write_op *op)
@@ -415,7 +419,7 @@ int bch2_write_index_default(struct bch_write_op *op)
ret = bch2_subvolume_get_snapshot(&trans, inum.subvol,
&sk.k->k.p.snapshot);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
@@ -430,7 +434,7 @@ int bch2_write_index_default(struct bch_write_op *op)
op->flags & BCH_WRITE_CHECK_ENOSPC);
bch2_trans_iter_exit(&trans, &iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
@@ -580,14 +584,14 @@ static void __bch2_write_index(struct bch_write_op *op)
u64 sectors_start = keylist_sectors(keys);
int ret = op->index_update_fn(op);
- BUG_ON(ret == -EINTR);
+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
BUG_ON(keylist_sectors(keys) && !ret);
op->written += sectors_start - keylist_sectors(keys);
if (ret) {
bch_err_inum_ratelimited(c, op->pos.inode,
- "write error %i from btree update", ret);
+ "write error while doing btree update: %s", bch2_err_str(ret));
op->error = ret;
}
}
@@ -1915,6 +1919,7 @@ static void bch2_read_endio(struct bio *bio)
}
if (rbio->narrow_crcs ||
+ rbio->promote ||
crc_is_compressed(rbio->pick.crc) ||
bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
@@ -2316,10 +2321,9 @@ retry:
* read_extent -> io_time_reset may cause a transaction restart
* without returning an error, we need to check for that here:
*/
- if (!bch2_trans_relock(&trans)) {
- ret = -EINTR;
+ ret = bch2_trans_relock(&trans);
+ if (ret)
break;
- }
bch2_btree_iter_set_pos(&iter,
POS(inum.inum, bvec_iter.bi_sector));
@@ -2373,7 +2377,9 @@ retry:
err:
bch2_trans_iter_exit(&trans, &iter);
- if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ ret == READ_RETRY ||
+ ret == READ_RETRY_AVOID)
goto retry;
bch2_trans_exit(&trans);
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index b561ed78..d77092aa 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -883,7 +883,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
if (!new_fs) {
for (i = 0; i < nr_got; i++) {
- ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
+ ret = bch2_trans_run(c,
bch2_trans_mark_metadata_bucket(&trans, ca,
bu[i], BCH_DATA_journal,
ca->mi.bucket_size));
@@ -1146,7 +1146,7 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
bch2_sb_get_journal(sb);
struct bch_sb_field_journal_v2 *journal_buckets_v2 =
bch2_sb_get_journal_v2(sb);
- unsigned i;
+ unsigned i, nr_bvecs;
ja->nr = 0;
@@ -1163,11 +1163,14 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
if (!ja->bucket_seq)
return -ENOMEM;
- ca->journal.bio = bio_kmalloc(GFP_KERNEL,
- DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE));
+ nr_bvecs = DIV_ROUND_UP(JOURNAL_ENTRY_SIZE_MAX, PAGE_SIZE);
+
+ ca->journal.bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
if (!ca->journal.bio)
return -ENOMEM;
+ bio_init(ca->journal.bio, NULL, ca->journal.bio->bi_inline_vecs, nr_bvecs, 0);
+
ja->buckets = kcalloc(ja->nr, sizeof(u64), GFP_KERNEL);
if (!ja->buckets)
return -ENOMEM;
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 0ff78a27..107521e1 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -197,7 +197,7 @@ static void journal_entry_null_range(void *start, void *end)
bch_err(c, "corrupt metadata before write:\n" \
msg, ##__VA_ARGS__); \
if (bch2_fs_inconsistent(c)) { \
- ret = BCH_FSCK_ERRORS_NOT_FIXED; \
+ ret = -BCH_ERR_fsck_errors_not_fixed; \
goto fsck_err; \
} \
break; \
@@ -823,20 +823,20 @@ static int journal_read_bucket(struct bch_dev *ca,
while (offset < end) {
if (!sectors_read) {
struct bio *bio;
+ unsigned nr_bvecs;
reread:
sectors_read = min_t(unsigned,
end - offset, buf->size >> 9);
+ nr_bvecs = buf_pages(buf->data, sectors_read << 9);
- bio = bio_kmalloc(GFP_KERNEL,
- buf_pages(buf->data,
- sectors_read << 9));
- bio_set_dev(bio, ca->disk_sb.bdev);
- bio->bi_iter.bi_sector = offset;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
+ bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ);
+
+ bio->bi_iter.bi_sector = offset;
bch2_bio_map(bio, buf->data, sectors_read << 9);
ret = submit_bio_wait(bio);
- bio_put(bio);
+ kfree(bio);
if (bch2_dev_io_err_on(ret, ca,
"journal read error: sector %llu",
@@ -858,7 +858,7 @@ reread:
end - offset, sectors_read,
READ);
switch (ret) {
- case BCH_FSCK_OK:
+ case 0:
sectors = vstruct_sectors(j, c->block_bits);
break;
case JOURNAL_ENTRY_REREAD:
diff --git a/libbcachefs/journal_reclaim.c b/libbcachefs/journal_reclaim.c
index fdc94e83..9f8b63b3 100644
--- a/libbcachefs/journal_reclaim.c
+++ b/libbcachefs/journal_reclaim.c
@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "btree_key_cache.h"
+#include "errcode.h"
#include "error.h"
#include "journal.h"
#include "journal_io.h"
@@ -282,11 +283,11 @@ void bch2_journal_do_discards(struct journal *j)
while (should_discard_bucket(j, ja)) {
if (!c->opts.nochanges &&
ca->mi.discard &&
- blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
+ bdev_max_discard_sectors(ca->disk_sb.bdev))
blkdev_issue_discard(ca->disk_sb.bdev,
bucket_to_sector(ca,
ja->buckets[ja->discard_idx]),
- ca->mi.bucket_size, GFP_NOIO, 0);
+ ca->mi.bucket_size, GFP_NOIO);
spin_lock(&j->lock);
ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
@@ -740,15 +741,17 @@ int bch2_journal_reclaim_start(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct task_struct *p;
+ int ret;
if (j->reclaim_thread)
return 0;
p = kthread_create(bch2_journal_reclaim_thread, j,
"bch-reclaim/%s", c->name);
- if (IS_ERR(p)) {
- bch_err(c, "error creating journal reclaim thread: %li", PTR_ERR(p));
- return PTR_ERR(p);
+ ret = PTR_ERR_OR_ZERO(p);
+ if (ret) {
+ bch_err(c, "error creating journal reclaim thread: %s", bch2_err_str(ret));
+ return ret;
}
get_task_struct(p);
diff --git a/libbcachefs/journal_seq_blacklist.c b/libbcachefs/journal_seq_blacklist.c
index d9b4042a..5c555b37 100644
--- a/libbcachefs/journal_seq_blacklist.c
+++ b/libbcachefs/journal_seq_blacklist.c
@@ -272,7 +272,7 @@ retry:
!test_bit(BCH_FS_STOPPING, &c->flags))
b = bch2_btree_iter_next_node(&iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_iter_exit(&trans, &iter);
diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c
index 94ecb3a3..53e607d7 100644
--- a/libbcachefs/lru.c
+++ b/libbcachefs/lru.c
@@ -130,25 +130,18 @@ int bch2_lru_change(struct btree_trans *trans, u64 id, u64 idx,
}
static int bch2_check_lru_key(struct btree_trans *trans,
- struct btree_iter *lru_iter)
+ struct btree_iter *lru_iter,
+ struct bkey_s_c lru_k)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
- struct bkey_s_c lru_k, k;
+ struct bkey_s_c k;
struct bch_alloc_v4 a;
struct printbuf buf1 = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
struct bpos alloc_pos;
int ret;
- lru_k = bch2_btree_iter_peek(lru_iter);
- if (!lru_k.k)
- return 0;
-
- ret = bkey_err(lru_k);
- if (ret)
- return ret;
-
alloc_pos = POS(lru_k.k->p.inode,
le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx));
@@ -202,16 +195,10 @@ int bch2_check_lrus(struct bch_fs *c)
bch2_trans_init(&trans, c, 0, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN,
- BTREE_ITER_PREFETCH, k, ret) {
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_LAZY_RW,
- bch2_check_lru_key(&trans, &iter));
- if (ret)
- break;
- }
- bch2_trans_iter_exit(&trans, &iter);
+ ret = for_each_btree_key_commit(&trans, iter,
+ BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BTREE_INSERT_NOFAIL|BTREE_INSERT_LAZY_RW,
+ bch2_check_lru_key(&trans, &iter, k));
bch2_trans_exit(&trans);
return ret;
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index 5345697f..8b258d96 100644
--- a/libbcachefs/migrate.c
+++ b/libbcachefs/migrate.c
@@ -8,6 +8,7 @@
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
+#include "errcode.h"
#include "extents.h"
#include "io.h"
#include "journal.h"
@@ -35,85 +36,76 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
return 0;
}
-static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags,
- enum btree_id btree_id)
+static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ unsigned dev_idx,
+ int flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i *n;
+ int ret;
+ /*
+ * Per-key step of bch2_dev_usrdata_drop(): for a key that references
+ * device dev_idx, build an edited copy (drop_dev_ptrs() followed by
+ * bch2_extent_normalize()) and pass it to bch2_trans_update().
+ * Returns 0 when the key does not reference dev_idx, otherwise the
+ * first nonzero result of the calls below.
+ */
+ if (!bch2_bkey_has_device(k, dev_idx))
+ return 0; /* nothing to change for this key */
+
+ n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+ ret = PTR_ERR_OR_ZERO(n); /* failure is reported as an error pointer */
+ if (ret)
+ return ret;
+
+ bkey_reassemble(n, k); /* presumably copies k into n for editing; helper not shown here */
+
+ ret = drop_dev_ptrs(c, bkey_i_to_s(n), dev_idx, flags, false);
+ if (ret)
+ return ret;
+
+ /*
+ * If the new extent no longer has any pointers, bch2_extent_normalize()
+ * will do the appropriate thing with it (turning it into a
+ * KEY_TYPE_error key, or just a discard if it was a cached extent)
+ */
+ bch2_extent_normalize(c, bkey_i_to_s(n));
+
+ /*
+ * Since we're not inserting through an extent iterator
+ * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+ * we aren't using the extent overwrite path to delete, we're
+ * just using the normal key deletion path:
+ */
+ if (bkey_deleted(&n->k))
+ n->k.size = 0;
+
+ /* no commit here: the caller wraps this in for_each_btree_key_commit() */
+ return bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+}
+
+static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
- struct bkey_buf sk;
+ enum btree_id id;
int ret = 0;
- bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
- bch2_trans_iter_init(&trans, &iter, btree_id, POS_MIN,
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS);
-
- while ((bch2_trans_begin(&trans),
- (k = bch2_btree_iter_peek(&iter)).k) &&
- !(ret = bkey_err(k))) {
- if (!bch2_bkey_has_device(k, dev_idx)) {
- bch2_btree_iter_advance(&iter);
+ for (id = 0; id < BTREE_ID_NR; id++) {
+ if (!btree_type_has_ptrs(id))
continue;
- }
- bch2_bkey_buf_reassemble(&sk, c, k);
-
- ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k),
- dev_idx, flags, false);
- if (ret)
- break;
-
- /*
- * If the new extent no longer has any pointers, bch2_extent_normalize()
- * will do the appropriate thing with it (turning it into a
- * KEY_TYPE_error key, or just a discard if it was a cached extent)
- */
- bch2_extent_normalize(c, bkey_i_to_s(sk.k));
-
- /*
- * Since we're not inserting through an extent iterator
- * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
- * we aren't using the extent overwrite path to delete, we're
- * just using the normal key deletion path:
- */
- if (bkey_deleted(&sk.k->k))
- sk.k->k.size = 0;
-
- ret = bch2_btree_iter_traverse(&iter) ?:
- bch2_trans_update(&trans, &iter, sk.k,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
- bch2_trans_commit(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL);
-
- /*
- * don't want to leave ret == -EINTR, since if we raced and
- * something else overwrote the key we could spuriously return
- * -EINTR below:
- */
- if (ret == -EINTR)
- ret = 0;
+ ret = for_each_btree_key_commit(&trans, iter, id, POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BTREE_INSERT_NOFAIL,
+ bch2_dev_usrdata_drop_key(&trans, &iter, k, dev_idx, flags));
if (ret)
break;
}
- bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
- bch2_bkey_buf_exit(&sk, c);
-
- BUG_ON(ret == -EINTR);
return ret;
}
-static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
-{
- return __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_extents) ?:
- __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_reflink);
-}
-
static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
struct btree_trans trans;
@@ -154,19 +146,20 @@ retry:
}
ret = bch2_btree_node_update_key(&trans, &iter, b, k.k, false);
- if (ret == -EINTR) {
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
continue;
}
if (ret) {
- bch_err(c, "Error updating btree node key: %i", ret);
+ bch_err(c, "Error updating btree node key: %s",
+ bch2_err_str(ret));
break;
}
next:
bch2_btree_iter_next_node(&iter);
}
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_iter_exit(&trans, &iter);
@@ -181,7 +174,7 @@ err:
bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&k, c);
- BUG_ON(ret == -EINTR);
+ BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart));
return ret;
}
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 9748b865..2fc24745 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -9,6 +9,7 @@
#include "btree_update_interior.h"
#include "disk_groups.h"
#include "ec.h"
+#include "errcode.h"
#include "error.h"
#include "inode.h"
#include "io.h"
@@ -370,7 +371,7 @@ static int move_get_io_opts(struct btree_trans *trans,
ret = lookup_inode(trans,
SPOS(0, k.k->p.inode, k.k->p.snapshot),
&inode);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
return ret;
if (!ret)
@@ -418,7 +419,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
break;
ret = bkey_err(k);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
@@ -449,7 +450,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
ret2 = bch2_move_extent(&trans, ctxt, io_opts,
btree_id, k, data_opts);
if (ret2) {
- if (ret2 == -EINTR)
+ if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
continue;
if (ret2 == -ENOMEM) {
@@ -574,7 +575,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
ret = bch2_get_next_backpointer(&trans, bucket, gen,
&bp_offset, &bp);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
goto err;
@@ -589,7 +590,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
k = bch2_backpointer_get_key(&trans, &iter,
bucket, bp_offset, bp);
ret = bkey_err(k);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
goto err;
@@ -616,7 +617,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
ret = bch2_move_extent(&trans, ctxt, io_opts,
bp.btree_id, k, data_opts);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
@@ -635,7 +636,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
b = bch2_backpointer_get_node(&trans, &iter,
bucket, bp_offset, bp);
ret = PTR_ERR_OR_ZERO(b);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
goto err;
@@ -645,7 +646,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
ret = bch2_btree_node_rewrite(&trans, &iter, b, 0);
bch2_trans_iter_exit(&trans, &iter);
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
goto err;
@@ -740,14 +741,14 @@ retry:
goto next;
ret = bch2_btree_node_rewrite(&trans, &iter, b, 0) ?: ret;
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
next:
bch2_btree_iter_next_node(&iter);
}
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_iter_exit(&trans, &iter);
@@ -759,7 +760,7 @@ next:
bch2_trans_exit(&trans);
if (ret)
- bch_err(c, "error %i in bch2_move_btree", ret);
+ bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret));
bch2_btree_interior_updates_flush(c);
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index f9ad4cb2..f913864e 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -13,6 +13,7 @@
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
+#include "errcode.h"
#include "error.h"
#include "extents.h"
#include "eytzinger.h"
@@ -162,7 +163,7 @@ static int bch2_copygc(struct bch_fs *c)
bch2_moving_ctxt_exit(&ctxt);
if (ret < 0)
- bch_err(c, "error %i from bch2_move_data() in copygc", ret);
+ bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
trace_copygc(c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
return ret;
@@ -251,6 +252,7 @@ void bch2_copygc_stop(struct bch_fs *c)
int bch2_copygc_start(struct bch_fs *c)
{
struct task_struct *t;
+ int ret;
if (c->copygc_thread)
return 0;
@@ -262,9 +264,10 @@ int bch2_copygc_start(struct bch_fs *c)
return -ENOMEM;
t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
- if (IS_ERR(t)) {
- bch_err(c, "error creating copygc thread: %li", PTR_ERR(t));
- return PTR_ERR(t);
+ ret = PTR_ERR_OR_ZERO(t);
+ if (ret) {
+ bch_err(c, "error creating copygc thread: %s", bch2_err_str(ret));
+ return ret;
}
get_task_struct(t);
diff --git a/libbcachefs/movinggc.h b/libbcachefs/movinggc.h
index 92273824..e85c8136 100644
--- a/libbcachefs/movinggc.h
+++ b/libbcachefs/movinggc.h
@@ -2,6 +2,7 @@
#ifndef _BCACHEFS_MOVINGGC_H
#define _BCACHEFS_MOVINGGC_H
+unsigned long bch2_copygc_wait_amount(struct bch_fs *);
void bch2_copygc_stop(struct bch_fs *);
int bch2_copygc_start(struct bch_fs *);
void bch2_fs_copygc_init(struct bch_fs *);
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index 2f5f49cb..5b8586ec 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -341,6 +341,11 @@ enum opt_type {
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Don't open device in exclusive mode") \
+ x(direct_io, u8, \
+ OPT_FS|OPT_MOUNT, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, true, \
+ NULL, "Use O_DIRECT (userspace only)") \
x(sb, u64, \
OPT_MOUNT, \
OPT_UINT(0, S64_MAX), \
diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c
index e35a6d1f..454c76e0 100644
--- a/libbcachefs/quota.c
+++ b/libbcachefs/quota.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "btree_update.h"
+#include "errcode.h"
#include "inode.h"
#include "quota.h"
#include "subvolume.h"
@@ -370,6 +371,9 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
BUG_ON(k.k->p.inode >= QTYP_NR);
+ if (!((1U << k.k->p.inode) & enabled_qtypes(c)))
+ return 0;
+
switch (k.k->type) {
case KEY_TYPE_quota:
dq = bkey_s_c_to_quota(k);
@@ -393,30 +397,6 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
return 0;
}
-static int bch2_quota_init_type(struct bch_fs *c, enum quota_types type)
-{
- struct btree_trans trans;
- struct btree_iter iter;
- struct bkey_s_c k;
- int ret = 0;
-
- bch2_trans_init(&trans, c, 0, 0);
-
- for_each_btree_key(&trans, iter, BTREE_ID_quotas, POS(type, 0),
- BTREE_ITER_PREFETCH, k, ret) {
- if (k.k->p.inode != type)
- break;
-
- ret = __bch2_quota_set(c, k);
- if (ret)
- break;
- }
- bch2_trans_iter_exit(&trans, &iter);
-
- bch2_trans_exit(&trans);
- return ret;
-}
-
void bch2_fs_quota_exit(struct bch_fs *c)
{
unsigned i;
@@ -491,8 +471,6 @@ advance:
int bch2_fs_quota_read(struct bch_fs *c)
{
- unsigned i, qtypes = enabled_qtypes(c);
- struct bch_memquota_type *q;
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
@@ -502,23 +480,16 @@ int bch2_fs_quota_read(struct bch_fs *c)
bch2_sb_quota_read(c);
mutex_unlock(&c->sb_lock);
- for_each_set_qtype(c, i, q, qtypes) {
- ret = bch2_quota_init_type(c, i);
- if (ret)
- return ret;
- }
-
bch2_trans_init(&trans, c, 0, 0);
- ret = for_each_btree_key2(&trans, iter, BTREE_ID_inodes,
- POS_MIN,
- BTREE_ITER_INTENT|
- BTREE_ITER_PREFETCH|
- BTREE_ITER_ALL_SNAPSHOTS,
- k,
+ ret = for_each_btree_key2(&trans, iter, BTREE_ID_quotas,
+ POS_MIN, BTREE_ITER_PREFETCH, k,
+ __bch2_quota_set(c, k)) ?:
+ for_each_btree_key2(&trans, iter, BTREE_ID_inodes,
+ POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
bch2_fs_quota_read_inode(&trans, &iter, k));
if (ret)
- bch_err(c, "err reading inodes in quota init: %i", ret);
+ bch_err(c, "err in quota_read: %s", bch2_err_str(ret));
bch2_trans_exit(&trans);
return ret;
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index 31da4093..ecc64dd9 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -6,6 +6,7 @@
#include "buckets.h"
#include "clock.h"
#include "disk_groups.h"
+#include "errcode.h"
#include "extents.h"
#include "io.h"
#include "move.h"
@@ -331,6 +332,7 @@ void bch2_rebalance_stop(struct bch_fs *c)
int bch2_rebalance_start(struct bch_fs *c)
{
struct task_struct *p;
+ int ret;
if (c->rebalance.thread)
return 0;
@@ -339,9 +341,10 @@ int bch2_rebalance_start(struct bch_fs *c)
return 0;
p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
- if (IS_ERR(p)) {
- bch_err(c, "error creating rebalance thread: %li", PTR_ERR(p));
- return PTR_ERR(p);
+ ret = PTR_ERR_OR_ZERO(p);
+ if (ret) {
+ bch_err(c, "error creating rebalance thread: %s", bch2_err_str(ret));
+ return ret;
}
get_task_struct(p);
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 64b1e79f..b070bdf0 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -11,6 +11,7 @@
#include "buckets.h"
#include "dirent.h"
#include "ec.h"
+#include "errcode.h"
#include "error.h"
#include "fs-common.h"
#include "fsck.h"
@@ -87,9 +88,9 @@ static inline struct journal_key *idx_to_key(struct journal_keys *keys, size_t i
return keys->d + idx_to_pos(keys, idx);
}
-static size_t bch2_journal_key_search(struct journal_keys *keys,
- enum btree_id id, unsigned level,
- struct bpos pos)
+static size_t __bch2_journal_key_search(struct journal_keys *keys,
+ enum btree_id id, unsigned level,
+ struct bpos pos)
{
size_t l = 0, r = keys->nr, m;
@@ -107,7 +108,14 @@ static size_t bch2_journal_key_search(struct journal_keys *keys,
BUG_ON(l &&
__journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0);
- return idx_to_pos(keys, l);
+ return l;
+}
+
+static size_t bch2_journal_key_search(struct journal_keys *keys,
+ enum btree_id id, unsigned level,
+ struct bpos pos)
+{ /* wrapper: run __bch2_journal_key_search(), then translate its result with idx_to_pos() */
+ return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos));
}
struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree_id,
@@ -116,22 +124,21 @@ struct bkey_i *bch2_journal_keys_peek_upto(struct bch_fs *c, enum btree_id btree
{
struct journal_keys *keys = &c->journal_keys;
unsigned iters = 0;
+ struct journal_key *k;
search:
if (!*idx)
- *idx = bch2_journal_key_search(keys, btree_id, level, pos);
+ *idx = __bch2_journal_key_search(keys, btree_id, level, pos);
- while (*idx < keys->size &&
- keys->d[*idx].btree_id == btree_id &&
- keys->d[*idx].level == level &&
- bpos_cmp(keys->d[*idx].k->k.p, end_pos) <= 0) {
- if (bpos_cmp(keys->d[*idx].k->k.p, pos) >= 0 &&
- !keys->d[*idx].overwritten)
- return keys->d[*idx].k;
+ while (*idx < keys->nr &&
+ (k = idx_to_key(keys, *idx),
+ k->btree_id == btree_id &&
+ k->level == level &&
+ bpos_cmp(k->k->k.p, end_pos) <= 0)) {
+ if (bpos_cmp(k->k->k.p, pos) >= 0 &&
+ !k->overwritten)
+ return k->k;
(*idx)++;
- if (*idx == keys->gap)
- *idx += keys->size - keys->nr;
-
iters++;
if (iters == 10) {
*idx = 0;
@@ -1153,7 +1160,7 @@ int bch2_fs_recovery(struct bch_fs *c)
use_clean:
if (!clean) {
bch_err(c, "no superblock clean section found");
- ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
+ ret = -BCH_ERR_fsck_repair_impossible;
goto err;
}
@@ -1435,9 +1442,9 @@ out:
}
if (ret)
- bch_err(c, "Error in recovery: %s (%i)", err, ret);
+ bch_err(c, "Error in recovery: %s (%s)", err, bch2_err_str(ret));
else
- bch_verbose(c, "ret %i", ret);
+ bch_verbose(c, "ret %s", bch2_err_str(ret));
return ret;
err:
fsck_err:
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index 2038e350..d5c14bb2 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -299,7 +299,8 @@ s64 bch2_remap_range(struct bch_fs *c,
bch2_trans_iter_init(&trans, &dst_iter, BTREE_ID_extents, dst_start,
BTREE_ITER_INTENT);
- while ((ret == 0 || ret == -EINTR) &&
+ while ((ret == 0 ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart)) &&
bkey_cmp(dst_iter.pos, dst_end) < 0) {
struct disk_reservation disk_res = { 0 };
@@ -409,7 +410,7 @@ s64 bch2_remap_range(struct bch_fs *c,
}
bch2_trans_iter_exit(&trans, &inode_iter);
- } while (ret2 == -EINTR);
+ } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart));
bch2_trans_exit(&trans);
bch2_bkey_buf_exit(&new_src, c);
diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index 1a212bac..24244bc3 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c
@@ -3,6 +3,7 @@
#include "bcachefs.h"
#include "btree_key_cache.h"
#include "btree_update.h"
+#include "errcode.h"
#include "error.h"
#include "fs.h"
#include "subvolume.h"
@@ -291,22 +292,14 @@ int bch2_fs_check_snapshots(struct bch_fs *c)
}
static int check_subvol(struct btree_trans *trans,
- struct btree_iter *iter)
+ struct btree_iter *iter,
+ struct bkey_s_c k)
{
- struct bkey_s_c k;
struct bkey_s_c_subvolume subvol;
struct bch_snapshot snapshot;
unsigned snapid;
int ret;
- k = bch2_btree_iter_peek(iter);
- if (!k.k)
- return 0;
-
- ret = bkey_err(k);
- if (ret)
- return ret;
-
if (k.k->type != KEY_TYPE_subvolume)
return 0;
@@ -322,9 +315,9 @@ static int check_subvol(struct btree_trans *trans,
if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
ret = bch2_subvolume_delete(trans, iter->pos.offset);
- if (ret && ret != -EINTR)
- bch_err(trans->c, "error deleting subvolume %llu: %i",
- iter->pos.offset, ret);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(trans->c, "error deleting subvolume %llu: %s",
+ iter->pos.offset, bch2_err_str(ret));
if (ret)
return ret;
}
@@ -336,22 +329,15 @@ int bch2_fs_check_subvols(struct bch_fs *c)
{
struct btree_trans trans;
struct btree_iter iter;
+ struct bkey_s_c k;
int ret;
bch2_trans_init(&trans, c, 0, 0);
- bch2_trans_iter_init(&trans, &iter, BTREE_ID_subvolumes,
- POS_MIN, BTREE_ITER_PREFETCH);
-
- do {
- ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_LAZY_RW|
- BTREE_INSERT_NOFAIL,
- check_subvol(&trans, &iter));
- if (ret)
- break;
- } while (bch2_btree_iter_advance(&iter));
- bch2_trans_iter_exit(&trans, &iter);
+ ret = for_each_btree_key_commit(&trans, iter,
+ BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
+ NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
+ check_subvol(&trans, &iter, k));
bch2_trans_exit(&trans);
@@ -380,7 +366,7 @@ int bch2_fs_snapshots_start(struct bch_fs *c)
bch2_trans_exit(&trans);
if (ret)
- bch_err(c, "error starting snapshots: %i", ret);
+ bch_err(c, "error starting snapshots: %s", bch2_err_str(ret));
return ret;
}
@@ -595,59 +581,27 @@ err:
return ret;
}
-static int bch2_snapshot_delete_keys_btree(struct btree_trans *trans,
- snapshot_id_list *deleted,
- enum btree_id btree_id)
+static int snapshot_delete_key(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ snapshot_id_list *deleted,
+ snapshot_id_list *equiv_seen,
+ struct bpos *last_pos)
{
struct bch_fs *c = trans->c;
- struct btree_iter iter;
- struct bkey_s_c k;
- snapshot_id_list equiv_seen = { 0 };
- struct bpos last_pos = POS_MIN;
- int ret = 0;
-
- /*
- * XXX: We should also delete whiteouts that no longer overwrite
- * anything
- */
+ u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv;
- bch2_trans_iter_init(trans, &iter, btree_id, POS_MIN,
- BTREE_ITER_INTENT|
- BTREE_ITER_PREFETCH|
- BTREE_ITER_NOT_EXTENTS|
- BTREE_ITER_ALL_SNAPSHOTS);
-
- while ((bch2_trans_begin(trans),
- (k = bch2_btree_iter_peek(&iter)).k) &&
- !(ret = bkey_err(k))) {
- u32 equiv = snapshot_t(c, k.k->p.snapshot)->equiv;
-
- if (bkey_cmp(k.k->p, last_pos))
- equiv_seen.nr = 0;
- last_pos = k.k->p;
-
- if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
- snapshot_list_has_id(&equiv_seen, equiv)) {
- ret = commit_do(trans, NULL, NULL,
- BTREE_INSERT_NOFAIL,
- bch2_btree_iter_traverse(&iter) ?:
- bch2_btree_delete_at(trans, &iter,
- BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
- if (ret)
- break;
- } else {
- ret = snapshot_list_add(c, &equiv_seen, equiv);
- if (ret)
- break;
- }
+ if (bkey_cmp(k.k->p, *last_pos))
+ equiv_seen->nr = 0;
+ *last_pos = k.k->p;
- bch2_btree_iter_advance(&iter);
+ if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
+ snapshot_list_has_id(equiv_seen, equiv)) {
+ return bch2_btree_delete_at(trans, iter,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+ } else {
+ return snapshot_list_add(c, equiv_seen, equiv);
}
- bch2_trans_iter_exit(trans, &iter);
-
- darray_exit(&equiv_seen);
-
- return ret;
}
static int bch2_delete_redundant_snapshot(struct btree_trans *trans, struct btree_iter *iter,
@@ -694,7 +648,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (!test_bit(BCH_FS_STARTED, &c->flags)) {
ret = bch2_fs_read_write_early(c);
if (ret) {
- bch_err(c, "error deleleting dead snapshots: error going rw: %i", ret);
+ bch_err(c, "error deleleting dead snapshots: error going rw: %s", bch2_err_str(ret));
return ret;
}
}
@@ -710,7 +664,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
NULL, NULL, 0,
bch2_delete_redundant_snapshot(&trans, &iter, k));
if (ret) {
- bch_err(c, "error deleting redundant snapshots: %i", ret);
+ bch_err(c, "error deleting redundant snapshots: %s", bch2_err_str(ret));
goto err;
}
@@ -718,7 +672,7 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
POS_MIN, 0, k,
bch2_snapshot_set_equiv(&trans, k));
if (ret) {
- bch_err(c, "error in bch2_snapshots_set_equiv: %i", ret);
+ bch_err(c, "error in bch2_snapshots_set_equiv: %s", bch2_err_str(ret));
goto err;
}
@@ -737,17 +691,27 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
bch2_trans_iter_exit(&trans, &iter);
if (ret) {
- bch_err(c, "error walking snapshots: %i", ret);
+ bch_err(c, "error walking snapshots: %s", bch2_err_str(ret));
goto err;
}
for (id = 0; id < BTREE_ID_NR; id++) {
+ struct bpos last_pos = POS_MIN;
+ snapshot_id_list equiv_seen = { 0 };
+
if (!btree_type_has_snapshots(id))
continue;
- ret = bch2_snapshot_delete_keys_btree(&trans, &deleted, id);
+ ret = for_each_btree_key_commit(&trans, iter,
+ id, POS_MIN,
+ BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
+ NULL, NULL, BTREE_INSERT_NOFAIL,
+ snapshot_delete_key(&trans, &iter, k, &deleted, &equiv_seen, &last_pos));
+
+ darray_exit(&equiv_seen);
+
if (ret) {
- bch_err(c, "error deleting snapshot keys: %i", ret);
+ bch_err(c, "error deleting snapshot keys: %s", bch2_err_str(ret));
goto err;
}
}
@@ -756,8 +720,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
ret = commit_do(&trans, NULL, NULL, 0,
bch2_snapshot_node_delete(&trans, deleted.data[i]));
if (ret) {
- bch_err(c, "error deleting snapshot %u: %i",
- deleted.data[i], ret);
+ bch_err(c, "error deleting snapshot %u: %s",
+ deleted.data[i], bch2_err_str(ret));
goto err;
}
}
@@ -913,6 +877,8 @@ int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
goto err;
ret = bch2_snapshot_node_set_deleted(trans, snapid);
+ if (ret)
+ goto err;
h = bch2_trans_kmalloc(trans, sizeof(*h));
ret = PTR_ERR_OR_ZERO(h);
@@ -949,7 +915,7 @@ void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_subvolume_delete(&trans, *id));
if (ret) {
- bch_err(c, "error %i deleting subvolume %u", ret, *id);
+ bch_err(c, "error deleting subvolume %u: %s", *id, bch2_err_str(ret));
break;
}
}
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 8b813099..55f8c65a 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -101,7 +101,7 @@ void bch2_sb_field_delete(struct bch_sb_handle *sb,
void bch2_free_super(struct bch_sb_handle *sb)
{
if (sb->bio)
- bio_put(sb->bio);
+ kfree(sb->bio);
if (!IS_ERR_OR_NULL(sb->bdev))
blkdev_put(sb->bdev, sb->mode);
@@ -143,13 +143,16 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
return -ENOMEM;
if (sb->have_bio) {
- bio = bio_kmalloc(GFP_KERNEL,
- DIV_ROUND_UP(new_buffer_size, PAGE_SIZE));
+ unsigned nr_bvecs = DIV_ROUND_UP(new_buffer_size, PAGE_SIZE);
+
+ bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
if (!bio)
return -ENOMEM;
+ bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);
+
if (sb->bio)
- bio_put(sb->bio);
+ kfree(sb->bio);
sb->bio = bio;
}
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 29089740..7c634800 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -24,6 +24,7 @@
#include "debug.h"
#include "disk_groups.h"
#include "ec.h"
+#include "errcode.h"
#include "error.h"
#include "fs.h"
#include "fs-io.h"
@@ -930,31 +931,10 @@ out:
up_write(&c->state_lock);
return ret;
err:
- switch (ret) {
- case BCH_FSCK_ERRORS_NOT_FIXED:
- bch_err(c, "filesystem contains errors: please report this to the developers");
- pr_cont("mount with -o fix_errors to repair\n");
- break;
- case BCH_FSCK_REPAIR_UNIMPLEMENTED:
- bch_err(c, "filesystem contains errors: please report this to the developers");
- pr_cont("repair unimplemented: inform the developers so that it can be added\n");
- break;
- case BCH_FSCK_REPAIR_IMPOSSIBLE:
- bch_err(c, "filesystem contains errors, but repair impossible");
- break;
- case BCH_FSCK_UNKNOWN_VERSION:
- bch_err(c, "unknown metadata version");
- break;
- case -ENOMEM:
- bch_err(c, "cannot allocate memory");
- break;
- case -EIO:
- bch_err(c, "IO error");
- break;
- }
+ bch_err(c, "error starting filesystem: %s", bch2_err_str(ret));
- if (ret >= 0)
- ret = -EIO;
+ if (ret < -BCH_ERR_START)
+ ret = -EINVAL;
goto out;
}
@@ -1438,7 +1418,7 @@ static int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
BTREE_TRIGGER_NORUN, NULL);
if (ret)
- bch_err(c, "error %i removing dev alloc info", ret);
+ bch_err(c, "error removing dev alloc info: %s", bch2_err_str(ret));
return ret;
}
@@ -1466,7 +1446,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
if (ret) {
- bch_err(ca, "Remove failed: error %i dropping data", ret);
+ bch_err(ca, "Remove failed: error dropping data: %s", bch2_err_str(ret));
goto err;
}
@@ -1478,7 +1458,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
ret = bch2_journal_flush_device_pins(&c->journal, ca->dev_idx);
if (ret) {
- bch_err(ca, "Remove failed: error %i flushing journal", ret);
+ bch_err(ca, "Remove failed: error flushing journal: %s", bch2_err_str(ret));
goto err;
}
@@ -1490,7 +1470,7 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
ret = bch2_replicas_gc2(c);
if (ret) {
- bch_err(ca, "Remove failed: error %i from replicas gc", ret);
+ bch_err(ca, "Remove failed: error from replicas gc: %s", bch2_err_str(ret));
goto err;
}
@@ -1554,7 +1534,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
ret = bch2_read_super(path, &opts, &sb);
if (ret) {
- bch_err(c, "device add error: error reading super: %i", ret);
+ bch_err(c, "device add error: error reading super: %s", bch2_err_str(ret));
goto err;
}
@@ -1647,13 +1627,13 @@ have_slot:
ret = bch2_trans_mark_dev_sb(c, ca);
if (ret) {
- bch_err(c, "device add error: error marking new superblock: %i", ret);
+ bch_err(c, "device add error: error marking new superblock: %s", bch2_err_str(ret));
goto err_late;
}
ret = bch2_fs_freespace_init(c);
if (ret) {
- bch_err(c, "device add error: error initializing free space: %i", ret);
+ bch_err(c, "device add error: error initializing free space: %s", bch2_err_str(ret));
goto err_late;
}
@@ -1715,8 +1695,8 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
ret = bch2_trans_mark_dev_sb(c, ca);
if (ret) {
- bch_err(c, "error bringing %s online: error %i from bch2_trans_mark_dev_sb",
- path, ret);
+ bch_err(c, "error bringing %s online: error from bch2_trans_mark_dev_sb: %s",
+ path, bch2_err_str(ret));
goto err;
}
@@ -1785,7 +1765,7 @@ int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
ret = bch2_dev_buckets_resize(c, ca, nbuckets);
if (ret) {
- bch_err(ca, "Resize error: %i", ret);
+ bch_err(ca, "Resize error: %s", bch2_err_str(ret));
goto err;
}
diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c
index 57245caa..56058a56 100644
--- a/libbcachefs/tests.c
+++ b/libbcachefs/tests.c
@@ -46,7 +46,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
bch2_btree_iter_traverse(&iter) ?:
bch2_trans_update(&trans, &iter, &k.k_i, 0));
if (ret) {
- bch_err(c, "update error in test_delete: %i", ret);
+ bch_err(c, "update error in test_delete: %s", bch2_err_str(ret));
goto err;
}
@@ -55,7 +55,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
bch2_btree_iter_traverse(&iter) ?:
bch2_btree_delete_at(&trans, &iter, 0));
if (ret) {
- bch_err(c, "delete error (first) in test_delete: %i", ret);
+ bch_err(c, "delete error (first) in test_delete: %s", bch2_err_str(ret));
goto err;
}
@@ -64,7 +64,7 @@ static int test_delete(struct bch_fs *c, u64 nr)
bch2_btree_iter_traverse(&iter) ?:
bch2_btree_delete_at(&trans, &iter, 0));
if (ret) {
- bch_err(c, "delete error (second) in test_delete: %i", ret);
+ bch_err(c, "delete error (second) in test_delete: %s", bch2_err_str(ret));
goto err;
}
err:
@@ -92,7 +92,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
bch2_btree_iter_traverse(&iter) ?:
bch2_trans_update(&trans, &iter, &k.k_i, 0));
if (ret) {
- bch_err(c, "update error in test_delete_written: %i", ret);
+ bch_err(c, "update error in test_delete_written: %s", bch2_err_str(ret));
goto err;
}
@@ -103,7 +103,7 @@ static int test_delete_written(struct bch_fs *c, u64 nr)
bch2_btree_iter_traverse(&iter) ?:
bch2_btree_delete_at(&trans, &iter, 0));
if (ret) {
- bch_err(c, "delete error in test_delete_written: %i", ret);
+ bch_err(c, "delete error in test_delete_written: %s", bch2_err_str(ret));
goto err;
}
err:
@@ -136,7 +136,7 @@ static int test_iterate(struct bch_fs *c, u64 nr)
ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i,
NULL, NULL, 0);
if (ret) {
- bch_err(c, "insert error in test_iterate: %i", ret);
+ bch_err(c, "insert error in test_iterate: %s", bch2_err_str(ret));
goto err;
}
}
@@ -145,20 +145,30 @@ static int test_iterate(struct bch_fs *c, u64 nr)
i = 0;
- for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
- SPOS(0, 0, U32_MAX), 0, k, ret) {
- if (k.k->p.inode)
- break;
-
+ ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), 0, k, ({
BUG_ON(k.k->p.offset != i++);
+ 0;
+ }));
+ if (ret) {
+ bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret));
+ goto err;
}
BUG_ON(i != nr);
pr_info("iterating backwards");
- while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k))
- BUG_ON(k.k->p.offset != --i);
+ ret = for_each_btree_key_reverse(&trans, iter, BTREE_ID_xattrs,
+ SPOS(0, U64_MAX, U32_MAX), 0, k,
+ ({
+ BUG_ON(k.k->p.offset != --i);
+ 0;
+ }));
+ if (ret) {
+ bch_err(c, "%s(): error iterating backwards: %s", __func__, bch2_err_str(ret));
+ goto err;
+ }
BUG_ON(i);
err:
@@ -192,7 +202,7 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i,
NULL, NULL, 0);
if (ret) {
- bch_err(c, "insert error in test_iterate_extents: %i", ret);
+ bch_err(c, "insert error in test_iterate_extents: %s", bch2_err_str(ret));
goto err;
}
}
@@ -201,19 +211,31 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr)
i = 0;
- for_each_btree_key(&trans, iter, BTREE_ID_extents,
- SPOS(0, 0, U32_MAX), 0, k, ret) {
+ ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX), 0, k, ({
BUG_ON(bkey_start_offset(k.k) != i);
i = k.k->p.offset;
+ 0;
+ }));
+ if (ret) {
+ bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret));
+ goto err;
}
BUG_ON(i != nr);
pr_info("iterating backwards");
- while (!IS_ERR_OR_NULL((k = bch2_btree_iter_prev(&iter)).k)) {
- BUG_ON(k.k->p.offset != i);
- i = bkey_start_offset(k.k);
+ ret = for_each_btree_key_reverse(&trans, iter, BTREE_ID_extents,
+ SPOS(0, U64_MAX, U32_MAX), 0, k,
+ ({
+ BUG_ON(k.k->p.offset != i);
+ i = bkey_start_offset(k.k);
+ 0;
+ }));
+ if (ret) {
+ bch_err(c, "%s(): error iterating backwards: %s", __func__, bch2_err_str(ret));
+ goto err;
}
BUG_ON(i);
@@ -247,7 +269,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
ret = bch2_btree_insert(c, BTREE_ID_xattrs, &k.k_i,
NULL, NULL, 0);
if (ret) {
- bch_err(c, "insert error in test_iterate_slots: %i", ret);
+ bch_err(c, "insert error in test_iterate_slots: %s", bch2_err_str(ret));
goto err;
}
}
@@ -256,15 +278,16 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
i = 0;
- for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
- SPOS(0, 0, U32_MAX), 0, k, ret) {
- if (k.k->p.inode)
- break;
-
+ ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), 0, k, ({
BUG_ON(k.k->p.offset != i);
i += 2;
+ 0;
+ }));
+ if (ret) {
+ bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret));
+ goto err;
}
- bch2_trans_iter_exit(&trans, &iter);
BUG_ON(i != nr * 2);
@@ -272,17 +295,23 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr)
i = 0;
- for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
- SPOS(0, 0, U32_MAX),
- BTREE_ITER_SLOTS, k, ret) {
+ ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX),
+ BTREE_ITER_SLOTS, k, ({
+ if (i >= nr * 2)
+ break;
+
BUG_ON(k.k->p.offset != i);
BUG_ON(bkey_deleted(k.k) != (i & 1));
i++;
- if (i == nr * 2)
- break;
+ 0;
+ }));
+ if (ret < 0) {
+ bch_err(c, "%s(): error iterating forwards by slots: %s", __func__, bch2_err_str(ret));
+ goto err;
}
- bch2_trans_iter_exit(&trans, &iter);
+ ret = 0;
err:
bch2_trans_exit(&trans);
return ret;
@@ -313,7 +342,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i,
NULL, NULL, 0);
if (ret) {
- bch_err(c, "insert error in test_iterate_slots_extents: %i", ret);
+ bch_err(c, "insert error in test_iterate_slots_extents: %s", bch2_err_str(ret));
goto err;
}
}
@@ -322,13 +351,17 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
i = 0;
- for_each_btree_key(&trans, iter, BTREE_ID_extents,
- SPOS(0, 0, U32_MAX), 0, k, ret) {
+ ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX), 0, k, ({
BUG_ON(bkey_start_offset(k.k) != i + 8);
BUG_ON(k.k->size != 8);
i += 16;
+ 0;
+ }));
+ if (ret) {
+ bch_err(c, "%s(): error iterating forwards: %s", __func__, bch2_err_str(ret));
+ goto err;
}
- bch2_trans_iter_exit(&trans, &iter);
BUG_ON(i != nr);
@@ -336,19 +369,23 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr)
i = 0;
- for_each_btree_key(&trans, iter, BTREE_ID_extents,
- SPOS(0, 0, U32_MAX),
- BTREE_ITER_SLOTS, k, ret) {
+ ret = for_each_btree_key2(&trans, iter, BTREE_ID_extents,
+ SPOS(0, 0, U32_MAX),
+ BTREE_ITER_SLOTS, k, ({
+ if (i == nr)
+ break;
BUG_ON(bkey_deleted(k.k) != !(i % 16));
BUG_ON(bkey_start_offset(k.k) != i);
BUG_ON(k.k->size != 8);
i = k.k->p.offset;
-
- if (i == nr)
- break;
+ 0;
+ }));
+ if (ret) {
+ bch_err(c, "%s(): error iterating forwards by slots: %s", __func__, bch2_err_str(ret));
+ goto err;
}
- bch2_trans_iter_exit(&trans, &iter);
+ ret = 0;
err:
bch2_trans_exit(&trans);
return 0;
@@ -368,10 +405,10 @@ static int test_peek_end(struct bch_fs *c, u64 nr)
bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, U32_MAX), 0);
- k = bch2_btree_iter_peek(&iter);
+ lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
BUG_ON(k.k);
- k = bch2_btree_iter_peek(&iter);
+ lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
BUG_ON(k.k);
bch2_trans_iter_exit(&trans, &iter);
@@ -389,10 +426,10 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr)
bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
SPOS(0, 0, U32_MAX), 0);
- k = bch2_btree_iter_peek(&iter);
+ lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
BUG_ON(k.k);
- k = bch2_btree_iter_peek(&iter);
+ lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
BUG_ON(k.k);
bch2_trans_iter_exit(&trans, &iter);
@@ -419,7 +456,7 @@ static int insert_test_extent(struct bch_fs *c,
ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i,
NULL, NULL, 0);
if (ret)
- bch_err(c, "insert error in insert_test_extent: %i", ret);
+ bch_err(c, "insert error in insert_test_extent: %s", bch2_err_str(ret));
return ret;
}
@@ -482,7 +519,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi)
bch2_trans_init(&trans, c, 0, 0);
bch2_trans_iter_init(&trans, &iter, BTREE_ID_xattrs,
SPOS(0, 0, snapid_lo), 0);
- k = bch2_btree_iter_peek(&iter);
+ lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
BUG_ON(k.k->p.snapshot != U32_MAX);
@@ -518,7 +555,7 @@ static int test_snapshots(struct bch_fs *c, u64 nr)
ret = test_snapshot_filter(c, snapids[0], snapids[1]);
if (ret) {
- bch_err(c, "err %i from test_snapshot_filter", ret);
+ bch_err(c, "err from test_snapshot_filter: %s", bch2_err_str(ret));
return ret;
}
@@ -555,7 +592,7 @@ static int rand_insert(struct bch_fs *c, u64 nr)
ret = commit_do(&trans, NULL, NULL, 0,
__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i));
if (ret) {
- bch_err(c, "error in rand_insert: %i", ret);
+ bch_err(c, "error in rand_insert: %s", bch2_err_str(ret));
break;
}
}
@@ -591,7 +628,7 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr)
__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[6].k_i) ?:
__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k[7].k_i));
if (ret) {
- bch_err(c, "error in rand_insert_multi: %i", ret);
+ bch_err(c, "error in rand_insert_multi: %s", bch2_err_str(ret));
break;
}
}
@@ -615,10 +652,10 @@ static int rand_lookup(struct bch_fs *c, u64 nr)
for (i = 0; i < nr; i++) {
bch2_btree_iter_set_pos(&iter, SPOS(0, test_rand(), U32_MAX));
- k = bch2_btree_iter_peek(&iter);
+ lockrestart_do(&trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
ret = bkey_err(k);
if (ret) {
- bch_err(c, "error in rand_lookup: %i", ret);
+ bch_err(c, "error in rand_lookup: %s", bch2_err_str(ret));
break;
}
}
@@ -638,10 +675,10 @@ static int rand_mixed_trans(struct btree_trans *trans,
bch2_btree_iter_set_pos(iter, SPOS(0, pos, U32_MAX));
- k = bch2_btree_iter_peek(iter);
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(iter)));
ret = bkey_err(k);
- if (ret && ret != -EINTR)
- bch_err(trans->c, "lookup error in rand_mixed: %i", ret);
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+ bch_err(trans->c, "lookup error in rand_mixed: %s", bch2_err_str(ret));
if (ret)
return ret;
@@ -671,7 +708,7 @@ static int rand_mixed(struct bch_fs *c, u64 nr)
ret = commit_do(&trans, NULL, NULL, 0,
rand_mixed_trans(&trans, &iter, &cookie, i, rand));
if (ret) {
- bch_err(c, "update error in rand_mixed: %i", ret);
+ bch_err(c, "update error in rand_mixed: %s", bch2_err_str(ret));
break;
}
}
@@ -689,7 +726,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek(&iter);
+ lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(&iter)));
ret = bkey_err(k);
if (ret)
goto err;
@@ -717,7 +754,7 @@ static int rand_delete(struct bch_fs *c, u64 nr)
ret = commit_do(&trans, NULL, NULL, 0,
__do_delete(&trans, pos));
if (ret) {
- bch_err(c, "error in rand_delete: %i", ret);
+ bch_err(c, "error in rand_delete: %s", bch2_err_str(ret));
break;
}
}
@@ -733,28 +770,23 @@ static int seq_insert(struct bch_fs *c, u64 nr)
struct bkey_s_c k;
struct bkey_i_cookie insert;
int ret = 0;
- u64 i = 0;
bkey_cookie_init(&insert.k_i);
bch2_trans_init(&trans, c, 0, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
- insert.k.p = iter.pos;
-
- ret = commit_do(&trans, NULL, NULL, 0,
- bch2_btree_iter_traverse(&iter) ?:
- bch2_trans_update(&trans, &iter, &insert.k_i, 0));
- if (ret) {
- bch_err(c, "error in seq_insert: %i", ret);
- break;
- }
-
- if (++i == nr)
- break;
- }
- bch2_trans_iter_exit(&trans, &iter);
+ ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k,
+ NULL, NULL, 0,
+ ({
+ if (iter.pos.offset >= nr)
+ break;
+ insert.k.p = iter.pos;
+ bch2_trans_update(&trans, &iter, &insert.k_i, 0);
+ }));
+ if (ret)
+ bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret));
bch2_trans_exit(&trans);
return ret;
@@ -769,10 +801,11 @@ static int seq_lookup(struct bch_fs *c, u64 nr)
bch2_trans_init(&trans, c, 0, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
- SPOS(0, 0, U32_MAX), 0, k, ret)
- ;
- bch2_trans_iter_exit(&trans, &iter);
+ ret = for_each_btree_key2(&trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX), 0, k,
+ 0);
+ if (ret)
+ bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret));
bch2_trans_exit(&trans);
return ret;
@@ -787,22 +820,18 @@ static int seq_overwrite(struct bch_fs *c, u64 nr)
bch2_trans_init(&trans, c, 0, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
- SPOS(0, 0, U32_MAX),
- BTREE_ITER_INTENT, k, ret) {
- struct bkey_i_cookie u;
-
- bkey_reassemble(&u.k_i, k);
+ ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_xattrs,
+ SPOS(0, 0, U32_MAX),
+ BTREE_ITER_INTENT, k,
+ NULL, NULL, 0,
+ ({
+ struct bkey_i_cookie u;
- ret = commit_do(&trans, NULL, NULL, 0,
- bch2_btree_iter_traverse(&iter) ?:
- bch2_trans_update(&trans, &iter, &u.k_i, 0));
- if (ret) {
- bch_err(c, "error in seq_overwrite: %i", ret);
- break;
- }
- }
- bch2_trans_iter_exit(&trans, &iter);
+ bkey_reassemble(&u.k_i, k);
+ bch2_trans_update(&trans, &iter, &u.k_i, 0);
+ }));
+ if (ret)
+ bch_err(c, "error in %s(): %s", __func__, bch2_err_str(ret));
bch2_trans_exit(&trans);
return ret;
@@ -816,7 +845,7 @@ static int seq_delete(struct bch_fs *c, u64 nr)
SPOS(0, 0, U32_MAX), SPOS_MAX,
0, NULL);
if (ret)
- bch_err(c, "error in seq_delete: %i", ret);
+ bch_err(c, "error in seq_delete: %s", bch2_err_str(ret));
return ret;
}
@@ -853,7 +882,7 @@ static int btree_perf_test_thread(void *data)
ret = j->fn(j->c, div64_u64(j->nr, j->nr_threads));
if (ret) {
- bch_err(j->c, "%ps: error %i", j->fn, ret);
+ bch_err(j->c, "%ps: error %s", j->fn, bch2_err_str(ret));
j->ret = ret;
}
diff --git a/libbcachefs/trace.c b/libbcachefs/trace.c
index 59e8dfa3..70573981 100644
--- a/libbcachefs/trace.c
+++ b/libbcachefs/trace.c
@@ -2,11 +2,13 @@
#include "bcachefs.h"
#include "alloc_types.h"
#include "buckets.h"
-#include "btree_types.h"
+#include "btree_iter.h"
+#include "btree_locking.h"
#include "keylist.h"
+#include "opts.h"
#include <linux/blktrace_api.h>
-#include "keylist.h"
+#include <linux/six.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bcachefs.h>
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index 8ef4b591..ee2c7d9e 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -376,31 +376,37 @@ void bch2_time_stats_to_text(struct printbuf *out, struct time_stats *stats)
u64 q, last_q = 0;
int i;
- prt_printf(out, "count:\t\t%llu\n",
+ prt_printf(out, "count:\t\t%llu",
stats->count);
- prt_printf(out, "rate:\t\t%llu/sec\n",
+ prt_newline(out);
+ prt_printf(out, "rate:\t\t%llu/sec",
freq ? div64_u64(NSEC_PER_SEC, freq) : 0);
+ prt_newline(out);
prt_printf(out, "frequency:\t");
pr_time_units(out, freq);
- prt_printf(out, "\navg duration:\t");
+ prt_newline(out);
+ prt_printf(out, "avg duration:\t");
pr_time_units(out, stats->average_duration);
- prt_printf(out, "\nmax duration:\t");
+ prt_newline(out);
+ prt_printf(out, "max duration:\t");
pr_time_units(out, stats->max_duration);
i = eytzinger0_first(NR_QUANTILES);
u = pick_time_units(stats->quantiles.entries[i].m);
- prt_printf(out, "\nquantiles (%s):\t", u->name);
+ prt_newline(out);
+ prt_printf(out, "quantiles (%s):\t", u->name);
eytzinger0_for_each(i, NR_QUANTILES) {
bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1;
q = max(stats->quantiles.entries[i].m, last_q);
- prt_printf(out, "%llu%s",
- div_u64(q, u->nsecs),
- is_last ? "\n" : " ");
+ prt_printf(out, "%llu ",
+ div_u64(q, u->nsecs));
+ if (is_last)
+ prt_newline(out);
last_q = q;
}
}
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index 12361271..186ffab5 100644
--- a/libbcachefs/xattr.c
+++ b/libbcachefs/xattr.c
@@ -344,7 +344,7 @@ retry:
offset = iter.pos.offset;
bch2_trans_iter_exit(&trans, &iter);
err:
- if (ret == -EINTR)
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
bch2_trans_exit(&trans);