author    Kent Overstreet <kent.overstreet@gmail.com>    2019-03-24 21:06:58 -0400
committer Kent Overstreet <kent.overstreet@gmail.com>    2019-03-24 21:06:58 -0400
commit    ddb58076ef4fe4572ab8537785fc67052f47bf5b (patch)
tree      57e0c770dab82d9cf7bb02e40dd1126701657958 /libbcachefs
parent    0894d547501fb9306ae7b1c669b66a7b49fb2205 (diff)
Update bcachefs sources to ffe09df106 bcachefs: Verify fs hasn't been modified before going rw
Diffstat (limited to 'libbcachefs')
-rw-r--r--  libbcachefs/acl.c                      4
-rw-r--r--  libbcachefs/alloc_background.c       173
-rw-r--r--  libbcachefs/alloc_foreground.c        21
-rw-r--r--  libbcachefs/bcachefs.h                41
-rw-r--r--  libbcachefs/btree_gc.c                13
-rw-r--r--  libbcachefs/btree_io.c                 8
-rw-r--r--  libbcachefs/btree_types.h             11
-rw-r--r--  libbcachefs/btree_update.h            88
-rw-r--r--  libbcachefs/btree_update_interior.c   24
-rw-r--r--  libbcachefs/btree_update_leaf.c      801
-rw-r--r--  libbcachefs/buckets.c                108
-rw-r--r--  libbcachefs/buckets.h                 14
-rw-r--r--  libbcachefs/dirent.c                   4
-rw-r--r--  libbcachefs/ec.c                     128
-rw-r--r--  libbcachefs/extents.c                177
-rw-r--r--  libbcachefs/extents.h                 18
-rw-r--r--  libbcachefs/fs.c                      11
-rw-r--r--  libbcachefs/fsck.c                   112
-rw-r--r--  libbcachefs/inode.c                   29
-rw-r--r--  libbcachefs/io.c                      63
-rw-r--r--  libbcachefs/journal.c                  2
-rw-r--r--  libbcachefs/journal_io.c              35
-rw-r--r--  libbcachefs/migrate.c                 28
-rw-r--r--  libbcachefs/move.c                    44
-rw-r--r--  libbcachefs/quota.c                   19
-rw-r--r--  libbcachefs/recovery.c                42
-rw-r--r--  libbcachefs/replicas.c                48
-rw-r--r--  libbcachefs/replicas.h                 2
-rw-r--r--  libbcachefs/str_hash.h                19
-rw-r--r--  libbcachefs/super-io.c                88
-rw-r--r--  libbcachefs/super-io.h                 5
-rw-r--r--  libbcachefs/super.c                  132
-rw-r--r--  libbcachefs/super.h                    5
-rw-r--r--  libbcachefs/super_types.h              1
-rw-r--r--  libbcachefs/sysfs.c                    4
-rw-r--r--  libbcachefs/tests.c                   95
-rw-r--r--  libbcachefs/xattr.c                    2
37 files changed, 1341 insertions, 1078 deletions
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 348060b2..e1c7b87d 100644
--- a/libbcachefs/acl.c
+++ b/libbcachefs/acl.c
@@ -266,8 +266,8 @@ int bch2_set_acl_trans(struct btree_trans *trans,
if (IS_ERR(xattr))
return PTR_ERR(xattr);
- ret = __bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
- inode_u->bi_inum, &xattr->k_i, 0);
+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
+ inode_u->bi_inum, &xattr->k_i, 0);
} else {
struct xattr_search_key search =
X_SEARCH(acl_to_xattr_type(type), "", 0);
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 62f639b8..1a40ac21 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -309,10 +309,54 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
return 0;
}
-static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
+int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
+{
+ struct btree_trans trans;
+ struct btree_iter *iter;
+ struct bch_dev *ca;
+ int ret;
+
+ if (k->k.p.inode >= c->sb.nr_devices ||
+ !c->devs[k->k.p.inode])
+ return 0;
+
+ ca = bch_dev_bkey_exists(c, k->k.p.inode);
+
+ if (k->k.p.offset >= ca->mi.nbuckets)
+ return 0;
+
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
+ BTREE_ITER_INTENT);
+
+ ret = bch2_btree_iter_traverse(iter);
+ if (ret)
+ goto err;
+
+ /* check buckets_written with btree node locked: */
+ if (test_bit(k->k.p.offset, ca->buckets_written)) {
+ ret = 0;
+ goto err;
+ }
+
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));
+
+ ret = bch2_trans_commit(&trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_JOURNAL_REPLAY|
+ BTREE_INSERT_NOMARK);
+err:
+ bch2_trans_exit(&trans);
+ return ret;
+}
+
+static int __bch2_alloc_write_key(struct btree_trans *trans, struct bch_dev *ca,
size_t b, struct btree_iter *iter,
u64 *journal_seq, unsigned flags)
{
+ struct bch_fs *c = trans->c;
#if 0
__BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
#else
@@ -348,14 +392,15 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
bch2_btree_iter_cond_resched(iter);
- ret = bch2_btree_insert_at(c, NULL, journal_seq,
+ bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
+
+ ret = bch2_trans_commit(trans, NULL, journal_seq,
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_NOMARK|
- flags,
- BTREE_INSERT_ENTRY(iter, &a->k_i));
+ flags);
if (ret)
return ret;
@@ -369,42 +414,6 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
return 0;
}
-int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
-{
- struct bch_dev *ca;
- struct btree_iter iter;
- int ret;
-
- if (k->k.p.inode >= c->sb.nr_devices ||
- !c->devs[k->k.p.inode])
- return 0;
-
- ca = bch_dev_bkey_exists(c, k->k.p.inode);
-
- if (k->k.p.offset >= ca->mi.nbuckets)
- return 0;
-
- bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, k->k.p,
- BTREE_ITER_INTENT);
-
- ret = bch2_btree_iter_traverse(&iter);
- if (ret)
- goto err;
-
- /* check buckets_written with btree node locked: */
-
- ret = test_bit(k->k.p.offset, ca->buckets_written)
- ? 0
- : bch2_btree_insert_at(c, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_JOURNAL_REPLAY|
- BTREE_INSERT_NOMARK,
- BTREE_INSERT_ENTRY(&iter, k));
-err:
- bch2_btree_iter_unlock(&iter);
- return ret;
-}
-
int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
{
struct bch_dev *ca;
@@ -414,12 +423,15 @@ int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
*wrote = false;
for_each_rw_member(ca, c, i) {
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bucket_array *buckets;
size_t b;
- bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS_MIN,
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
down_read(&ca->bucket_lock);
buckets = bucket_array(ca);
@@ -430,7 +442,7 @@ int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
if (!buckets->b[b].mark.dirty)
continue;
- ret = __bch2_alloc_write_key(c, ca, b, &iter, NULL,
+ ret = __bch2_alloc_write_key(&trans, ca, b, iter, NULL,
nowait
? BTREE_INSERT_NOWAIT
: 0);
@@ -440,7 +452,8 @@ int bch2_alloc_write(struct bch_fs *c, bool nowait, bool *wrote)
*wrote = true;
}
up_read(&ca->bucket_lock);
- bch2_btree_iter_unlock(&iter);
+
+ bch2_trans_exit(&trans);
if (ret) {
percpu_ref_put(&ca->io_ref);
@@ -886,7 +899,8 @@ static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
}
}
-static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca,
+static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
+ struct bch_dev *ca,
struct btree_iter *iter,
u64 *journal_seq, unsigned flags)
{
@@ -896,6 +910,7 @@ static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca,
/* hack: */
__BKEY_PADDED(k, 8) alloc_key;
#endif
+ struct bch_fs *c = trans->c;
struct bkey_i_alloc *a;
struct bkey_alloc_unpacked u;
struct bucket_mark m;
@@ -958,6 +973,8 @@ retry:
a->k.p = iter->pos;
bch2_alloc_pack(a, u);
+ bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
+
/*
* XXX:
* when using deferred btree updates, we have journal reclaim doing
@@ -965,16 +982,15 @@ retry:
* progress, and here the allocator is requiring space in the journal -
* so we need a journal pre-reservation:
*/
- ret = bch2_btree_insert_at(c, NULL,
- invalidating_cached_data ? journal_seq : NULL,
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOUNLOCK|
- BTREE_INSERT_NOCHECK_RW|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_USE_ALLOC_RESERVE|
- flags,
- BTREE_INSERT_ENTRY(iter, &a->k_i));
+ ret = bch2_trans_commit(trans, NULL,
+ invalidating_cached_data ? journal_seq : NULL,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOUNLOCK|
+ BTREE_INSERT_NOCHECK_RW|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE|
+ BTREE_INSERT_USE_ALLOC_RESERVE|
+ flags);
if (ret == -EINTR)
goto retry;
@@ -1048,23 +1064,27 @@ static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
*/
static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
u64 journal_seq = 0;
int ret = 0;
- bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
+ POS(ca->dev_idx, 0),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
/* Only use nowait if we've already invalidated at least one bucket: */
while (!ret &&
!fifo_full(&ca->free_inc) &&
ca->alloc_heap.used)
- ret = bch2_invalidate_one_bucket2(c, ca, &iter, &journal_seq,
+ ret = bch2_invalidate_one_bucket2(&trans, ca, iter, &journal_seq,
BTREE_INSERT_GC_LOCK_HELD|
(!fifo_empty(&ca->free_inc)
? BTREE_INSERT_NOWAIT : 0));
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
/* If we used NOWAIT, don't return the error: */
if (!fifo_empty(&ca->free_inc))
@@ -1606,7 +1626,7 @@ static bool bch2_fs_allocator_start_fast(struct bch_fs *c)
return ret;
}
-static int __bch2_fs_allocator_start(struct bch_fs *c)
+int bch2_fs_allocator_start(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned dev_iter;
@@ -1615,6 +1635,10 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
long bu;
int ret = 0;
+ if (!test_alloc_startup(c) &&
+ bch2_fs_allocator_start_fast(c))
+ return 0;
+
pr_debug("not enough empty buckets; scanning for reclaimable buckets");
/*
@@ -1689,31 +1713,6 @@ err:
return ret;
}
-int bch2_fs_allocator_start(struct bch_fs *c)
-{
- struct bch_dev *ca;
- unsigned i;
- int ret;
-
- ret = bch2_fs_allocator_start_fast(c) ? 0 :
- __bch2_fs_allocator_start(c);
- if (ret)
- return ret;
-
- set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
-
- for_each_rw_member(ca, c, i) {
- ret = bch2_dev_allocator_start(ca);
- if (ret) {
- percpu_ref_put(&ca->io_ref);
- return ret;
- }
- }
-
- set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
- return 0;
-}
-
void bch2_fs_allocator_background_init(struct bch_fs *c)
{
spin_lock_init(&c->freelist_lock);
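
The alloc_background.c conversions above set the template repeated throughout this commit: each open-coded bch2_btree_iter_init() / bch2_btree_insert_at() / bch2_btree_iter_unlock() sequence becomes an explicit btree_trans lifecycle. A minimal sketch of the new shape, using only the signatures visible in these hunks (illustrative only, not a drop-in kernel function; error handling elided):

/*
 * Sketch of the btree_trans pattern introduced by this commit.
 * Signatures are taken from the hunks above; illustrative only.
 */
static int example_update_one_key(struct bch_fs *c, struct bkey_i *k)
{
	struct btree_trans trans;
	struct btree_iter *iter;
	int ret;

	bch2_trans_init(&trans, c);	/* replaces bch2_btree_iter_init() */

	iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
				   BTREE_ITER_INTENT);

	/* updates are queued on the transaction... */
	bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));

	/*
	 * ...and committed in one call, where bch2_btree_insert_at() used
	 * to take the entries as variadic arguments:
	 */
	ret = bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);

	bch2_trans_exit(&trans);	/* replaces bch2_btree_iter_unlock() */
	return ret;
}
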
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 6568e8ac..7fb1e5a4 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -245,6 +245,10 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
if (cl)
closure_wait(&c->open_buckets_wait, cl);
+
+ if (!c->blocked_allocate_open_bucket)
+ c->blocked_allocate_open_bucket = local_clock();
+
spin_unlock(&c->freelist_lock);
trace_open_bucket_alloc_fail(ca, reserve);
return ERR_PTR(-OPEN_BUCKETS_EMPTY);
@@ -275,6 +279,9 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
if (cl)
closure_wait(&c->freelist_wait, cl);
+ if (!c->blocked_allocate)
+ c->blocked_allocate = local_clock();
+
spin_unlock(&c->freelist_lock);
trace_bucket_alloc_fail(ca, reserve);
@@ -300,6 +307,20 @@ out:
bucket_io_clock_reset(c, ca, bucket, WRITE);
spin_unlock(&ob->lock);
+ if (c->blocked_allocate_open_bucket) {
+ bch2_time_stats_update(
+ &c->times[BCH_TIME_blocked_allocate_open_bucket],
+ c->blocked_allocate_open_bucket);
+ c->blocked_allocate_open_bucket = 0;
+ }
+
+ if (c->blocked_allocate) {
+ bch2_time_stats_update(
+ &c->times[BCH_TIME_blocked_allocate],
+ c->blocked_allocate);
+ c->blocked_allocate = 0;
+ }
+
spin_unlock(&c->freelist_lock);
bch2_wake_allocator(ca);
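
The two new bch_fs fields used here, blocked_allocate and blocked_allocate_open_bucket, implement a simple latency-accounting idiom: stamp the clock the first time an allocation blocks, and feed the elapsed time into the matching BCH_TIME_ counter once an allocation finally succeeds. A self-contained userspace analogue of the idiom (all names below are hypothetical stand-ins for local_clock() and bch2_time_stats_update()):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Hypothetical stand-in for local_clock(). */
static uint64_t clock_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t) ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

static uint64_t blocked_since;	/* 0 means "not currently blocked" */

static void alloc_failed(void)
{
	if (!blocked_since)	/* stamp only the first blocked attempt */
		blocked_since = clock_ns();
}

static void alloc_succeeded(void)
{
	if (blocked_since) {	/* report total time spent blocked */
		printf("blocked for %llu ns\n",
		       (unsigned long long) (clock_ns() - blocked_since));
		blocked_since = 0;
	}
}

int main(void)
{
	alloc_failed();		/* first failure starts the clock */
	alloc_failed();		/* later failures do not reset it */
	alloc_succeeded();	/* success reports and clears it */
	return 0;
}
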
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 052ec263..ac90d8aa 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -275,7 +275,11 @@ do { \
"cached data") \
BCH_DEBUG_PARAM(force_reconstruct_read, \
"Force reads to use the reconstruct path, when reading" \
- "from erasure coded extents")
+ "from erasure coded extents") \
+ BCH_DEBUG_PARAM(test_restart_gc, \
+ "Test restarting mark and sweep gc when bucket gens change")\
+ BCH_DEBUG_PARAM(test_reconstruct_alloc, \
+ "Test reconstructing the alloc btree")
#define BCH_DEBUG_PARAMS_ALL() BCH_DEBUG_PARAMS_ALWAYS() BCH_DEBUG_PARAMS_DEBUG()
@@ -287,10 +291,11 @@ do { \
#define BCH_TIME_STATS() \
x(btree_node_mem_alloc) \
+ x(btree_node_split) \
+ x(btree_node_sort) \
+ x(btree_node_read) \
x(btree_gc) \
- x(btree_split) \
- x(btree_sort) \
- x(btree_read) \
+ x(btree_update) \
x(btree_lock_contended_read) \
x(btree_lock_contended_intent) \
x(btree_lock_contended_write) \
@@ -299,8 +304,10 @@ do { \
x(data_promote) \
x(journal_write) \
x(journal_delay) \
- x(journal_blocked) \
- x(journal_flush_seq)
+ x(journal_flush_seq) \
+ x(blocked_journal) \
+ x(blocked_allocate) \
+ x(blocked_allocate_open_bucket)
enum bch_time_stats {
#define x(name) BCH_TIME_##name,
@@ -380,6 +387,7 @@ struct bch_dev {
char name[BDEVNAME_SIZE];
struct bch_sb_handle disk_sb;
+ struct bch_sb *sb_read_scratch;
int sb_write_error;
struct bch_devs_mask self;
@@ -476,6 +484,7 @@ enum {
BCH_FS_INITIAL_GC_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_STARTED,
+ BCH_FS_RW,
/* shutdown: */
BCH_FS_EMERGENCY_RO,
@@ -500,13 +509,6 @@ struct btree_debug {
struct dentry *failed;
};
-enum bch_fs_state {
- BCH_FS_STARTING = 0,
- BCH_FS_STOPPING,
- BCH_FS_RO,
- BCH_FS_RW,
-};
-
struct bch_fs_pcpu {
u64 sectors_available;
};
@@ -528,7 +530,6 @@ struct bch_fs {
/* ro/rw, add/remove devices: */
struct mutex state_lock;
- enum bch_fs_state state;
/* Counts outstanding writes, for clean transition to read-only */
struct percpu_ref writes;
@@ -632,7 +633,10 @@ struct bch_fs {
struct percpu_rw_semaphore mark_lock;
struct bch_fs_usage __percpu *usage[2];
- struct bch_fs_usage __percpu *usage_scratch;
+
+ /* single element mempool: */
+ struct mutex usage_scratch_lock;
+ struct bch_fs_usage *usage_scratch;
/*
* When we invalidate buckets, we use both the priority and the amount
@@ -647,6 +651,8 @@ struct bch_fs {
/* ALLOCATOR */
spinlock_t freelist_lock;
struct closure_waitlist freelist_wait;
+ u64 blocked_allocate;
+ u64 blocked_allocate_open_bucket;
u8 open_buckets_freelist;
u8 open_buckets_nr_free;
struct closure_waitlist open_buckets_wait;
@@ -785,11 +791,6 @@ static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
#endif
}
-static inline bool bch2_fs_running(struct bch_fs *c)
-{
- return c->state == BCH_FS_RO || c->state == BCH_FS_RW;
-}
-
static inline unsigned bucket_bytes(const struct bch_dev *ca)
{
return ca->mi.bucket_size << 9;
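
BCH_TIME_STATS() above is an x-macro list: each x(name) entry is expanded once to define the enum constant BCH_TIME_##name and again wherever the counters are named or printed, which is why renaming btree_split to btree_node_split and adding the blocked_* entries needs no further bookkeeping. A standalone illustration of the expansion technique (hypothetical names, plain C):

#include <stdio.h>

/* Hypothetical miniature of the BCH_TIME_STATS() x-macro list. */
#define TIME_STATS()			\
	x(btree_node_split)		\
	x(blocked_allocate)		\
	x(blocked_allocate_open_bucket)

/* First expansion: one enum constant per entry. */
enum time_stats {
#define x(name) TIME_##name,
	TIME_STATS()
#undef x
	TIME_NR
};

/* Second expansion: matching human-readable names, kept in sync for free. */
static const char *time_stat_names[] = {
#define x(name) #name,
	TIME_STATS()
#undef x
};

int main(void)
{
	for (int i = 0; i < TIME_NR; i++)
		printf("%d: %s\n", i, time_stat_names[i]);
	return 0;
}
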
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 5d6f6364..af75878c 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -258,15 +258,14 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
return ret;
mutex_lock(&c->btree_root_lock);
-
b = c->btree_roots[btree_id].b;
if (!btree_node_fake(b))
- bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
- &max_stale, initial);
+ ret = bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key),
+ &max_stale, initial);
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
-
mutex_unlock(&c->btree_root_lock);
- return 0;
+
+ return ret;
}
static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
@@ -747,7 +746,9 @@ again:
c->gc_count++;
out:
- if (!ret && test_bit(BCH_FS_FIXED_GENS, &c->flags)) {
+ if (!ret &&
+ (test_bit(BCH_FS_FIXED_GENS, &c->flags) ||
+ (!iter && test_restart_gc(c)))) {
/*
* XXX: make sure gens we fixed got saved
*/
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 25aa22a0..f2107cf7 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -327,7 +327,7 @@ static void btree_node_sort(struct bch_fs *c, struct btree *b,
BUG_ON(vstruct_end(&out->keys) > (void *) out + (PAGE_SIZE << order));
if (sorting_entire_node)
- bch2_time_stats_update(&c->times[BCH_TIME_btree_sort],
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
start_time);
/* Make sure we preserve bset journal_seq: */
@@ -403,7 +403,8 @@ void bch2_btree_sort_into(struct bch_fs *c,
&dst->format,
true);
- bch2_time_stats_update(&c->times[BCH_TIME_btree_sort], start_time);
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_sort],
+ start_time);
set_btree_bset_end(dst, dst->set);
@@ -989,7 +990,8 @@ start:
}
}
- bch2_time_stats_update(&c->times[BCH_TIME_btree_read], rb->start_time);
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read],
+ rb->start_time);
bio_put(&rb->bio);
clear_btree_node_read_in_flight(b);
wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index a6aea023..d566722a 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -273,6 +273,7 @@ struct btree_insert_entry {
struct btree_trans {
struct bch_fs *c;
size_t nr_restarts;
+ u64 commit_start;
u64 iters_live;
u64 iters_linked;
@@ -289,6 +290,13 @@ struct btree_trans {
struct btree_iter *iters;
struct btree_insert_entry *updates;
+ /* update path: */
+ struct journal_res journal_res;
+ struct journal_preres journal_preres;
+ u64 *journal_seq;
+ struct disk_reservation *disk_res;
+ unsigned flags;
+
struct btree_iter iters_onstack[2];
struct btree_insert_entry updates_onstack[6];
};
@@ -489,12 +497,11 @@ struct btree_root {
enum btree_insert_ret {
BTREE_INSERT_OK,
- /* extent spanned multiple leaf nodes: have to traverse to next node: */
- BTREE_INSERT_NEED_TRAVERSE,
/* leaf node needs to be split */
BTREE_INSERT_BTREE_NODE_FULL,
BTREE_INSERT_ENOSPC,
BTREE_INSERT_NEED_MARK_REPLICAS,
+ BTREE_INSERT_NEED_JOURNAL_RES,
};
enum btree_gc_coalesce_fail_reason {
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index 1f371b5a..ce5fa6b2 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -6,13 +6,12 @@
struct bch_fs;
struct btree;
-struct btree_insert;
void bch2_btree_node_lock_for_insert(struct bch_fs *, struct btree *,
struct btree_iter *);
bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bkey_i *);
-void bch2_btree_journal_key(struct btree_insert *trans, struct btree_iter *,
+void bch2_btree_journal_key(struct btree_trans *, struct btree_iter *,
struct bkey_i *);
void bch2_deferred_update_free(struct bch_fs *,
@@ -20,23 +19,6 @@ void bch2_deferred_update_free(struct bch_fs *,
struct deferred_update *
bch2_deferred_update_alloc(struct bch_fs *, enum btree_id, unsigned);
-/* Normal update interface: */
-
-struct btree_insert {
- struct bch_fs *c;
- struct disk_reservation *disk_res;
- struct journal_res journal_res;
- struct journal_preres journal_preres;
- u64 *journal_seq;
- unsigned flags;
- bool did_work;
-
- unsigned short nr;
- struct btree_insert_entry *entries;
-};
-
-int __bch2_btree_insert_at(struct btree_insert *);
-
#define BTREE_INSERT_ENTRY(_iter, _k) \
((struct btree_insert_entry) { \
.iter = (_iter), \
@@ -50,35 +32,12 @@ int __bch2_btree_insert_at(struct btree_insert *);
.deferred = true, \
})
-/**
- * bch_btree_insert_at - insert one or more keys at iterator positions
- * @iter: btree iterator
- * @insert_key: key to insert
- * @disk_res: disk reservation
- * @hook: extent insert callback
- *
- * Return values:
- * -EINTR: locking changed, this function should be called again. Only returned
- * if passed BTREE_INSERT_ATOMIC.
- * -EROFS: filesystem read only
- * -EIO: journal or btree node IO error
- */
-#define bch2_btree_insert_at(_c, _disk_res, _journal_seq, _flags, ...) \
- __bch2_btree_insert_at(&(struct btree_insert) { \
- .c = (_c), \
- .disk_res = (_disk_res), \
- .journal_seq = (_journal_seq), \
- .flags = (_flags), \
- .nr = COUNT_ARGS(__VA_ARGS__), \
- .entries = (struct btree_insert_entry[]) { \
- __VA_ARGS__ \
- }})
-
enum {
__BTREE_INSERT_ATOMIC,
__BTREE_INSERT_NOUNLOCK,
__BTREE_INSERT_NOFAIL,
__BTREE_INSERT_NOCHECK_RW,
+ __BTREE_INSERT_LAZY_RW,
__BTREE_INSERT_USE_RESERVE,
__BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
@@ -105,6 +64,7 @@ enum {
#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL)
#define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW)
+#define BTREE_INSERT_LAZY_RW (1 << __BTREE_INSERT_LAZY_RW)
/* for copygc, or when merging btree nodes */
#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
@@ -125,10 +85,7 @@ enum {
#define BCH_HASH_SET_MUST_CREATE (1 << __BCH_HASH_SET_MUST_CREATE)
#define BCH_HASH_SET_MUST_REPLACE (1 << __BCH_HASH_SET_MUST_REPLACE)
-int bch2_btree_delete_at(struct btree_iter *, unsigned);
-
-int bch2_btree_insert_list_at(struct btree_iter *, struct keylist *,
- struct disk_reservation *, u64 *, unsigned);
+int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
struct disk_reservation *, u64 *, int flags);
@@ -141,8 +98,6 @@ int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *,
int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *,
struct btree *, struct bkey_i_btree_ptr *);
-/* new transactional interface: */
-
static inline void
bch2_trans_update(struct btree_trans *trans,
struct btree_insert_entry entry)
@@ -174,4 +129,39 @@ int bch2_trans_commit(struct btree_trans *,
_ret; \
})
+/*
+ * We sort transaction entries so that if multiple iterators point to the same
+ * leaf node they'll be adjacent:
+ */
+static inline bool same_leaf_as_prev(struct btree_trans *trans,
+ struct btree_insert_entry *i)
+{
+ return i != trans->updates &&
+ !i->deferred &&
+ i[0].iter->l[0].b == i[-1].iter->l[0].b;
+}
+
+#define __trans_next_update(_trans, _i, _filter) \
+({ \
+ while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\
+ (_i)++; \
+ \
+ (_i) < (_trans)->updates + (_trans->nr_updates); \
+})
+
+#define __trans_for_each_update(_trans, _i, _filter) \
+ for ((_i) = (_trans)->updates; \
+ __trans_next_update(_trans, _i, _filter); \
+ (_i)++)
+
+#define trans_for_each_update(trans, i) \
+ __trans_for_each_update(trans, i, true)
+
+#define trans_for_each_update_iter(trans, i) \
+ __trans_for_each_update(trans, i, !(i)->deferred)
+
+#define trans_for_each_update_leaf(trans, i) \
+ __trans_for_each_update(trans, i, !(i)->deferred && \
+ !same_leaf_as_prev(trans, i))
+
#endif /* _BCACHEFS_BTREE_UPDATE_H */
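
The __trans_next_update()/__trans_for_each_update() pair moved into this header is a filtered-iteration macro: the statement-expression helper skips entries failing the filter and doubles as the loop condition, so trans_for_each_update_iter() and trans_for_each_update_leaf() are just different filters over the same updates array. A compilable GNU C analogue of the same construction (hypothetical names):

#include <stdbool.h>
#include <stdio.h>

struct entry { bool deferred; int val; };

/*
 * Analogue of __trans_next_update(): a GNU C statement expression that
 * advances past entries failing _filter and reports whether one remains.
 */
#define next_matching(_arr, _n, _i, _filter)			\
({								\
	while ((_i) < (_arr) + (_n) && !(_filter))		\
		(_i)++;						\
								\
	(_i) < (_arr) + (_n);					\
})

#define for_each_matching(_arr, _n, _i, _filter)		\
	for ((_i) = (_arr);					\
	     next_matching(_arr, _n, _i, _filter);		\
	     (_i)++)

int main(void)
{
	struct entry e[] = { { true, 1 }, { false, 2 }, { false, 3 } };
	struct entry *i;

	/* Analogue of trans_for_each_update_iter(): non-deferred only. */
	for_each_matching(e, 3, i, !i->deferred)
		printf("%d\n", i->val);		/* prints 2, then 3 */

	return 0;
}
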
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index b1b858de..47196c14 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -1074,8 +1074,8 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
__bch2_btree_set_root_inmem(c, b);
mutex_lock(&c->btree_interior_update_lock);
- percpu_down_read_preempt_disable(&c->mark_lock);
- fs_usage = bch2_fs_usage_get_scratch(c);
+ percpu_down_read(&c->mark_lock);
+ fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
true, 0,
@@ -1088,7 +1088,8 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
fs_usage);
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
- percpu_up_read_preempt_enable(&c->mark_lock);
+ bch2_fs_usage_scratch_put(c, fs_usage);
+ percpu_up_read(&c->mark_lock);
mutex_unlock(&c->btree_interior_update_lock);
}
@@ -1167,8 +1168,8 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, b));
mutex_lock(&c->btree_interior_update_lock);
- percpu_down_read_preempt_disable(&c->mark_lock);
- fs_usage = bch2_fs_usage_get_scratch(c);
+ percpu_down_read(&c->mark_lock);
+ fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
true, 0,
@@ -1189,7 +1190,8 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
- percpu_up_read_preempt_enable(&c->mark_lock);
+ bch2_fs_usage_scratch_put(c, fs_usage);
+ percpu_up_read(&c->mark_lock);
mutex_unlock(&c->btree_interior_update_lock);
bch2_btree_bset_insert_key(iter, b, node_iter, insert);
@@ -1437,7 +1439,8 @@ static void btree_split(struct btree_update *as, struct btree *b,
bch2_btree_iter_verify_locks(iter);
- bch2_time_stats_update(&c->times[BCH_TIME_btree_split], start_time);
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_split],
+ start_time);
}
static void
@@ -1981,8 +1984,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
bch2_btree_node_lock_write(b, iter);
mutex_lock(&c->btree_interior_update_lock);
- percpu_down_read_preempt_disable(&c->mark_lock);
- fs_usage = bch2_fs_usage_get_scratch(c);
+ percpu_down_read(&c->mark_lock);
+ fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
true, 0,
@@ -1993,7 +1996,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
fs_usage);
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
- percpu_up_read_preempt_enable(&c->mark_lock);
+ bch2_fs_usage_scratch_put(c, fs_usage);
+ percpu_up_read(&c->mark_lock);
mutex_unlock(&c->btree_interior_update_lock);
if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
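
Every hunk in this file makes the same substitution: percpu_down_read_preempt_disable() becomes a plain percpu_down_read() (so marking no longer runs with preemption disabled), and the per-cpu scratch buffer is replaced by an explicit get/put pair around the marking and apply steps. A hedged sketch of the resulting sequence, based only on the calls visible above (argument lists abbreviated; illustrative, not kernel code):

/*
 * Shape of the new marking sequence used throughout this file.
 * Illustrative sketch; assumes the signatures shown in the hunks above.
 */
static void example_mark_sequence(struct bch_fs *c,
				  struct disk_reservation *disk_res)
{
	struct bch_fs_usage *fs_usage;

	percpu_down_read(&c->mark_lock);	/* preemption stays enabled */
	fs_usage = bch2_fs_usage_scratch_get(c);/* may allocate or block */

	/* ... bch2_mark_key_locked() calls accumulate into fs_usage ... */

	bch2_fs_usage_apply(c, fs_usage, disk_res);

	bch2_fs_usage_scratch_put(c, fs_usage);	/* put before unlocking */
	percpu_up_read(&c->mark_lock);
}
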
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 4a4904e7..e207b099 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -17,8 +17,64 @@
#include <linux/sort.h>
#include <trace/events/bcachefs.h>
-static bool btree_trans_relock(struct btree_insert *);
-static void btree_trans_unlock(struct btree_insert *);
+inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
+ struct btree_iter *iter)
+{
+ bch2_btree_node_lock_write(b, iter);
+
+ if (btree_node_just_written(b) &&
+ bch2_btree_post_write_cleanup(c, b))
+ bch2_btree_iter_reinit_node(iter, b);
+
+ /*
+ * If the last bset has been written, or if it's gotten too big - start
+ * a new bset to insert into:
+ */
+ if (want_new_bset(c, b))
+ bch2_btree_init_next(c, b, iter);
+}
+
+static void btree_trans_lock_write(struct bch_fs *c, struct btree_trans *trans)
+{
+ struct btree_insert_entry *i;
+
+ trans_for_each_update_leaf(trans, i)
+ bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
+}
+
+static void btree_trans_unlock_write(struct btree_trans *trans)
+{
+ struct btree_insert_entry *i;
+
+ trans_for_each_update_leaf(trans, i)
+ bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+}
+
+static bool btree_trans_relock(struct btree_trans *trans)
+{
+ struct btree_insert_entry *i;
+
+ trans_for_each_update_iter(trans, i)
+ return bch2_btree_iter_relock(i->iter);
+ return true;
+}
+
+static void btree_trans_unlock(struct btree_trans *trans)
+{
+ struct btree_insert_entry *i;
+
+ trans_for_each_update_iter(trans, i) {
+ bch2_btree_iter_unlock(i->iter);
+ break;
+ }
+}
+
+static inline int btree_trans_cmp(struct btree_insert_entry l,
+ struct btree_insert_entry r)
+{
+ return (l.deferred > r.deferred) - (l.deferred < r.deferred) ?:
+ btree_iter_cmp(l.iter, r.iter);
+}
/* Inserting into a given leaf node (last stage of insert): */
@@ -129,7 +185,7 @@ static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin,
return __btree_node_flush(j, pin, 1, seq);
}
-static inline void __btree_journal_key(struct btree_insert *trans,
+static inline void __btree_journal_key(struct btree_trans *trans,
enum btree_id btree_id,
struct bkey_i *insert)
{
@@ -150,7 +206,7 @@ static inline void __btree_journal_key(struct btree_insert *trans,
*trans->journal_seq = seq;
}
-void bch2_btree_journal_key(struct btree_insert *trans,
+void bch2_btree_journal_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert)
{
@@ -184,9 +240,8 @@ void bch2_btree_journal_key(struct btree_insert *trans,
set_btree_node_dirty(b);
}
-static enum btree_insert_ret
-bch2_insert_fixup_key(struct btree_insert *trans,
- struct btree_insert_entry *insert)
+static void bch2_insert_fixup_key(struct btree_trans *trans,
+ struct btree_insert_entry *insert)
{
struct btree_iter *iter = insert->iter;
struct btree_iter_level *l = &iter->l[0];
@@ -198,30 +253,25 @@ bch2_insert_fixup_key(struct btree_insert *trans,
if (bch2_btree_bset_insert_key(iter, l->b, &l->iter,
insert->k))
bch2_btree_journal_key(trans, iter, insert->k);
-
- return BTREE_INSERT_OK;
}
/**
* btree_insert_key - insert one key into a leaf node
*/
-static enum btree_insert_ret
-btree_insert_key_leaf(struct btree_insert *trans,
- struct btree_insert_entry *insert)
+static void btree_insert_key_leaf(struct btree_trans *trans,
+ struct btree_insert_entry *insert)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter = insert->iter;
struct btree *b = iter->l[0].b;
- enum btree_insert_ret ret;
int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
- bch2_mark_update(trans, insert);
-
- ret = !btree_node_is_extents(b)
- ? bch2_insert_fixup_key(trans, insert)
- : bch2_insert_fixup_extent(trans, insert);
+ if (!btree_node_is_extents(b))
+ bch2_insert_fixup_key(trans, insert);
+ else
+ bch2_insert_fixup_extent(trans, insert);
live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
@@ -236,7 +286,6 @@ btree_insert_key_leaf(struct btree_insert *trans,
bch2_btree_iter_reinit_node(iter, b);
trace_btree_insert_key(c, b, insert->k);
- return ret;
}
/* Deferred btree updates: */
@@ -290,9 +339,8 @@ static void deferred_update_flush(struct journal *j,
kfree(k);
}
-static enum btree_insert_ret
-btree_insert_key_deferred(struct btree_insert *trans,
- struct btree_insert_entry *insert)
+static void btree_insert_key_deferred(struct btree_trans *trans,
+ struct btree_insert_entry *insert)
{
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
@@ -320,8 +368,6 @@ btree_insert_key_deferred(struct btree_insert *trans,
bch2_journal_pin_update(j, trans->journal_res.seq, &d->journal,
deferred_update_flush);
spin_unlock(&d->lock);
-
- return BTREE_INSERT_OK;
}
void bch2_deferred_update_free(struct bch_fs *c,
@@ -357,106 +403,93 @@ bch2_deferred_update_alloc(struct bch_fs *c,
return d;
}
-/* struct btree_insert operations: */
+/* Normal update interface: */
-/*
- * We sort transaction entries so that if multiple iterators point to the same
- * leaf node they'll be adjacent:
- */
-static bool same_leaf_as_prev(struct btree_insert *trans,
- struct btree_insert_entry *i)
+static inline void btree_insert_entry_checks(struct btree_trans *trans,
+ struct btree_insert_entry *i)
{
- return i != trans->entries &&
- !i->deferred &&
- i[0].iter->l[0].b == i[-1].iter->l[0].b;
-}
-
-#define __trans_next_entry(_trans, _i, _filter) \
-({ \
- while ((_i) < (_trans)->entries + (_trans->nr) && !(_filter)) \
- (_i)++; \
- \
- (_i) < (_trans)->entries + (_trans->nr); \
-})
+ struct bch_fs *c = trans->c;
+ enum btree_id btree_id = !i->deferred
+ ? i->iter->btree_id
+ : i->d->btree_id;
-#define __trans_for_each_entry(_trans, _i, _filter) \
- for ((_i) = (_trans)->entries; \
- __trans_next_entry(_trans, _i, _filter); \
- (_i)++)
+ if (!i->deferred) {
+ BUG_ON(i->iter->level);
+ BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
+ EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
+ !bch2_extent_is_atomic(i->k, i->iter));
-#define trans_for_each_entry(trans, i) \
- __trans_for_each_entry(trans, i, true)
+ EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
+ !(trans->flags & BTREE_INSERT_ATOMIC));
-#define trans_for_each_iter(trans, i) \
- __trans_for_each_entry(trans, i, !(i)->deferred)
+ bch2_btree_iter_verify_locks(i->iter);
+ }
-#define trans_for_each_leaf(trans, i) \
- __trans_for_each_entry(trans, i, !(i)->deferred && \
- !same_leaf_as_prev(trans, i))
+ BUG_ON(debug_check_bkeys(c) &&
+ !bkey_deleted(&i->k->k) &&
+ bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), btree_id));
+}
-inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
- struct btree_iter *iter)
+static int bch2_trans_journal_preres_get(struct btree_trans *trans)
{
- bch2_btree_node_lock_write(b, iter);
+ struct bch_fs *c = trans->c;
+ struct btree_insert_entry *i;
+ unsigned u64s = 0;
+ int ret;
- if (btree_node_just_written(b) &&
- bch2_btree_post_write_cleanup(c, b))
- bch2_btree_iter_reinit_node(iter, b);
+ trans_for_each_update(trans, i)
+ if (i->deferred)
+ u64s += jset_u64s(i->k->k.u64s);
- /*
- * If the last bset has been written, or if it's gotten too big - start
- * a new bset to insert into:
- */
- if (want_new_bset(c, b))
- bch2_btree_init_next(c, b, iter);
-}
+ if (!u64s)
+ return 0;
-static void multi_lock_write(struct bch_fs *c, struct btree_insert *trans)
-{
- struct btree_insert_entry *i;
+ ret = bch2_journal_preres_get(&c->journal,
+ &trans->journal_preres, u64s,
+ JOURNAL_RES_GET_NONBLOCK);
+ if (ret != -EAGAIN)
+ return ret;
- trans_for_each_leaf(trans, i)
- bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
-}
+ btree_trans_unlock(trans);
-static void multi_unlock_write(struct btree_insert *trans)
-{
- struct btree_insert_entry *i;
+ ret = bch2_journal_preres_get(&c->journal,
+ &trans->journal_preres, u64s, 0);
+ if (ret)
+ return ret;
- trans_for_each_leaf(trans, i)
- bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
-}
+ if (!btree_trans_relock(trans)) {
+ trans_restart(" (iter relock after journal preres get blocked)");
+ return -EINTR;
+ }
-static inline int btree_trans_cmp(struct btree_insert_entry l,
- struct btree_insert_entry r)
-{
- return (l.deferred > r.deferred) - (l.deferred < r.deferred) ?:
- btree_iter_cmp(l.iter, r.iter);
+ return 0;
}
-static bool btree_trans_relock(struct btree_insert *trans)
+static int bch2_trans_journal_res_get(struct btree_trans *trans,
+ unsigned flags)
{
+ struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
+ unsigned u64s = 0;
+ int ret;
- trans_for_each_iter(trans, i)
- return bch2_btree_iter_relock(i->iter);
- return true;
-}
+ if (unlikely(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+ return 0;
-static void btree_trans_unlock(struct btree_insert *trans)
-{
- struct btree_insert_entry *i;
+ if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
+ flags |= JOURNAL_RES_GET_RESERVED;
- trans_for_each_iter(trans, i) {
- bch2_btree_iter_unlock(i->iter);
- break;
- }
-}
+ trans_for_each_update(trans, i)
+ u64s += jset_u64s(i->k->k.u64s);
-/* Normal update interface: */
+ ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
+ u64s, flags);
+
+ return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
+}
static enum btree_insert_ret
-btree_key_can_insert(struct btree_insert *trans,
+btree_key_can_insert(struct btree_trans *trans,
struct btree_insert_entry *insert,
unsigned *u64s)
{
@@ -467,11 +500,6 @@ btree_key_can_insert(struct btree_insert *trans,
if (unlikely(btree_node_fake(b)))
return BTREE_INSERT_BTREE_NODE_FULL;
- if (!bch2_bkey_replicas_marked(c,
- bkey_i_to_s_c(insert->k),
- true))
- return BTREE_INSERT_NEED_MARK_REPLICAS;
-
ret = !btree_node_is_extents(b)
? BTREE_INSERT_OK
: bch2_extent_can_insert(trans, insert, u64s);
@@ -484,33 +512,71 @@ btree_key_can_insert(struct btree_insert *trans,
return BTREE_INSERT_OK;
}
-static inline enum btree_insert_ret
-do_btree_insert_one(struct btree_insert *trans,
- struct btree_insert_entry *insert)
+static int btree_trans_check_can_insert(struct btree_trans *trans,
+ struct btree_insert_entry **stopped_at)
{
- return likely(!insert->deferred)
- ? btree_insert_key_leaf(trans, insert)
- : btree_insert_key_deferred(trans, insert);
+ struct btree_insert_entry *i;
+ unsigned u64s = 0;
+ int ret;
+
+ trans_for_each_update_iter(trans, i) {
+ /* Multiple inserts might go to same leaf: */
+ if (!same_leaf_as_prev(trans, i))
+ u64s = 0;
+
+ u64s += i->k->k.u64s;
+ ret = btree_key_can_insert(trans, i, &u64s);
+ if (ret) {
+ *stopped_at = i;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static inline void do_btree_insert_one(struct btree_trans *trans,
+ struct btree_insert_entry *insert)
+{
+ if (likely(!insert->deferred))
+ btree_insert_key_leaf(trans, insert);
+ else
+ btree_insert_key_deferred(trans, insert);
}
/*
* Get journal reservation, take write locks, and attempt to do btree update(s):
*/
-static inline int do_btree_insert_at(struct btree_insert *trans,
+static inline int do_btree_insert_at(struct btree_trans *trans,
struct btree_insert_entry **stopped_at)
{
struct bch_fs *c = trans->c;
+ struct bch_fs_usage *fs_usage = NULL;
struct btree_insert_entry *i;
struct btree_iter *linked;
- unsigned u64s;
int ret;
-retry:
- trans_for_each_iter(trans, i)
+
+ trans_for_each_update_iter(trans, i)
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
- memset(&trans->journal_res, 0, sizeof(trans->journal_res));
+ btree_trans_lock_write(c, trans);
- multi_lock_write(c, trans);
+ trans_for_each_update_iter(trans, i) {
+ if (i->deferred ||
+ !btree_node_type_needs_gc(i->iter->btree_id))
+ continue;
+
+ if (!fs_usage) {
+ percpu_down_read(&c->mark_lock);
+ fs_usage = bch2_fs_usage_scratch_get(c);
+ }
+
+ if (!bch2_bkey_replicas_marked_locked(c,
+ bkey_i_to_s_c(i->k), true)) {
+ ret = BTREE_INSERT_NEED_MARK_REPLICAS;
+ goto out;
+ }
+ }
if (race_fault()) {
ret = -EINTR;
@@ -523,59 +589,24 @@ retry:
* held, otherwise another thread could write the node changing the
* amount of space available:
*/
- u64s = 0;
- trans_for_each_iter(trans, i) {
- /* Multiple inserts might go to same leaf: */
- if (!same_leaf_as_prev(trans, i))
- u64s = 0;
-
- u64s += i->k->k.u64s;
- ret = btree_key_can_insert(trans, i, &u64s);
- if (ret) {
- *stopped_at = i;
- goto out;
- }
- }
-
- if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
- unsigned flags = (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
- ? JOURNAL_RES_GET_RESERVED : 0;
-
- u64s = 0;
- trans_for_each_entry(trans, i)
- u64s += jset_u64s(i->k->k.u64s);
-
- ret = bch2_journal_res_get(&c->journal,
- &trans->journal_res, u64s,
- flags|JOURNAL_RES_GET_NONBLOCK);
- if (likely(!ret))
- goto got_journal_res;
- if (ret != -EAGAIN)
- goto out;
-
- multi_unlock_write(trans);
- btree_trans_unlock(trans);
-
- ret = bch2_journal_res_get(&c->journal,
- &trans->journal_res, u64s,
- flags|JOURNAL_RES_GET_CHECK);
- if (ret)
- return ret;
+ ret = btree_trans_check_can_insert(trans, stopped_at);
+ if (ret)
+ goto out;
- if (!btree_trans_relock(trans)) {
- trans_restart(" (iter relock after journal res get blocked)");
- return -EINTR;
- }
+ /*
+ * Don't get journal reservation until after we know insert will
+ * succeed:
+ */
+ ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
+ if (ret)
+ goto out;
- goto retry;
- }
-got_journal_res:
if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
if (journal_seq_verify(c))
- trans_for_each_entry(trans, i)
+ trans_for_each_update(trans, i)
i->k->k.version.lo = trans->journal_res.seq;
else if (inject_invalid_keys(c))
- trans_for_each_entry(trans, i)
+ trans_for_each_update(trans, i)
i->k->k.version = MAX_VERSION;
}
@@ -585,178 +616,51 @@ got_journal_res:
* have been traversed/locked, depending on what the caller was
* doing:
*/
- trans_for_each_iter(trans, i) {
+ trans_for_each_update_iter(trans, i) {
for_each_btree_iter(i->iter, linked)
if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
linked->flags |= BTREE_ITER_NOUNLOCK;
break;
}
}
- trans->did_work = true;
- trans_for_each_entry(trans, i) {
- switch (do_btree_insert_one(trans, i)) {
- case BTREE_INSERT_OK:
- break;
- case BTREE_INSERT_NEED_TRAVERSE:
- BUG_ON((trans->flags &
- (BTREE_INSERT_ATOMIC|BTREE_INSERT_NOUNLOCK)));
- ret = -EINTR;
- goto out;
- default:
- BUG();
- }
- }
+ trans_for_each_update_iter(trans, i)
+ bch2_mark_update(trans, i, fs_usage);
+ if (fs_usage)
+ bch2_trans_fs_usage_apply(trans, fs_usage);
+
+ trans_for_each_update(trans, i)
+ do_btree_insert_one(trans, i);
out:
BUG_ON(ret &&
(trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
trans->journal_res.ref);
- multi_unlock_write(trans);
- bch2_journal_res_put(&c->journal, &trans->journal_res);
-
- return ret;
-}
+ btree_trans_unlock_write(trans);
-static inline void btree_insert_entry_checks(struct bch_fs *c,
- struct btree_insert_entry *i)
-{
- enum btree_id btree_id = !i->deferred
- ? i->iter->btree_id
- : i->d->btree_id;
-
- if (!i->deferred) {
- BUG_ON(i->iter->level);
- BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
-
- bch2_btree_iter_verify_locks(i->iter);
+ if (fs_usage) {
+ bch2_fs_usage_scratch_put(c, fs_usage);
+ percpu_up_read(&c->mark_lock);
}
- BUG_ON(debug_check_bkeys(c) &&
- !bkey_deleted(&i->k->k) &&
- bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), btree_id));
+ bch2_journal_res_put(&c->journal, &trans->journal_res);
+
+ return ret;
}
-/**
- * __bch_btree_insert_at - insert keys at given iterator positions
- *
- * This is main entry point for btree updates.
- *
- * Return values:
- * -EINTR: locking changed, this function should be called again. Only returned
- * if passed BTREE_INSERT_ATOMIC.
- * -EROFS: filesystem read only
- * -EIO: journal or btree node IO error
- */
-int __bch2_btree_insert_at(struct btree_insert *trans)
+static noinline
+int bch2_trans_commit_error(struct btree_trans *trans,
+ struct btree_insert_entry *i,
+ int ret)
{
struct bch_fs *c = trans->c;
- struct btree_insert_entry *i;
- struct btree_iter *linked;
- unsigned flags, u64s = 0;
- int ret;
-
- BUG_ON(!trans->nr);
-
- /* for the sake of sanity: */
- BUG_ON(trans->nr > 1 && !(trans->flags & BTREE_INSERT_ATOMIC));
-
- if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
- lockdep_assert_held(&c->gc_lock);
-
- memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
-
- bubble_sort(trans->entries, trans->nr, btree_trans_cmp);
-
- trans_for_each_entry(trans, i)
- btree_insert_entry_checks(c, i);
-
- trans_for_each_entry(trans, i)
- if (i->deferred)
- u64s += jset_u64s(i->k->k.u64s);
-
- if (u64s) {
- ret = bch2_journal_preres_get(&c->journal,
- &trans->journal_preres, u64s,
- JOURNAL_RES_GET_NONBLOCK);
- if (!ret)
- goto got_journal_preres;
- if (ret != -EAGAIN)
- return ret;
-
- btree_trans_unlock(trans);
- ret = bch2_journal_preres_get(&c->journal,
- &trans->journal_preres, u64s, 0);
- if (ret)
- return ret;
-
- if (!btree_trans_relock(trans)) {
- trans_restart(" (iter relock after journal preres get blocked)");
- bch2_journal_preres_put(&c->journal, &trans->journal_preres);
- return -EINTR;
- }
- }
-got_journal_preres:
- if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
- !percpu_ref_tryget(&c->writes)))
- return -EROFS;
-retry:
- trans_for_each_iter(trans, i) {
- unsigned old_locks_want = i->iter->locks_want;
- unsigned old_uptodate = i->iter->uptodate;
-
- if (!bch2_btree_iter_upgrade(i->iter, 1, true)) {
- trans_restart(" (failed upgrade, locks_want %u uptodate %u)",
- old_locks_want, old_uptodate);
- ret = -EINTR;
- goto err;
- }
-
- if (i->iter->flags & BTREE_ITER_ERROR) {
- ret = -EIO;
- goto err;
- }
- }
-
- ret = do_btree_insert_at(trans, &i);
- if (unlikely(ret))
- goto err;
-
- trans_for_each_leaf(trans, i)
- bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
-
- trans_for_each_iter(trans, i)
- bch2_btree_iter_downgrade(i->iter);
-out:
- bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
- if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
- percpu_ref_put(&c->writes);
-
- /* make sure we didn't drop or screw up locks: */
- trans_for_each_iter(trans, i) {
- bch2_btree_iter_verify_locks(i->iter);
- break;
- }
-
- trans_for_each_iter(trans, i) {
- for_each_btree_iter(i->iter, linked)
- linked->flags &= ~BTREE_ITER_NOUNLOCK;
- break;
- }
-
- BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
-
- return ret;
-err:
- flags = trans->flags;
+ unsigned flags = trans->flags;
/*
* BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
* update; if we haven't done anything yet it doesn't apply
*/
- if (!trans->did_work)
- flags &= ~BTREE_INSERT_NOUNLOCK;
+ flags &= ~BTREE_INSERT_NOUNLOCK;
switch (ret) {
case BTREE_INSERT_BTREE_NODE_FULL:
@@ -772,8 +676,12 @@ err:
* XXX:
* split -> btree node merging (of parent node) might still drop
* locks when we're not passing it BTREE_INSERT_NOUNLOCK
+ *
+ * we don't want to pass BTREE_INSERT_NOUNLOCK to split as that
+ * will inhibit merging - but we don't have a reliable way yet
+ * (do we?) of checking if we dropped locks in this path
*/
- if (!ret && !trans->did_work)
+ if (!ret)
goto retry;
#endif
@@ -790,14 +698,32 @@ err:
ret = -ENOSPC;
break;
case BTREE_INSERT_NEED_MARK_REPLICAS:
- if (flags & BTREE_INSERT_NOUNLOCK) {
- ret = -EINTR;
- goto out;
+ bch2_trans_unlock(trans);
+
+ trans_for_each_update_iter(trans, i) {
+ ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k));
+ if (ret)
+ return ret;
}
- bch2_btree_iter_unlock(trans->entries[0].iter);
- ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k))
- ?: -EINTR;
+ if (btree_trans_relock(trans))
+ return 0;
+
+ trans_restart(" (iter relock after marking replicas)");
+ ret = -EINTR;
+ break;
+ case BTREE_INSERT_NEED_JOURNAL_RES:
+ btree_trans_unlock(trans);
+
+ ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
+ if (ret)
+ return ret;
+
+ if (btree_trans_relock(trans))
+ return 0;
+
+ trans_restart(" (iter relock after journal res get blocked)");
+ ret = -EINTR;
break;
default:
BUG_ON(ret >= 0);
@@ -805,17 +731,11 @@ err:
}
if (ret == -EINTR) {
- if (flags & BTREE_INSERT_NOUNLOCK) {
- trans_restart(" (can't unlock)");
- goto out;
- }
-
- trans_for_each_iter(trans, i) {
+ trans_for_each_update_iter(trans, i) {
int ret2 = bch2_btree_iter_traverse(i->iter);
if (ret2) {
- ret = ret2;
trans_restart(" (traverse)");
- goto out;
+ return ret2;
}
BUG_ON(i->iter->uptodate > BTREE_ITER_NEED_PEEK);
@@ -826,12 +746,73 @@ err:
* dropped locks:
*/
if (!(flags & BTREE_INSERT_ATOMIC))
- goto retry;
+ return 0;
trans_restart(" (atomic)");
}
- goto out;
+ return ret;
+}
+
+/**
+ * __bch_btree_insert_at - insert keys at given iterator positions
+ *
+ * This is the main entry point for btree updates.
+ *
+ * Return values:
+ * -EINTR: locking changed, this function should be called again. Only returned
+ * if passed BTREE_INSERT_ATOMIC.
+ * -EROFS: filesystem read only
+ * -EIO: journal or btree node IO error
+ */
+static int __bch2_trans_commit(struct btree_trans *trans,
+ struct btree_insert_entry **stopped_at)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_insert_entry *i;
+ struct btree_iter *linked;
+ int ret;
+
+ trans_for_each_update_iter(trans, i) {
+ unsigned old_locks_want = i->iter->locks_want;
+ unsigned old_uptodate = i->iter->uptodate;
+
+ if (!bch2_btree_iter_upgrade(i->iter, 1, true)) {
+ trans_restart(" (failed upgrade, locks_want %u uptodate %u)",
+ old_locks_want, old_uptodate);
+ ret = -EINTR;
+ goto err;
+ }
+
+ if (i->iter->flags & BTREE_ITER_ERROR) {
+ ret = -EIO;
+ goto err;
+ }
+ }
+
+ ret = do_btree_insert_at(trans, stopped_at);
+ if (unlikely(ret))
+ goto err;
+
+ trans_for_each_update_leaf(trans, i)
+ bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
+
+ trans_for_each_update_iter(trans, i)
+ bch2_btree_iter_downgrade(i->iter);
+err:
+ /* make sure we didn't drop or screw up locks: */
+ trans_for_each_update_iter(trans, i) {
+ bch2_btree_iter_verify_locks(i->iter);
+ break;
+ }
+
+ trans_for_each_update_iter(trans, i) {
+ for_each_btree_iter(i->iter, linked)
+ linked->flags &= ~BTREE_ITER_NOUNLOCK;
+ break;
+ }
+
+ return ret;
}
int bch2_trans_commit(struct btree_trans *trans,
@@ -839,60 +820,100 @@ int bch2_trans_commit(struct btree_trans *trans,
u64 *journal_seq,
unsigned flags)
{
- struct btree_insert insert = {
- .c = trans->c,
- .disk_res = disk_res,
- .journal_seq = journal_seq,
- .flags = flags,
- .nr = trans->nr_updates,
- .entries = trans->updates,
- };
+ struct bch_fs *c = trans->c;
+ struct btree_insert_entry *i;
+ int ret = 0;
if (!trans->nr_updates)
- return 0;
+ goto out_noupdates;
- trans->nr_updates = 0;
+ /* for the sake of sanity: */
+ BUG_ON(trans->nr_updates > 1 && !(flags & BTREE_INSERT_ATOMIC));
- return __bch2_btree_insert_at(&insert);
-}
+ if (flags & BTREE_INSERT_GC_LOCK_HELD)
+ lockdep_assert_held(&c->gc_lock);
-int bch2_btree_delete_at(struct btree_iter *iter, unsigned flags)
-{
- struct bkey_i k;
+ if (!trans->commit_start)
+ trans->commit_start = local_clock();
- bkey_init(&k.k);
- k.k.p = iter->pos;
+ memset(&trans->journal_res, 0, sizeof(trans->journal_res));
+ memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
+ trans->disk_res = disk_res;
+ trans->journal_seq = journal_seq;
+ trans->flags = flags;
- return bch2_btree_insert_at(iter->c, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|flags,
- BTREE_INSERT_ENTRY(iter, &k));
-}
+ bubble_sort(trans->updates, trans->nr_updates, btree_trans_cmp);
-int bch2_btree_insert_list_at(struct btree_iter *iter,
- struct keylist *keys,
- struct disk_reservation *disk_res,
- u64 *journal_seq, unsigned flags)
-{
- BUG_ON(flags & BTREE_INSERT_ATOMIC);
- BUG_ON(bch2_keylist_empty(keys));
- bch2_verify_keylist_sorted(keys);
-
- while (!bch2_keylist_empty(keys)) {
- int ret = bch2_btree_insert_at(iter->c, disk_res,
- journal_seq, flags,
- BTREE_INSERT_ENTRY(iter, bch2_keylist_front(keys)));
+ trans_for_each_update(trans, i)
+ btree_insert_entry_checks(trans, i);
+
+ if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
+ !percpu_ref_tryget(&c->writes))) {
+ if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)))
+ return -EROFS;
+
+ btree_trans_unlock(trans);
+
+ ret = bch2_fs_read_write_early(c);
if (ret)
return ret;
- bch2_keylist_pop_front(keys);
+ percpu_ref_get(&c->writes);
+
+ if (!btree_trans_relock(trans)) {
+ ret = -EINTR;
+ goto err;
+ }
}
+retry:
+ ret = bch2_trans_journal_preres_get(trans);
+ if (ret)
+ goto err;
- return 0;
+ ret = __bch2_trans_commit(trans, &i);
+ if (ret)
+ goto err;
+out:
+ bch2_journal_preres_put(&c->journal, &trans->journal_preres);
+
+ if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
+ percpu_ref_put(&c->writes);
+out_noupdates:
+ if (!ret && trans->commit_start) {
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_update],
+ trans->commit_start);
+ trans->commit_start = 0;
+ }
+
+ trans->nr_updates = 0;
+
+ BUG_ON(!(trans->flags & BTREE_INSERT_ATOMIC) && ret == -EINTR);
+
+ return ret;
+err:
+ ret = bch2_trans_commit_error(trans, i, ret);
+ if (!ret)
+ goto retry;
+
+ goto out;
+}
+
+int bch2_btree_delete_at(struct btree_trans *trans,
+ struct btree_iter *iter, unsigned flags)
+{
+ struct bkey_i k;
+
+ bkey_init(&k.k);
+ k.k.p = iter->pos;
+
+ bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &k));
+ return bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE|flags);
}
/**
- * bch_btree_insert - insert keys into the extent btree
+ * bch2_btree_insert - insert keys into the extent btree
* @c: pointer to struct bch_fs
* @id: btree to insert into
* @insert_keys: list of keys to insert
@@ -903,14 +924,19 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id,
struct disk_reservation *disk_res,
u64 *journal_seq, int flags)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
int ret;
- bch2_btree_iter_init(&iter, c, id, bkey_start_pos(&k->k),
- BTREE_ITER_INTENT);
- ret = bch2_btree_insert_at(c, disk_res, journal_seq, flags,
- BTREE_INSERT_ENTRY(&iter, k));
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, id, bkey_start_pos(&k->k),
+ BTREE_ITER_INTENT);
+
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));
+
+ ret = bch2_trans_commit(&trans, disk_res, journal_seq, flags);
+ bch2_trans_exit(&trans);
return ret;
}
@@ -924,16 +950,18 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
struct bpos start, struct bpos end,
u64 *journal_seq)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
int ret = 0;
- bch2_btree_iter_init(&iter, c, id, start,
- BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
- while ((k = bch2_btree_iter_peek(&iter)).k &&
+ while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = btree_iter_err(k)) &&
- bkey_cmp(iter.pos, end) < 0) {
+ bkey_cmp(iter->pos, end) < 0) {
unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
/* really shouldn't be using a bare, unpadded bkey_i */
struct bkey_i delete;
@@ -950,23 +978,28 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
* (bch2_btree_iter_peek() does guarantee that iter.pos >=
* bkey_start_pos(k.k)).
*/
- delete.k.p = iter.pos;
+ delete.k.p = iter->pos;
- if (iter.flags & BTREE_ITER_IS_EXTENTS) {
+ if (iter->flags & BTREE_ITER_IS_EXTENTS) {
/* create the biggest key we can */
bch2_key_resize(&delete.k, max_sectors);
bch2_cut_back(end, &delete.k);
+ bch2_extent_trim_atomic(&delete, iter);
}
- ret = bch2_btree_insert_at(c, NULL, journal_seq,
- BTREE_INSERT_NOFAIL,
- BTREE_INSERT_ENTRY(&iter, &delete));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &delete));
+
+ ret = bch2_trans_commit(&trans, NULL, journal_seq,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL);
+ if (ret == -EINTR)
+ ret = 0;
if (ret)
break;
- bch2_btree_iter_cond_resched(&iter);
+ bch2_btree_iter_cond_resched(iter);
}
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
return ret;
}
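
With the old bch2_btree_insert_at() entry point gone, callers that pass BTREE_INSERT_ATOMIC are expected to handle -EINTR by re-queuing their update and committing again, as bch2_btree_delete_range() above now does inside its loop. A sketch of the caller-side retry idiom (illustrative only; assumes the signatures shown in this diff):

/*
 * Caller-side retry idiom for BTREE_INSERT_ATOMIC commits, mirroring
 * bch2_btree_delete_range() above.
 */
static int example_atomic_update(struct btree_trans *trans,
				 struct btree_iter *iter,
				 struct bkey_i *k)
{
	int ret;

	do {
		/*
		 * Re-queue each attempt: bch2_trans_commit() resets
		 * trans->nr_updates whether or not it succeeded.
		 */
		bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, k));

		ret = bch2_trans_commit(trans, NULL, NULL,
					BTREE_INSERT_ATOMIC|
					BTREE_INSERT_NOFAIL);
		/*
		 * -EINTR means locks were dropped (journal reservation,
		 * replicas marking, traverse) and the iterators were
		 * re-traversed, so the commit can simply be retried.
		 */
	} while (ret == -EINTR);

	return ret;
}
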
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 072d22ae..dae718dc 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -143,6 +143,37 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
percpu_up_write(&c->mark_lock);
}
+void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage)
+{
+ if (fs_usage == c->usage_scratch)
+ mutex_unlock(&c->usage_scratch_lock);
+ else
+ kfree(fs_usage);
+}
+
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c)
+{
+ struct bch_fs_usage *ret;
+ unsigned bytes = fs_usage_u64s(c) * sizeof(u64);
+
+ ret = kzalloc(bytes, GFP_NOWAIT);
+ if (ret)
+ return ret;
+
+ if (mutex_trylock(&c->usage_scratch_lock))
+ goto out_pool;
+
+ ret = kzalloc(bytes, GFP_NOFS);
+ if (ret)
+ return ret;
+
+ mutex_lock(&c->usage_scratch_lock);
+out_pool:
+ ret = c->usage_scratch;
+ memset(ret, 0, bytes);
+ return ret;
+}
+
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
{
struct bch_dev_usage ret;
@@ -290,8 +321,10 @@ int bch2_fs_usage_apply(struct bch_fs *c,
fs_usage->online_reserved -= added;
}
+ preempt_disable();
acc_u64s((u64 *) this_cpu_ptr(c->usage[0]),
(u64 *) fs_usage, fs_usage_u64s(c));
+ preempt_enable();
return ret;
}
@@ -549,7 +582,6 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, enum bch_data_type type,
unsigned sectors, bool gc)
{
- struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
struct bucket *g = __bucket(ca, b, gc);
struct bucket_mark old, new;
bool overflow;
@@ -568,7 +600,8 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
old.dirty_sectors, sectors);
if (c)
- bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
+ bch2_dev_usage_update(c, ca, this_cpu_ptr(c->usage[gc]),
+ old, new, gc);
return 0;
}
@@ -897,31 +930,39 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
unsigned journal_seq, unsigned flags,
bool gc)
{
+ int ret = 0;
+
+ preempt_disable();
+
if (!fs_usage || gc)
fs_usage = this_cpu_ptr(c->usage[gc]);
switch (k.k->type) {
case KEY_TYPE_alloc:
- return bch2_mark_alloc(c, k, inserting,
+ ret = bch2_mark_alloc(c, k, inserting,
fs_usage, journal_seq, flags, gc);
+ break;
case KEY_TYPE_btree_ptr:
- return bch2_mark_extent(c, k, inserting
+ ret = bch2_mark_extent(c, k, inserting
? c->opts.btree_node_size
: -c->opts.btree_node_size,
BCH_DATA_BTREE,
fs_usage, journal_seq, flags, gc);
+ break;
case KEY_TYPE_extent:
- return bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
+ ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
fs_usage, journal_seq, flags, gc);
+ break;
case KEY_TYPE_stripe:
- return bch2_mark_stripe(c, k, inserting,
+ ret = bch2_mark_stripe(c, k, inserting,
fs_usage, journal_seq, flags, gc);
+ break;
case KEY_TYPE_inode:
if (inserting)
fs_usage->nr_inodes++;
else
fs_usage->nr_inodes--;
- return 0;
+ break;
case KEY_TYPE_reservation: {
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -931,11 +972,13 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
fs_usage->reserved += sectors;
fs_usage->persistent_reserved[replicas - 1] += sectors;
- return 0;
+ break;
}
- default:
- return 0;
}
+
+ preempt_enable();
+
+ return ret;
}
int bch2_mark_key_locked(struct bch_fs *c,
@@ -966,25 +1009,20 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
return ret;
}
-void bch2_mark_update(struct btree_insert *trans,
- struct btree_insert_entry *insert)
+void bch2_mark_update(struct btree_trans *trans,
+ struct btree_insert_entry *insert,
+ struct bch_fs_usage *fs_usage)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter = insert->iter;
struct btree *b = iter->l[0].b;
struct btree_node_iter node_iter = iter->l[0].iter;
- struct bch_fs_usage *fs_usage;
struct gc_pos pos = gc_pos_btree_node(b);
struct bkey_packed *_k;
- u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
- static int warned_disk_usage = 0;
if (!btree_node_type_needs_gc(iter->btree_id))
return;
- percpu_down_read_preempt_disable(&c->mark_lock);
- fs_usage = bch2_fs_usage_get_scratch(c);
-
if (!(trans->flags & BTREE_INSERT_NOMARK))
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
bpos_min(insert->k->k.p, b->key.k.p).offset -
@@ -1037,16 +1075,32 @@ void bch2_mark_update(struct btree_insert *trans,
bch2_btree_node_iter_advance(&node_iter, b);
}
+}
- if (bch2_fs_usage_apply(c, fs_usage, trans->disk_res) &&
- !warned_disk_usage &&
- !xchg(&warned_disk_usage, 1)) {
- char buf[200];
+void bch2_trans_fs_usage_apply(struct btree_trans *trans,
+ struct bch_fs_usage *fs_usage)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_insert_entry *i;
+ static int warned_disk_usage = 0;
+ u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
+ char buf[200];
- pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+ if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res) ||
+ warned_disk_usage ||
+ xchg(&warned_disk_usage, 1))
+ return;
+
+ pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+
+ trans_for_each_update_iter(trans, i) {
+ struct btree_iter *iter = i->iter;
+ struct btree *b = iter->l[0].b;
+ struct btree_node_iter node_iter = iter->l[0].iter;
+ struct bkey_packed *_k;
pr_err("while inserting");
- bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert->k));
+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
pr_err("%s", buf);
pr_err("overlapping with");
@@ -1059,8 +1113,8 @@ void bch2_mark_update(struct btree_insert *trans,
k = bkey_disassemble(b, _k, &unpacked);
if (btree_node_is_extents(b)
- ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
- : bkey_cmp(insert->k->k.p, k.k->p))
+ ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
+ : bkey_cmp(i->k->k.p, k.k->p))
break;
bch2_bkey_val_to_text(&PBUF(buf), c, k);
@@ -1069,8 +1123,6 @@ void bch2_mark_update(struct btree_insert *trans,
bch2_btree_node_iter_advance(&node_iter, b);
}
}
-
- percpu_up_read_preempt_enable(&c->mark_lock);
}
/* Disk reservations: */
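
The buckets.c hunks above replace the old per-cpu scratch buffer with an explicit get/put pair: bch2_fs_usage_scratch_get() hands back a zeroed buffer (from a GFP_NOWAIT allocation, the shared pool, or a blocking GFP_NOFS allocation, in that order), and bch2_fs_usage_scratch_put() returns it. A minimal sketch of the intended calling pattern, assuming the caller already holds mark_lock and has a btree_trans with a pending insert (mark_and_apply_sketch is illustrative, not a symbol in this patch):

	static void mark_and_apply_sketch(struct btree_trans *trans,
					  struct btree_insert_entry *insert)
	{
		struct bch_fs *c = trans->c;
		struct bch_fs_usage *fs_usage = bch2_fs_usage_scratch_get(c);

		/* accumulate usage deltas for this insert: */
		bch2_mark_update(trans, insert, fs_usage);

		/* fold deltas into c->usage[0]; warns on reservation overrun: */
		bch2_trans_fs_usage_apply(trans, fs_usage);

		bch2_fs_usage_scratch_put(c, fs_usage);
	}
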
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 0725aa94..c9706fa0 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -218,13 +218,8 @@ static inline unsigned fs_usage_u64s(struct bch_fs *c)
READ_ONCE(c->replicas.nr);
}
-static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
-{
- struct bch_fs_usage *ret = this_cpu_ptr(c->usage_scratch);
-
- memset(ret, 0, fs_usage_u64s(c) * sizeof(u64));
- return ret;
-}
+void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *);
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *);
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
@@ -255,10 +250,13 @@ int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
-void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *);
+void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
+ struct bch_fs_usage *);
+void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
+
/* disk reservations: */
void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index 9a400085..550561e6 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -150,8 +150,8 @@ int __bch2_dirent_create(struct btree_trans *trans,
if (ret)
return ret;
- return __bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
- dir_inum, &dirent->k_i, flags);
+ return bch2_hash_set(trans, bch2_dirent_hash_desc, hash_info,
+ dir_inum, &dirent->k_i, flags);
}
int bch2_dirent_create(struct bch_fs *c, u64 dir_inum,
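
With this rename, the transactional __bch2_hash_set() becomes the only bch2_hash_set(); the old non-transactional wrapper is deleted from str_hash.h further down. Callers that still want a one-shot insert can recover the old behaviour by wrapping it in bch2_trans_do(); a sketch, where c, journal_seq, hash_info, dir_inum and dirent are assumed to come from the caller:

	ret = bch2_trans_do(c, journal_seq,
			    BTREE_INSERT_ATOMIC|BTREE_INSERT_NOFAIL,
			    bch2_hash_set(&trans, bch2_dirent_hash_desc,
					  hash_info, dir_inum,
					  &dirent->k_i, 0));
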
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 8018c2bc..ea6f4867 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -628,36 +628,12 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
/* stripe deletion */
-static void ec_stripe_delete(struct bch_fs *c, size_t idx)
+static int ec_stripe_delete(struct bch_fs *c, size_t idx)
{
- struct btree_iter iter;
- struct bch_stripe *v = NULL;
- struct bkey_s_c k;
- struct bkey_i delete;
- u64 journal_seq = 0;
-
- bch2_btree_iter_init(&iter, c, BTREE_ID_EC,
- POS(0, idx),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(&iter);
- if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe)
- goto out;
-
- v = kmalloc(bkey_val_bytes(k.k), GFP_KERNEL);
- BUG_ON(!v);
- memcpy(v, bkey_s_c_to_stripe(k).v, bkey_val_bytes(k.k));
-
- bkey_init(&delete.k);
- delete.k.p = iter.pos;
-
- bch2_btree_insert_at(c, NULL, &journal_seq,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_NOUNLOCK,
- BTREE_INSERT_ENTRY(&iter, &delete));
-out:
- bch2_btree_iter_unlock(&iter);
- kfree(v);
+ return bch2_btree_delete_range(c, BTREE_ID_EC,
+ POS(0, idx),
+ POS(0, idx + 1),
+ NULL);
}
static void ec_stripe_delete_work(struct work_struct *work)
@@ -689,39 +665,46 @@ static void ec_stripe_delete_work(struct work_struct *work)
static int ec_stripe_bkey_insert(struct bch_fs *c,
struct bkey_i_stripe *stripe)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
int ret;
- /* XXX: start pos hint */
+ bch2_trans_init(&trans, c);
retry:
- for_each_btree_key(&iter, c, BTREE_ID_EC, POS_MIN,
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) {
- if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0) {
- bch2_btree_iter_unlock(&iter);
- return -ENOSPC;
- }
+ bch2_trans_begin(&trans);
+
+ /* XXX: start pos hint */
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+ for_each_btree_key_continue(iter, BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) {
+ if (bkey_cmp(k.k->p, POS(0, U32_MAX)) > 0)
+ break;
if (bkey_deleted(k.k))
goto found_slot;
}
- return bch2_btree_iter_unlock(&iter) ?: -ENOSPC;
+ ret = -ENOSPC;
+ goto out;
found_slot:
- ret = ec_stripe_mem_alloc(c, &iter);
+ ret = ec_stripe_mem_alloc(c, iter);
if (ret == -EINTR)
goto retry;
-	if (ret)
-		return ret;
+	if (ret)
+		goto out;
- stripe->k.p = iter.pos;
+ stripe->k.p = iter->pos;
- ret = bch2_btree_insert_at(c, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE,
- BTREE_INSERT_ENTRY(&iter, &stripe->k_i));
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &stripe->k_i));
+
+ ret = bch2_trans_commit(&trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE);
+out:
+ bch2_trans_exit(&trans);
return ret;
}
@@ -748,23 +731,26 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
struct ec_stripe_buf *s,
struct bkey *pos)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
BKEY_PADDED(k) tmp;
int ret = 0, dev, idx;
- bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
- bkey_start_pos(pos),
- BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
- while ((k = bch2_btree_iter_peek(&iter)).k &&
- !btree_iter_err(k) &&
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+ bkey_start_pos(pos),
+ BTREE_ITER_INTENT);
+
+ while ((k = bch2_btree_iter_peek(iter)).k &&
+ !(ret = btree_iter_err(k)) &&
bkey_cmp(bkey_start_pos(k.k), pos->p) < 0) {
idx = extent_matches_stripe(c, &s->key.v, k);
if (idx < 0) {
- bch2_btree_iter_next(&iter);
+ bch2_btree_iter_next(iter);
continue;
}
@@ -782,18 +768,21 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
extent_stripe_ptr_add(e, s, ptr, idx);
- ret = bch2_btree_insert_at(c, NULL, NULL,
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE,
- BTREE_INSERT_ENTRY(&iter, &tmp.k));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &tmp.k));
+
+ ret = bch2_trans_commit(&trans, NULL, NULL,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE);
if (ret == -EINTR)
ret = 0;
if (ret)
break;
}
- return bch2_btree_iter_unlock(&iter) ?: ret;
+ bch2_trans_exit(&trans);
+
+ return ret;
}
/*
@@ -1162,13 +1151,14 @@ unlock:
mutex_unlock(&c->ec_new_stripe_lock);
}
-static int __bch2_stripe_write_key(struct bch_fs *c,
+static int __bch2_stripe_write_key(struct btree_trans *trans,
struct btree_iter *iter,
struct stripe *m,
size_t idx,
struct bkey_i_stripe *new_key,
unsigned flags)
{
+ struct bch_fs *c = trans->c;
struct bkey_s_c k;
unsigned i;
int ret;
@@ -1194,14 +1184,16 @@ static int __bch2_stripe_write_key(struct bch_fs *c,
spin_unlock(&c->ec_stripes_heap_lock);
- return bch2_btree_insert_at(c, NULL, NULL,
- BTREE_INSERT_NOFAIL|flags,
- BTREE_INSERT_ENTRY(iter, &new_key->k_i));
+ bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &new_key->k_i));
+
+ return bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|flags);
}
int bch2_stripes_write(struct bch_fs *c, bool *wrote)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct genradix_iter giter;
struct bkey_i_stripe *new_key;
struct stripe *m;
@@ -1210,14 +1202,16 @@ int bch2_stripes_write(struct bch_fs *c, bool *wrote)
new_key = kmalloc(255 * sizeof(u64), GFP_KERNEL);
BUG_ON(!new_key);
- bch2_btree_iter_init(&iter, c, BTREE_ID_EC, POS_MIN,
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, POS_MIN,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
genradix_for_each(&c->stripes[0], giter, m) {
if (!m->dirty)
continue;
- ret = __bch2_stripe_write_key(c, &iter, m, giter.pos,
+ ret = __bch2_stripe_write_key(&trans, iter, m, giter.pos,
new_key, BTREE_INSERT_NOCHECK_RW);
if (ret)
break;
@@ -1225,7 +1219,7 @@ int bch2_stripes_write(struct bch_fs *c, bool *wrote)
*wrote = true;
}
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
kfree(new_key);
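
Every ec.c conversion above follows the same shape, which recurs through the rest of this patch: a btree_trans owns the iterators, updates are queued with bch2_trans_update(), bch2_trans_commit() replaces bch2_btree_insert_at(), and -EINTR means the whole transaction must be retried from bch2_trans_begin(). Condensed into one illustrative function (trans_update_sketch is not a symbol in this patch):

	static int trans_update_sketch(struct bch_fs *c, struct bpos pos,
				       struct bkey_i *new_key)
	{
		struct btree_trans trans;
		struct btree_iter *iter;
		int ret;

		bch2_trans_init(&trans, c);
	retry:
		bch2_trans_begin(&trans);	/* drop updates from a failed attempt */

		iter = bch2_trans_get_iter(&trans, BTREE_ID_EC, pos,
					   BTREE_ITER_INTENT);

		bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, new_key));

		ret = bch2_trans_commit(&trans, NULL, NULL,
					BTREE_INSERT_ATOMIC|
					BTREE_INSERT_NOFAIL);
		if (ret == -EINTR)
			goto retry;	/* lock restart: redo the transaction */

		bch2_trans_exit(&trans);
		return ret;
	}
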
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 369b100a..1ab951c9 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -782,18 +782,6 @@ static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
return true;
}
-struct extent_insert_state {
- struct btree_insert *trans;
- struct btree_insert_entry *insert;
- struct bpos committed;
-
- /* for deleting: */
- struct bkey_i whiteout;
- bool update_journal;
- bool update_btree;
- bool deleting;
-};
-
static bool bch2_extent_merge_inline(struct bch_fs *,
struct btree_iter *,
struct bkey_packed *,
@@ -880,67 +868,29 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
bch2_btree_iter_verify(iter, l->b);
}
-static void extent_insert_committed(struct extent_insert_state *s)
+static inline struct bpos
+bch2_extent_atomic_end(struct bkey_i *k, struct btree_iter *iter)
{
- struct bch_fs *c = s->trans->c;
- struct btree_iter *iter = s->insert->iter;
- struct bkey_i *insert = s->insert->k;
- BKEY_PADDED(k) split;
-
- EBUG_ON(bkey_cmp(insert->k.p, s->committed) < 0);
- EBUG_ON(bkey_cmp(s->committed, bkey_start_pos(&insert->k)) < 0);
-
- bkey_copy(&split.k, insert);
- if (s->deleting)
- split.k.k.type = KEY_TYPE_discard;
-
- bch2_cut_back(s->committed, &split.k.k);
-
- if (!bkey_cmp(s->committed, iter->pos))
- return;
-
- bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
-
- if (s->update_btree) {
- if (debug_check_bkeys(c))
- bch2_bkey_debugcheck(c, iter->l[0].b,
- bkey_i_to_s_c(&split.k));
-
- EBUG_ON(bkey_deleted(&split.k.k) || !split.k.k.size);
-
- extent_bset_insert(c, iter, &split.k);
- }
-
- if (s->update_journal) {
- bkey_copy(&split.k, !s->deleting ? insert : &s->whiteout);
- if (s->deleting)
- split.k.k.type = KEY_TYPE_discard;
-
- bch2_cut_back(s->committed, &split.k.k);
-
- EBUG_ON(bkey_deleted(&split.k.k) || !split.k.k.size);
-
- bch2_btree_journal_key(s->trans, iter, &split.k);
- }
+ struct btree *b = iter->l[0].b;
- bch2_cut_front(s->committed, insert);
+ BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
+ BUG_ON(bkey_cmp(bkey_start_pos(&k->k), b->data->min_key) < 0);
- insert->k.needs_whiteout = false;
+ return bpos_min(k->k.p, b->key.k.p);
}
void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
{
- struct btree *b = iter->l[0].b;
-
- BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
-
- bch2_cut_back(b->key.k.p, &k->k);
+ bch2_cut_back(bch2_extent_atomic_end(k, iter), &k->k);
+}
- BUG_ON(bkey_cmp(bkey_start_pos(&k->k), b->data->min_key) < 0);
+bool bch2_extent_is_atomic(struct bkey_i *k, struct btree_iter *iter)
+{
+ return !bkey_cmp(bch2_extent_atomic_end(k, iter), k->k.p);
}
enum btree_insert_ret
-bch2_extent_can_insert(struct btree_insert *trans,
+bch2_extent_can_insert(struct btree_trans *trans,
struct btree_insert_entry *insert,
unsigned *u64s)
{
@@ -952,9 +902,6 @@ bch2_extent_can_insert(struct btree_insert *trans,
struct bkey_s_c k;
int sectors;
- BUG_ON(trans->flags & BTREE_INSERT_ATOMIC &&
- !bch2_extent_is_atomic(&insert->k->k, insert->iter));
-
/*
* We avoid creating whiteouts whenever possible when deleting, but
* those optimizations mean we may potentially insert two whiteouts
@@ -998,12 +945,11 @@ bch2_extent_can_insert(struct btree_insert *trans,
}
static void
-extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
+extent_squash(struct bch_fs *c, struct btree_iter *iter,
+ struct bkey_i *insert,
struct bkey_packed *_k, struct bkey_s k,
enum bch_extent_overlap overlap)
{
- struct bch_fs *c = s->trans->c;
- struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0];
switch (overlap) {
@@ -1089,34 +1035,39 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
}
}
-static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
+struct extent_insert_state {
+ struct bkey_i whiteout;
+ bool update_journal;
+ bool update_btree;
+ bool deleting;
+};
+
+static void __bch2_insert_fixup_extent(struct bch_fs *c,
+ struct btree_iter *iter,
+ struct bkey_i *insert,
+ struct extent_insert_state *s)
{
- struct btree_iter *iter = s->insert->iter;
struct btree_iter_level *l = &iter->l[0];
struct bkey_packed *_k;
struct bkey unpacked;
- struct bkey_i *insert = s->insert->k;
- while (bkey_cmp(s->committed, insert->k.p) < 0 &&
- (_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
+ while ((_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b,
KEY_TYPE_discard))) {
struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked);
- enum bch_extent_overlap overlap = bch2_extent_overlap(&insert->k, k.k);
-
- EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0);
+ struct bpos cur_end = bpos_min(insert->k.p, k.k->p);
+ enum bch_extent_overlap overlap =
+ bch2_extent_overlap(&insert->k, k.k);
if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
break;
- s->committed = bpos_min(s->insert->k->k.p, k.k->p);
-
if (!bkey_whiteout(k.k))
s->update_journal = true;
if (!s->update_journal) {
- bch2_cut_front(s->committed, insert);
- bch2_cut_front(s->committed, &s->whiteout);
- bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
+ bch2_cut_front(cur_end, insert);
+ bch2_cut_front(cur_end, &s->whiteout);
+ bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
goto next;
}
@@ -1150,19 +1101,16 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
_k->needs_whiteout = false;
}
- extent_squash(s, insert, _k, k, overlap);
+ extent_squash(c, iter, insert, _k, k, overlap);
if (!s->update_btree)
- bch2_cut_front(s->committed, insert);
+ bch2_cut_front(cur_end, insert);
next:
if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
overlap == BCH_EXTENT_OVERLAP_MIDDLE)
break;
}
- if (bkey_cmp(s->committed, insert->k.p) < 0)
- s->committed = bpos_min(s->insert->k->k.p, l->b->key.k.p);
-
/*
* may have skipped past some deleted extents greater than the insert
* key, before we got to a non deleted extent and knew we could bail out
@@ -1172,7 +1120,7 @@ next:
struct btree_node_iter node_iter = l->iter;
while ((_k = bch2_btree_node_iter_prev_all(&node_iter, l->b)) &&
- bkey_cmp_left_packed(l->b, _k, &s->committed) > 0)
+ bkey_cmp_left_packed(l->b, _k, &insert->k.p) > 0)
l->iter = node_iter;
}
}
@@ -1216,48 +1164,55 @@ next:
* If the end of iter->pos is not the same as the end of insert, then
* key insertion needs to continue/be retried.
*/
-enum btree_insert_ret
-bch2_insert_fixup_extent(struct btree_insert *trans,
- struct btree_insert_entry *insert)
+void bch2_insert_fixup_extent(struct btree_trans *trans,
+ struct btree_insert_entry *insert)
{
+ struct bch_fs *c = trans->c;
struct btree_iter *iter = insert->iter;
- struct btree *b = iter->l[0].b;
struct extent_insert_state s = {
- .trans = trans,
- .insert = insert,
- .committed = iter->pos,
-
.whiteout = *insert->k,
.update_journal = !bkey_whiteout(&insert->k->k),
.update_btree = !bkey_whiteout(&insert->k->k),
.deleting = bkey_whiteout(&insert->k->k),
};
+ BKEY_PADDED(k) tmp;
EBUG_ON(iter->level);
EBUG_ON(!insert->k->k.size);
-
- /*
- * As we process overlapping extents, we advance @iter->pos both to
- * signal to our caller (btree_insert_key()) how much of @insert->k has
- * been inserted, and also to keep @iter->pos consistent with
- * @insert->k and the node iterator that we're advancing:
- */
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
- __bch2_insert_fixup_extent(&s);
+ __bch2_insert_fixup_extent(c, iter, insert->k, &s);
- extent_insert_committed(&s);
+ bch2_btree_iter_set_pos_same_leaf(iter, insert->k->k.p);
- EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
- EBUG_ON(bkey_cmp(iter->pos, s.committed));
+ if (s.update_btree) {
+ bkey_copy(&tmp.k, insert->k);
+
+ if (s.deleting)
+ tmp.k.k.type = KEY_TYPE_discard;
+#if 0
+ /* disabled due to lock recursion - mark_lock: */
+ if (debug_check_bkeys(c))
+ bch2_bkey_debugcheck(c, iter->l[0].b,
+ bkey_i_to_s_c(&tmp.k));
+#endif
+ EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
- if (insert->k->k.size) {
- /* got to the end of this leaf node */
- BUG_ON(bkey_cmp(iter->pos, b->key.k.p));
- return BTREE_INSERT_NEED_TRAVERSE;
+ extent_bset_insert(c, iter, &tmp.k);
}
- return BTREE_INSERT_OK;
+ if (s.update_journal) {
+ bkey_copy(&tmp.k, !s.deleting ? insert->k : &s.whiteout);
+
+ if (s.deleting)
+ tmp.k.k.type = KEY_TYPE_discard;
+
+ EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
+
+ bch2_btree_journal_key(trans, iter, &tmp.k);
+ }
+
+ bch2_cut_front(insert->k->k.p, insert->k);
}
const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k)
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 698b2581..77d69841 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -6,7 +6,7 @@
#include "extents_types.h"
struct bch_fs;
-struct btree_insert;
+struct btree_trans;
struct btree_insert_entry;
/* extent entries: */
@@ -406,21 +406,13 @@ enum merge_result bch2_reservation_merge(struct bch_fs *,
}
void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *);
-
-static inline bool bch2_extent_is_atomic(struct bkey *k,
- struct btree_iter *iter)
-{
- struct btree *b = iter->l[0].b;
-
- return bkey_cmp(k->p, b->key.k.p) <= 0 &&
- bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0;
-}
+bool bch2_extent_is_atomic(struct bkey_i *, struct btree_iter *);
enum btree_insert_ret
-bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *,
+bch2_extent_can_insert(struct btree_trans *, struct btree_insert_entry *,
unsigned *);
-enum btree_insert_ret
-bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *);
+void bch2_insert_fixup_extent(struct btree_trans *,
+ struct btree_insert_entry *);
void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
unsigned, unsigned);
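
bch2_extent_is_atomic() is now defined in terms of the same bch2_extent_atomic_end() that bch2_extent_trim_atomic() uses, so the two can no longer disagree about where a leaf's boundary lies. A sketch of how callers use the pair, with k and iter assumed from the caller:

	BKEY_PADDED(k) split;

	bkey_copy(&split.k, k);
	bch2_extent_trim_atomic(&split.k, iter);	/* clamp to leaf end */

	/* after trimming, the key is atomic by construction: */
	EBUG_ON(!bch2_extent_is_atomic(&split.k, iter));
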
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 55fc88d3..f0560675 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -1530,7 +1530,7 @@ static struct bch_fs *__bch2_open_as_blockdevs(const char *dev_name, char * cons
mutex_lock(&c->state_lock);
- if (!bch2_fs_running(c)) {
+ if (!test_bit(BCH_FS_STARTED, &c->flags)) {
mutex_unlock(&c->state_lock);
closure_put(&c->cl);
pr_err("err mounting %s: incomplete filesystem", dev_name);
@@ -1586,8 +1586,6 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
return ret;
if (opts.read_only != c->opts.read_only) {
- const char *err = NULL;
-
mutex_lock(&c->state_lock);
if (opts.read_only) {
@@ -1595,9 +1593,10 @@ static int bch2_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags |= MS_RDONLY;
} else {
- err = bch2_fs_read_write(c);
- if (err) {
- bch_err(c, "error going rw: %s", err);
+ ret = bch2_fs_read_write(c);
+ if (ret) {
+ bch_err(c, "error going rw: %i", ret);
+ mutex_unlock(&c->state_lock);
return -EINVAL;
}
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 42bd2f7a..fb0cb9a4 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -151,7 +151,7 @@ static void hash_check_set_inode(struct hash_check *h, struct bch_fs *c,
}
static int hash_redo_key(const struct bch_hash_desc desc,
- struct hash_check *h, struct bch_fs *c,
+ struct btree_trans *trans, struct hash_check *h,
struct btree_iter *k_iter, struct bkey_s_c k,
u64 hashed)
{
@@ -164,15 +164,17 @@ static int hash_redo_key(const struct bch_hash_desc desc,
bkey_reassemble(tmp, k);
- ret = bch2_btree_delete_at(k_iter, 0);
+ ret = bch2_btree_delete_at(trans, k_iter, 0);
if (ret)
goto err;
bch2_btree_iter_unlock(k_iter);
- bch2_hash_set(desc, &h->info, c, k_iter->pos.inode, NULL, tmp,
- BTREE_INSERT_NOFAIL|
- BCH_HASH_SET_MUST_CREATE);
+ bch2_hash_set(trans, desc, &h->info, k_iter->pos.inode,
+ tmp, BCH_HASH_SET_MUST_CREATE);
+ ret = bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW);
err:
kfree(tmp);
return ret;
@@ -202,7 +204,8 @@ retry:
ret = bch2_hash_delete_at(&trans, desc, info, iter) ?:
bch2_trans_commit(&trans, NULL, NULL,
BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOFAIL);
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW);
err:
if (ret == -EINTR)
goto retry;
@@ -271,9 +274,10 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc,
}
static int hash_check_key(const struct bch_hash_desc desc,
- struct hash_check *h, struct bch_fs *c,
+ struct btree_trans *trans, struct hash_check *h,
struct btree_iter *k_iter, struct bkey_s_c k)
{
+ struct bch_fs *c = trans->c;
char buf[200];
u64 hashed;
int ret = 0;
@@ -299,7 +303,7 @@ static int hash_check_key(const struct bch_hash_desc desc,
hashed, h->chain->pos.offset,
(bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf))) {
- ret = hash_redo_key(desc, h, c, k_iter, k, hashed);
+ ret = hash_redo_key(desc, trans, h, k_iter, k, hashed);
if (ret) {
bch_err(c, "hash_redo_key err %i", ret);
return ret;
@@ -312,9 +316,10 @@ fsck_err:
return ret;
}
-static int check_dirent_hash(struct hash_check *h, struct bch_fs *c,
+static int check_dirent_hash(struct btree_trans *trans, struct hash_check *h,
struct btree_iter *iter, struct bkey_s_c *k)
{
+ struct bch_fs *c = trans->c;
struct bkey_i_dirent *d = NULL;
int ret = -EINVAL;
char buf[200];
@@ -359,9 +364,11 @@ static int check_dirent_hash(struct hash_check *h, struct bch_fs *c,
if (fsck_err(c, "dirent with junk at end, was %s (%zu) now %s (%u)",
buf, strlen(buf), d->v.d_name, len)) {
- ret = bch2_btree_insert_at(c, NULL, NULL,
- BTREE_INSERT_NOFAIL,
- BTREE_INSERT_ENTRY(iter, &d->k_i));
+ bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &d->k_i));
+
+ ret = bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW);
if (ret)
goto err;
@@ -383,8 +390,8 @@ err_redo:
k->k->p.offset, hash, h->chain->pos.offset,
(bch2_bkey_val_to_text(&PBUF(buf), c,
*k), buf))) {
- ret = hash_redo_key(bch2_dirent_hash_desc,
- h, c, iter, *k, hash);
+ ret = hash_redo_key(bch2_dirent_hash_desc, trans,
+ h, iter, *k, hash);
if (ret)
bch_err(c, "hash_redo_key err %i", ret);
else
@@ -531,7 +538,7 @@ static int check_dirents(struct bch_fs *c)
mode_to_type(w.inode.bi_mode),
(bch2_bkey_val_to_text(&PBUF(buf), c,
k), buf))) {
- ret = bch2_btree_delete_at(iter, 0);
+ ret = bch2_btree_delete_at(&trans, iter, 0);
if (ret)
goto err;
continue;
@@ -540,7 +547,7 @@ static int check_dirents(struct bch_fs *c)
if (w.first_this_inode && w.have_inode)
hash_check_set_inode(&h, c, &w.inode);
- ret = check_dirent_hash(&h, c, iter, &k);
+ ret = check_dirent_hash(&trans, &h, iter, &k);
if (ret > 0) {
ret = 0;
continue;
@@ -622,9 +629,12 @@ static int check_dirents(struct bch_fs *c)
bkey_reassemble(&n->k_i, d.s_c);
n->v.d_type = mode_to_type(target.bi_mode);
- ret = bch2_btree_insert_at(c, NULL, NULL,
- BTREE_INSERT_NOFAIL,
- BTREE_INSERT_ENTRY(iter, &n->k_i));
+ bch2_trans_update(&trans,
+ BTREE_INSERT_ENTRY(iter, &n->k_i));
+
+ ret = bch2_trans_commit(&trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW);
kfree(n);
if (ret)
goto err;
@@ -668,7 +678,7 @@ static int check_xattrs(struct bch_fs *c)
if (fsck_err_on(!w.have_inode, c,
"xattr for missing inode %llu",
k.k->p.inode)) {
- ret = bch2_btree_delete_at(iter, 0);
+ ret = bch2_btree_delete_at(&trans, iter, 0);
if (ret)
goto err;
continue;
@@ -677,7 +687,7 @@ static int check_xattrs(struct bch_fs *c)
if (w.first_this_inode && w.have_inode)
hash_check_set_inode(&h, c, &w.inode);
- ret = hash_check_key(bch2_xattr_hash_desc, &h, c, iter, k);
+ ret = hash_check_key(bch2_xattr_hash_desc, &trans, &h, iter, k);
if (ret)
goto fsck_err;
}
@@ -1162,12 +1172,13 @@ fsck_err:
return ret;
}
-static int check_inode(struct bch_fs *c,
+static int check_inode(struct btree_trans *trans,
struct bch_inode_unpacked *lostfound_inode,
struct btree_iter *iter,
struct bkey_s_c_inode inode,
struct nlink *link)
{
+ struct bch_fs *c = trans->c;
struct bch_inode_unpacked u;
bool do_update = false;
int ret = 0;
@@ -1258,10 +1269,11 @@ static int check_inode(struct bch_fs *c,
struct bkey_inode_buf p;
bch2_inode_pack(&p, &u);
+ bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
- ret = bch2_btree_insert_at(c, NULL, NULL,
- BTREE_INSERT_NOFAIL,
- BTREE_INSERT_ENTRY(iter, &p.inode.k_i));
+ ret = bch2_trans_commit(trans, NULL, NULL,
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW);
if (ret && ret != -EINTR)
bch_err(c, "error in fs gc: error %i "
"updating inode", ret);
@@ -1276,25 +1288,29 @@ static int bch2_gc_walk_inodes(struct bch_fs *c,
nlink_table *links,
u64 range_start, u64 range_end)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
struct nlink *link, zero_links = { 0, 0 };
struct genradix_iter nlinks_iter;
int ret = 0, ret2 = 0;
u64 nlinks_pos;
- bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0), 0);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
+ POS(range_start, 0), 0);
nlinks_iter = genradix_iter_init(links, 0);
- while ((k = bch2_btree_iter_peek(&iter)).k &&
- !btree_iter_err(k)) {
+ while ((k = bch2_btree_iter_peek(iter)).k &&
+ !(ret2 = btree_iter_err(k))) {
peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
- if (!link && (!k.k || iter.pos.inode >= range_end))
+ if (!link && (!k.k || iter->pos.inode >= range_end))
break;
nlinks_pos = range_start + nlinks_iter.pos;
- if (iter.pos.inode > nlinks_pos) {
+ if (iter->pos.inode > nlinks_pos) {
/* Should have been caught by dirents pass: */
need_fsck_err_on(link && link->count, c,
"missing inode %llu (nlink %u)",
@@ -1303,7 +1319,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
goto peek_nlinks;
}
- if (iter.pos.inode < nlinks_pos || !link)
+ if (iter->pos.inode < nlinks_pos || !link)
link = &zero_links;
if (k.k && k.k->type == KEY_TYPE_inode) {
@@ -1311,9 +1327,9 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
* Avoid potential deadlocks with iter for
* truncate/rm/etc.:
*/
- bch2_btree_iter_unlock(&iter);
+ bch2_btree_iter_unlock(iter);
- ret = check_inode(c, lostfound_inode, &iter,
+ ret = check_inode(&trans, lostfound_inode, iter,
bkey_s_c_to_inode(k), link);
BUG_ON(ret == -EINTR);
if (ret)
@@ -1325,14 +1341,15 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
nlinks_pos, link->count);
}
- if (nlinks_pos == iter.pos.inode)
+ if (nlinks_pos == iter->pos.inode)
genradix_iter_advance(&nlinks_iter, links);
- bch2_btree_iter_next(&iter);
- bch2_btree_iter_cond_resched(&iter);
+ bch2_btree_iter_next(iter);
+ bch2_btree_iter_cond_resched(iter);
}
fsck_err:
- ret2 = bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
+
if (ret2)
bch_err(c, "error in fs gc: btree error %i while walking inodes", ret2);
@@ -1378,12 +1395,18 @@ static int check_inode_nlinks(struct bch_fs *c,
noinline_for_stack
static int check_inodes_fast(struct bch_fs *c)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_s_c_inode inode;
int ret = 0;
- for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) {
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES,
+ POS_MIN, 0);
+
+ for_each_btree_key_continue(iter, 0, k) {
if (k.k->type != KEY_TYPE_inode)
continue;
@@ -1393,14 +1416,19 @@ static int check_inodes_fast(struct bch_fs *c)
(BCH_INODE_I_SIZE_DIRTY|
BCH_INODE_I_SECTORS_DIRTY|
BCH_INODE_UNLINKED)) {
- ret = check_inode(c, NULL, &iter, inode, NULL);
+ ret = check_inode(&trans, NULL, iter, inode, NULL);
BUG_ON(ret == -EINTR);
if (ret)
break;
}
}
- return bch2_btree_iter_unlock(&iter) ?: ret;
+ if (!ret)
+ ret = bch2_btree_iter_unlock(iter);
+
+ bch2_trans_exit(&trans);
+
+ return ret;
}
/*
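
All of the fsck repair paths above now commit with BTREE_INSERT_LAZY_RW, so commits issued while the filesystem is still read-only can trigger the (now verified) transition to read-write on demand instead of requiring the fs to be marked dirty up front; this pairs with the recovery.c change below. The recurring repair-commit step, sketched with trans, iter and a hypothetical fixed_key standing in for the surrounding check's repaired key:

	bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &fixed_key->k_i));

	ret = bch2_trans_commit(trans, NULL, NULL,
				BTREE_INSERT_NOFAIL|
				BTREE_INSERT_LAZY_RW);
	if (ret)
		goto err;
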
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index f851e3b7..a555a8af 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -368,7 +368,8 @@ int bch2_inode_create(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_i_inode_generation delete;
struct bpos start = POS(inode_nr, 0);
struct bpos end = POS(inode_nr + 1, 0);
@@ -391,17 +392,17 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
if (ret)
return ret;
- bch2_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inode_nr, 0),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(inode_nr, 0),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
do {
- struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
u32 bi_generation = 0;
ret = btree_iter_err(k);
- if (ret) {
- bch2_btree_iter_unlock(&iter);
- return ret;
- }
+ if (ret)
+ break;
bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c,
"inode %llu not found when deleting",
@@ -432,13 +433,15 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
delete.v.bi_generation = cpu_to_le32(bi_generation);
}
- ret = bch2_btree_insert_at(c, NULL, NULL,
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOFAIL,
- BTREE_INSERT_ENTRY(&iter, &delete.k_i));
+ bch2_trans_update(&trans,
+ BTREE_INSERT_ENTRY(iter, &delete.k_i));
+
+ ret = bch2_trans_commit(&trans, NULL, NULL,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL);
} while (ret == -EINTR);
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
return ret;
}
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 64637687..11b927e6 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -276,19 +276,44 @@ static void bch2_write_done(struct closure *cl)
int bch2_write_index_default(struct bch_write_op *op)
{
+ struct bch_fs *c = op->c;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct keylist *keys = &op->insert_keys;
- struct btree_iter iter;
int ret;
- bch2_btree_iter_init(&iter, op->c, BTREE_ID_EXTENTS,
- bkey_start_pos(&bch2_keylist_front(keys)->k),
- BTREE_ITER_INTENT);
+ BUG_ON(bch2_keylist_empty(keys));
+ bch2_verify_keylist_sorted(keys);
+
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+ bkey_start_pos(&bch2_keylist_front(keys)->k),
+ BTREE_ITER_INTENT);
+
+ do {
+ BKEY_PADDED(k) split;
+
+ bkey_copy(&split.k, bch2_keylist_front(keys));
+
+ bch2_extent_trim_atomic(&split.k, iter);
+
+ bch2_trans_update(&trans,
+ BTREE_INSERT_ENTRY(iter, &split.k));
- ret = bch2_btree_insert_list_at(&iter, keys, &op->res,
- op_journal_seq(op),
+ ret = bch2_trans_commit(&trans, &op->res, op_journal_seq(op),
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE);
- bch2_btree_iter_unlock(&iter);
+ if (ret)
+ break;
+
+ if (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) < 0)
+ bch2_cut_front(iter->pos, bch2_keylist_front(keys));
+ else
+ bch2_keylist_pop_front(keys);
+ } while (!bch2_keylist_empty(keys));
+
+ bch2_trans_exit(&trans);
return ret;
}
@@ -1367,7 +1392,8 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
{
struct bch_fs *c = rbio->c;
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_i_extent *e;
BKEY_PADDED(k) new;
@@ -1378,10 +1404,13 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
if (rbio->pick.crc.compression_type)
return;
- bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, rbio->pos,
- BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
retry:
- k = bch2_btree_iter_peek(&iter);
+ bch2_trans_begin(&trans);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, rbio->pos,
+ BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek(iter);
if (IS_ERR_OR_NULL(k.k))
goto out;
@@ -1417,15 +1446,15 @@ retry:
if (!bch2_extent_narrow_crcs(e, new_crc))
goto out;
- ret = bch2_btree_insert_at(c, NULL, NULL,
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_NOWAIT,
- BTREE_INSERT_ENTRY(&iter, &e->k_i));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &e->k_i));
+ ret = bch2_trans_commit(&trans, NULL, NULL,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_NOWAIT);
if (ret == -EINTR)
goto retry;
out:
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
}
static bool should_narrow_crcs(struct bkey_s_c k,
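
The rewritten bch2_write_index_default() relies on bch2_trans_commit() leaving iter->pos at the end of what was actually inserted, so keylist consumption can be driven off the iterator. Restating the loop's advance step to highlight the invariant:

	/*
	 * invariant after a successful commit: iter->pos records how far
	 * the front key made it into the btree.
	 */
	if (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) < 0)
		bch2_cut_front(iter->pos, bch2_keylist_front(keys));	/* partial */
	else
		bch2_keylist_pop_front(keys);				/* fully inserted */
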
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index a58a1fb6..aabb68d2 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -1027,8 +1027,6 @@ void bch2_fs_journal_start(struct journal *j)
* only have to go down with the next journal entry we write:
*/
bch2_journal_seq_blacklist_write(j);
-
- queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0);
}
/* init/exit: */
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index a7791518..27404311 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -825,6 +825,8 @@ fsck_err:
static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
{
+ struct btree_trans trans;
+ struct btree_iter *iter;
/*
* We might cause compressed extents to be
* split, so we need to pass in a
@@ -833,20 +835,21 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
BKEY_PADDED(k) split;
- struct btree_iter iter;
int ret;
- bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
- bkey_start_pos(&k->k),
- BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+ bkey_start_pos(&k->k),
+ BTREE_ITER_INTENT);
do {
- ret = bch2_btree_iter_traverse(&iter);
+ ret = bch2_btree_iter_traverse(iter);
if (ret)
break;
bkey_copy(&split.k, k);
- bch2_cut_front(iter.pos, &split.k);
- bch2_extent_trim_atomic(&split.k, &iter);
+ bch2_cut_front(iter->pos, &split.k);
+ bch2_extent_trim_atomic(&split.k, iter);
ret = bch2_disk_reservation_add(c, &disk_res,
split.k.k.size *
@@ -854,13 +857,14 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret);
- ret = bch2_btree_insert_at(c, &disk_res, NULL,
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_JOURNAL_REPLAY,
- BTREE_INSERT_ENTRY(&iter, &split.k));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &split.k));
+ ret = bch2_trans_commit(&trans, &disk_res, NULL,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_JOURNAL_REPLAY);
} while ((!ret || ret == -EINTR) &&
- bkey_cmp(k->k.p, iter.pos));
+ bkey_cmp(k->k.p, iter->pos));
bch2_disk_reservation_put(c, &disk_res);
@@ -873,9 +877,9 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
* before journal replay finishes
*/
bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
- gc_pos_btree_node(iter.l[0].b),
+ gc_pos_btree_node(iter->l[0].b),
NULL, 0, 0);
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
return ret;
}
@@ -903,6 +907,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
ret = bch2_btree_insert(c, entry->btree_id, k,
NULL, NULL,
BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_LAZY_RW|
BTREE_INSERT_JOURNAL_REPLAY|
BTREE_INSERT_NOMARK);
break;
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index bb425d88..58d7d3a3 100644
--- a/libbcachefs/migrate.c
+++ b/libbcachefs/migrate.c
@@ -35,25 +35,29 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
{
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
BKEY_PADDED(key) tmp;
- struct btree_iter iter;
int ret = 0;
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+ POS_MIN, BTREE_ITER_PREFETCH);
+
mutex_lock(&c->replicas_gc_lock);
bch2_replicas_gc_start(c, (1 << BCH_DATA_USER)|(1 << BCH_DATA_CACHED));
- bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
- POS_MIN, BTREE_ITER_PREFETCH);
- while ((k = bch2_btree_iter_peek(&iter)).k &&
+ while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = btree_iter_err(k))) {
if (!bkey_extent_is_data(k.k) ||
!bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) {
ret = bch2_mark_bkey_replicas(c, k);
if (ret)
break;
- bch2_btree_iter_next(&iter);
+ bch2_btree_iter_next(iter);
continue;
}
@@ -71,12 +75,14 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
*/
bch2_extent_normalize(c, bkey_i_to_s(&tmp.key));
- iter.pos = bkey_start_pos(&tmp.key.k);
+ /* XXX not sketchy at all */
+ iter->pos = bkey_start_pos(&tmp.key.k);
- ret = bch2_btree_insert_at(c, NULL, NULL,
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOFAIL,
- BTREE_INSERT_ENTRY(&iter, &tmp.key));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &tmp.key));
+
+ ret = bch2_trans_commit(&trans, NULL, NULL,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL);
/*
* don't want to leave ret == -EINTR, since if we raced and
@@ -89,7 +95,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
break;
}
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
bch2_replicas_gc_end(c, ret);
mutex_unlock(&c->replicas_gc_lock);
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 98cfcefd..3315bedc 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -54,18 +54,21 @@ struct moving_context {
static int bch2_migrate_index_update(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct migrate_write *m =
container_of(op, struct migrate_write, op);
struct keylist *keys = &op->insert_keys;
- struct btree_iter iter;
int ret = 0;
- bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS,
- bkey_start_pos(&bch2_keylist_front(keys)->k),
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+ bkey_start_pos(&bch2_keylist_front(keys)->k),
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
while (1) {
- struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
+ struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
struct bkey_i_extent *insert, *new =
bkey_i_to_extent(bch2_keylist_front(keys));
BKEY_PADDED(k) _new, _insert;
@@ -74,10 +77,9 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bool did_work = false;
int nr;
- if (btree_iter_err(k)) {
- ret = bch2_btree_iter_unlock(&iter);
+ ret = btree_iter_err(k);
+ if (ret)
break;
- }
if (bversion_cmp(k.k->version, new->k.version) ||
!bkey_extent_is_data(k.k) ||
@@ -96,7 +98,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bkey_copy(&_new.k, bch2_keylist_front(keys));
new = bkey_i_to_extent(&_new.k);
- bch2_cut_front(iter.pos, &insert->k_i);
+ bch2_cut_front(iter->pos, &insert->k_i);
bch2_cut_back(new->k.p, &insert->k);
bch2_cut_back(insert->k.p, &new->k);
@@ -138,12 +140,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
if (insert->k.size < k.k->size &&
bch2_extent_is_compressed(k) &&
nr > 0) {
- /*
- * can't call bch2_disk_reservation_add() with btree
- * locks held, at least not without a song and dance
- */
- bch2_btree_iter_unlock(&iter);
-
ret = bch2_disk_reservation_add(c, &op->res,
keylist_sectors(keys) * nr, 0);
if (ret)
@@ -153,13 +149,15 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
goto next;
}
- ret = bch2_btree_insert_at(c, &op->res,
+ bch2_trans_update(&trans,
+ BTREE_INSERT_ENTRY(iter, &insert->k_i));
+
+ ret = bch2_trans_commit(&trans, &op->res,
op_journal_seq(op),
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
- m->data_opts.btree_insert_flags,
- BTREE_INSERT_ENTRY(&iter, &insert->k_i));
+ m->data_opts.btree_insert_flags);
if (!ret)
atomic_long_inc(&c->extent_migrate_done);
if (ret == -EINTR)
@@ -167,25 +165,25 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
if (ret)
break;
next:
- while (bkey_cmp(iter.pos, bch2_keylist_front(keys)->k.p) >= 0) {
+ while (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) >= 0) {
bch2_keylist_pop_front(keys);
if (bch2_keylist_empty(keys))
goto out;
}
- bch2_cut_front(iter.pos, bch2_keylist_front(keys));
+ bch2_cut_front(iter->pos, bch2_keylist_front(keys));
continue;
nomatch:
if (m->ctxt)
- atomic64_add(k.k->p.offset - iter.pos.offset,
+ atomic64_add(k.k->p.offset - iter->pos.offset,
&m->ctxt->stats->sectors_raced);
atomic_long_inc(&c->extent_migrate_raced);
trace_move_race(&new->k);
- bch2_btree_iter_next_slot(&iter);
+ bch2_btree_iter_next_slot(iter);
goto next;
}
out:
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
return ret;
}
diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c
index 44aacd40..6606e85c 100644
--- a/libbcachefs/quota.c
+++ b/libbcachefs/quota.c
@@ -707,7 +707,8 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
struct qc_dqblk *qdq)
{
struct bch_fs *c = sb->s_fs_info;
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_i_quota new_quota;
int ret;
@@ -718,9 +719,11 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
bkey_quota_init(&new_quota.k_i);
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
- bch2_btree_iter_init(&iter, c, BTREE_ID_QUOTAS, new_quota.k.p,
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- k = bch2_btree_iter_peek_slot(&iter);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_QUOTAS, new_quota.k.p,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+ k = bch2_btree_iter_peek_slot(iter);
ret = btree_iter_err(k);
if (unlikely(ret))
@@ -742,9 +745,11 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
if (qdq->d_fieldmask & QC_INO_HARD)
new_quota.v.c[Q_INO].hardlimit = cpu_to_le64(qdq->d_ino_hardlimit);
- ret = bch2_btree_insert_at(c, NULL, NULL, 0,
- BTREE_INSERT_ENTRY(&iter, &new_quota.k_i));
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &new_quota.k_i));
+
+ ret = bch2_trans_commit(&trans, NULL, NULL, 0);
+
+ bch2_trans_exit(&trans);
if (ret)
return ret;
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 77ab464a..4cde23b9 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -106,10 +106,11 @@ static int journal_replay_entry_early(struct bch_fs *c,
}
static int verify_superblock_clean(struct bch_fs *c,
- struct bch_sb_field_clean *clean,
+ struct bch_sb_field_clean **cleanp,
struct jset *j)
{
unsigned i;
+ struct bch_sb_field_clean *clean = *cleanp;
int ret = 0;
if (!clean || !j)
@@ -118,8 +119,11 @@ static int verify_superblock_clean(struct bch_fs *c,
if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
"superblock journal seq (%llu) doesn't match journal (%llu) after clean shutdown",
le64_to_cpu(clean->journal_seq),
- le64_to_cpu(j->seq)))
- bch2_fs_mark_clean(c, false);
+ le64_to_cpu(j->seq))) {
+ kfree(clean);
+ *cleanp = NULL;
+ return 0;
+ }
mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
"superblock read clock doesn't match journal after clean shutdown");
@@ -186,6 +190,8 @@ int bch2_fs_recovery(struct bch_fs *c)
LIST_HEAD(journal);
struct jset *j = NULL;
unsigned i;
+ bool run_gc = c->opts.fsck ||
+ !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
int ret;
mutex_lock(&c->sb_lock);
@@ -228,7 +234,7 @@ int bch2_fs_recovery(struct bch_fs *c)
BUG_ON(ret);
}
- ret = verify_superblock_clean(c, clean, j);
+ ret = verify_superblock_clean(c, &clean, j);
if (ret)
goto err;
@@ -270,15 +276,22 @@ int bch2_fs_recovery(struct bch_fs *c)
continue;
err = "invalid btree root pointer";
+ ret = -1;
if (r->error)
goto err;
+ if (i == BTREE_ID_ALLOC &&
+ test_reconstruct_alloc(c))
+ continue;
+
err = "error reading btree root";
- if (bch2_btree_root_read(c, i, &r->key, r->level)) {
+ ret = bch2_btree_root_read(c, i, &r->key, r->level);
+ if (ret) {
if (i != BTREE_ID_ALLOC)
goto err;
mustfix_fsck_err(c, "error reading btree root");
+ run_gc = true;
}
}
@@ -299,8 +312,7 @@ int bch2_fs_recovery(struct bch_fs *c)
set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
- if (!(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
- c->opts.fsck) {
+ if (run_gc) {
bch_verbose(c, "starting mark and sweep:");
err = "error in recovery";
ret = bch2_gc(c, &journal, true);
@@ -323,24 +335,12 @@ int bch2_fs_recovery(struct bch_fs *c)
goto out;
/*
- * Mark dirty before journal replay, fsck:
- * XXX: after a clean shutdown, this could be done lazily only when fsck
- * finds an error
- */
- bch2_fs_mark_clean(c, false);
-
- /*
* bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
* will give spurious errors about oldest_gen > bucket_gen -
* this is a hack but oh well.
*/
bch2_fs_journal_start(&c->journal);
- err = "error starting allocator";
- ret = bch2_fs_allocator_start(c);
- if (ret)
- goto err;
-
bch_verbose(c, "starting journal replay:");
err = "journal replay failed";
ret = bch2_journal_replay(c, &journal);
@@ -427,8 +427,8 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_fs_journal_start(&c->journal);
bch2_journal_set_replay_done(&c->journal);
- err = "error starting allocator";
- ret = bch2_fs_allocator_start(c);
+ err = "error going read write";
+ ret = __bch2_fs_read_write(c, true);
if (ret)
goto err;
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 99283b10..d0076bd4 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -206,22 +206,29 @@ static bool __replicas_has_entry(struct bch_replicas_cpu *r,
return __replicas_entry_idx(r, search) >= 0;
}
-bool bch2_replicas_marked(struct bch_fs *c,
+static bool bch2_replicas_marked_locked(struct bch_fs *c,
struct bch_replicas_entry *search,
bool check_gc_replicas)
{
- bool marked;
-
if (!search->nr_devs)
return true;
verify_replicas_entry_sorted(search);
- percpu_down_read_preempt_disable(&c->mark_lock);
- marked = __replicas_has_entry(&c->replicas, search) &&
+ return __replicas_has_entry(&c->replicas, search) &&
(!check_gc_replicas ||
likely((!c->replicas_gc.entries)) ||
__replicas_has_entry(&c->replicas_gc, search));
+}
+
+bool bch2_replicas_marked(struct bch_fs *c,
+ struct bch_replicas_entry *search,
+ bool check_gc_replicas)
+{
+ bool marked;
+
+ percpu_down_read_preempt_disable(&c->mark_lock);
+ marked = bch2_replicas_marked_locked(c, search, check_gc_replicas);
percpu_up_read_preempt_enable(&c->mark_lock);
return marked;
@@ -262,7 +269,7 @@ static int replicas_table_update(struct bch_fs *c,
struct bch_replicas_cpu *new_r)
{
struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL };
- struct bch_fs_usage __percpu *new_scratch = NULL;
+ struct bch_fs_usage *new_scratch = NULL;
unsigned bytes = sizeof(struct bch_fs_usage) +
sizeof(u64) * new_r->nr;
int ret = -ENOMEM;
@@ -272,8 +279,7 @@ static int replicas_table_update(struct bch_fs *c,
(c->usage[1] &&
!(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
GFP_NOIO))) ||
- !(new_scratch = __alloc_percpu_gfp(bytes, sizeof(u64),
- GFP_NOIO)))
+ !(new_scratch = kmalloc(bytes, GFP_NOIO)))
goto err;
if (c->usage[0])
@@ -289,7 +295,7 @@ static int replicas_table_update(struct bch_fs *c,
swap(c->replicas, *new_r);
ret = 0;
err:
- free_percpu(new_scratch);
+ kfree(new_scratch);
free_percpu(new_usage[1]);
free_percpu(new_usage[0]);
return ret;
@@ -389,9 +395,9 @@ int bch2_mark_replicas(struct bch_fs *c,
: bch2_mark_replicas_slowpath(c, r);
}
-bool bch2_bkey_replicas_marked(struct bch_fs *c,
- struct bkey_s_c k,
- bool check_gc_replicas)
+bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
+ struct bkey_s_c k,
+ bool check_gc_replicas)
{
struct bch_replicas_padded search;
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
@@ -400,13 +406,27 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
for (i = 0; i < cached.nr; i++) {
bch2_replicas_entry_cached(&search.e, cached.devs[i]);
- if (!bch2_replicas_marked(c, &search.e, check_gc_replicas))
+ if (!bch2_replicas_marked_locked(c, &search.e,
+ check_gc_replicas))
return false;
}
bkey_to_replicas(&search.e, k);
- return bch2_replicas_marked(c, &search.e, check_gc_replicas);
+ return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
+}
+
+bool bch2_bkey_replicas_marked(struct bch_fs *c,
+ struct bkey_s_c k,
+ bool check_gc_replicas)
+{
+ bool marked;
+
+ percpu_down_read_preempt_disable(&c->mark_lock);
+ marked = bch2_bkey_replicas_marked_locked(c, k, check_gc_replicas);
+ percpu_up_read_preempt_enable(&c->mark_lock);
+
+ return marked;
}
int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
index 1607b7bd..ad97e3bc 100644
--- a/libbcachefs/replicas.h
+++ b/libbcachefs/replicas.h
@@ -25,6 +25,8 @@ bool bch2_replicas_marked(struct bch_fs *,
int bch2_mark_replicas(struct bch_fs *,
struct bch_replicas_entry *);
+bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
+ struct bkey_s_c, bool);
bool bch2_bkey_replicas_marked(struct bch_fs *,
struct bkey_s_c, bool);
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index a1ca837b..f78f07bd 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -213,10 +213,10 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
}
static __always_inline
-int __bch2_hash_set(struct btree_trans *trans,
- const struct bch_hash_desc desc,
- const struct bch_hash_info *info,
- u64 inode, struct bkey_i *insert, int flags)
+int bch2_hash_set(struct btree_trans *trans,
+ const struct bch_hash_desc desc,
+ const struct bch_hash_info *info,
+ u64 inode, struct bkey_i *insert, int flags)
{
struct btree_iter *iter, *slot = NULL;
struct bkey_s_c k;
@@ -267,17 +267,6 @@ found:
return 0;
}
-static inline int bch2_hash_set(const struct bch_hash_desc desc,
- const struct bch_hash_info *info,
- struct bch_fs *c, u64 inode,
- u64 *journal_seq,
- struct bkey_i *insert, int flags)
-{
- return bch2_trans_do(c, journal_seq, flags|BTREE_INSERT_ATOMIC,
- __bch2_hash_set(&trans, desc, info,
- inode, insert, flags));
-}
-
static __always_inline
int bch2_hash_delete_at(struct btree_trans *trans,
const struct bch_hash_desc desc,
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index ca361424..9568cb46 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -502,6 +502,8 @@ reread:
if (bch2_crc_cmp(csum, sb->sb->csum))
return "bad checksum reading superblock";
+ sb->seq = le64_to_cpu(sb->sb->seq);
+
return NULL;
}
@@ -637,6 +639,27 @@ static void write_super_endio(struct bio *bio)
percpu_ref_put(&ca->io_ref);
}
+static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
+{
+ struct bch_sb *sb = ca->disk_sb.sb;
+ struct bio *bio = ca->disk_sb.bio;
+
+ bio_reset(bio);
+ bio_set_dev(bio, ca->disk_sb.bdev);
+ bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]);
+ bio->bi_iter.bi_size = 4096;
+ bio->bi_end_io = write_super_endio;
+ bio->bi_private = ca;
+ bio_set_op_attrs(bio, REQ_OP_READ, REQ_SYNC|REQ_META);
+ bch2_bio_map(bio, ca->sb_read_scratch);
+
+ this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_SB],
+ bio_sectors(bio));
+
+ percpu_ref_get(&ca->io_ref);
+ closure_bio_submit(bio, &c->sb_write);
+}
+
static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
{
struct bch_sb *sb = ca->disk_sb.sb;
@@ -666,7 +689,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
closure_bio_submit(bio, &c->sb_write);
}
-void bch2_write_super(struct bch_fs *c)
+int bch2_write_super(struct bch_fs *c)
{
struct closure *cl = &c->sb_write;
struct bch_dev *ca;
@@ -674,6 +697,7 @@ void bch2_write_super(struct bch_fs *c)
const char *err;
struct bch_devs_mask sb_written;
bool wrote, can_mount_without_written, can_mount_with_written;
+ int ret = 0;
lockdep_assert_held(&c->sb_lock);
@@ -689,6 +713,7 @@ void bch2_write_super(struct bch_fs *c)
err = bch2_sb_validate(&ca->disk_sb);
if (err) {
bch2_fs_inconsistent(c, "sb invalid before write: %s", err);
+ ret = -1;
goto out;
}
}
@@ -702,10 +727,27 @@ void bch2_write_super(struct bch_fs *c)
ca->sb_write_error = 0;
}
+ for_each_online_member(ca, c, i)
+ read_back_super(c, ca);
+ closure_sync(cl);
+
+ for_each_online_member(ca, c, i) {
+ if (!ca->sb_write_error &&
+ ca->disk_sb.seq !=
+ le64_to_cpu(ca->sb_read_scratch->seq)) {
+ bch2_fs_fatal_error(c,
+ "Superblock modified by another process");
+ percpu_ref_put(&ca->io_ref);
+ ret = -EROFS;
+ goto out;
+ }
+ }
+
do {
wrote = false;
for_each_online_member(ca, c, i)
- if (sb < ca->disk_sb.sb->layout.nr_superblocks) {
+ if (!ca->sb_write_error &&
+ sb < ca->disk_sb.sb->layout.nr_superblocks) {
write_one_super(c, ca, sb);
wrote = true;
}
@@ -713,9 +755,12 @@ void bch2_write_super(struct bch_fs *c)
sb++;
} while (wrote);
- for_each_online_member(ca, c, i)
+ for_each_online_member(ca, c, i) {
if (ca->sb_write_error)
__clear_bit(ca->dev_idx, sb_written.d);
+ else
+ ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq);
+ }
nr_wrote = dev_mask_nr(&sb_written);
@@ -738,13 +783,15 @@ void bch2_write_super(struct bch_fs *c)
* written anything (new filesystem), we continue if we'd be able to
* mount with the devices we did successfully write to:
*/
- bch2_fs_fatal_err_on(!nr_wrote ||
- (can_mount_without_written &&
- !can_mount_with_written), c,
- "Unable to write superblock to sufficient devices");
+ if (bch2_fs_fatal_err_on(!nr_wrote ||
+ (can_mount_without_written &&
+ !can_mount_with_written), c,
+ "Unable to write superblock to sufficient devices"))
+ ret = -1;
out:
/* Make new options visible after they're persistent: */
bch2_sb_update(c);
+ return ret;
}
/* BCH_SB_FIELD_journal: */
@@ -883,16 +930,22 @@ void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write);
}
-static void bch2_fs_mark_dirty(struct bch_fs *c)
+int bch2_fs_mark_dirty(struct bch_fs *c)
{
+ int ret;
+
+ /*
+ * Unconditionally write superblock, to verify it hasn't changed before
+ * we go rw:
+ */
+
mutex_lock(&c->sb_lock);
- if (BCH_SB_CLEAN(c->disk_sb.sb) ||
- (c->disk_sb.sb->compat[0] & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))) {
- SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
- c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
- bch2_write_super(c);
- }
+ SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+ c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+ ret = bch2_write_super(c);
mutex_unlock(&c->sb_lock);
+
+ return ret;
}
struct jset_entry *
@@ -989,17 +1042,12 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
return entry;
}
-void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
+void bch2_fs_mark_clean(struct bch_fs *c)
{
struct bch_sb_field_clean *sb_clean;
struct jset_entry *entry;
unsigned u64s;
- if (!clean) {
- bch2_fs_mark_dirty(c);
- return;
- }
-
mutex_lock(&c->sb_lock);
if (BCH_SB_CLEAN(c->disk_sb.sb))
goto out;
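
This is the core of the commit: before writing, bch2_write_super() now reads every member's superblock back and compares the on-disk sequence number against the one this process last wrote (ca->disk_sb.seq), failing with -EROFS if another process has bumped it, and the whole path reports errors instead of returning void. Call sites follow the shape of bch2_fs_mark_dirty() above; sketched:

	mutex_lock(&c->sb_lock);
	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
	ret = bch2_write_super(c);	/* -EROFS if the sb changed under us */
	mutex_unlock(&c->sb_lock);
	if (ret)
		return ret;
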
diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h
index c48294c8..aa91b821 100644
--- a/libbcachefs/super-io.h
+++ b/libbcachefs/super-io.h
@@ -88,7 +88,7 @@ int bch2_sb_realloc(struct bch_sb_handle *, unsigned);
const char *bch2_sb_validate(struct bch_sb_handle *);
int bch2_read_super(const char *, struct bch_opts *, struct bch_sb_handle *);
-void bch2_write_super(struct bch_fs *);
+int bch2_write_super(struct bch_fs *);
/* BCH_SB_FIELD_journal: */
@@ -140,7 +140,8 @@ bch2_journal_super_entries_add_common(struct bch_fs *,
void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
-void bch2_fs_mark_clean(struct bch_fs *, bool);
+int bch2_fs_mark_dirty(struct bch_fs *);
+void bch2_fs_mark_clean(struct bch_fs *);
void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
struct bch_sb_field *);
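
Since bch2_write_super() now returns int, a failed or raced superblock write can abort the transition to read-write rather than only being logged. A hedged sketch of the propagation the new prototypes imply (condensed; the real error handling is in __bch2_fs_read_write() in super.c below):

	/* bch2_fs_mark_dirty() clears the clean flag and writes the sb out;
	 * bch2_write_super()'s return value bubbles up to the caller: */
	ret = bch2_fs_mark_dirty(c);
	if (ret)
		goto err;	/* e.g. -EROFS: sb modified by another process */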
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 1b389172..3bcc3240 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -289,8 +289,10 @@ static void bch2_writes_disabled(struct percpu_ref *writes)
void bch2_fs_read_only(struct bch_fs *c)
{
- if (c->state == BCH_FS_RO)
+ if (!test_bit(BCH_FS_RW, &c->flags)) {
+ cancel_delayed_work_sync(&c->journal.reclaim_work);
return;
+ }
BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
@@ -332,10 +334,9 @@ void bch2_fs_read_only(struct bch_fs *c)
!test_bit(BCH_FS_ERROR, &c->flags) &&
!test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
test_bit(BCH_FS_STARTED, &c->flags))
- bch2_fs_mark_clean(c, true);
+ bch2_fs_mark_clean(c);
- if (c->state != BCH_FS_STOPPING)
- c->state = BCH_FS_RO;
+ clear_bit(BCH_FS_RW, &c->flags);
}
static void bch2_fs_read_only_work(struct work_struct *work)
@@ -364,55 +365,106 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c)
return ret;
}
-const char *bch2_fs_read_write(struct bch_fs *c)
+static int bch2_fs_read_write_late(struct bch_fs *c)
{
struct bch_dev *ca;
- const char *err = NULL;
unsigned i;
+ int ret;
- if (c->state == BCH_FS_RW)
- return NULL;
+ ret = bch2_gc_thread_start(c);
+ if (ret) {
+ bch_err(c, "error starting gc thread");
+ return ret;
+ }
+
+ for_each_rw_member(ca, c, i) {
+ ret = bch2_copygc_start(c, ca);
+ if (ret) {
+ bch_err(c, "error starting copygc threads");
+ percpu_ref_put(&ca->io_ref);
+ return ret;
+ }
+ }
+
+ ret = bch2_rebalance_start(c);
+ if (ret) {
+ bch_err(c, "error starting rebalance thread");
+ return ret;
+ }
+
+ schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
+
+ return 0;
+}
+
+int __bch2_fs_read_write(struct bch_fs *c, bool early)
+{
+ struct bch_dev *ca;
+ unsigned i;
+ int ret;
+
+ if (test_bit(BCH_FS_RW, &c->flags))
+ return 0;
- bch2_fs_mark_clean(c, false);
+ ret = bch2_fs_mark_dirty(c);
+ if (ret)
+ goto err;
for_each_rw_member(ca, c, i)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
- err = "error starting allocator thread";
- for_each_rw_member(ca, c, i)
- if (bch2_dev_allocator_start(ca)) {
- percpu_ref_put(&ca->io_ref);
+ if (!test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags)) {
+ ret = bch2_fs_allocator_start(c);
+ if (ret) {
+ bch_err(c, "error initializing allocator");
goto err;
}
- set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
-
- err = "error starting btree GC thread";
- if (bch2_gc_thread_start(c))
- goto err;
+ set_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags);
+ }
- err = "error starting copygc thread";
- for_each_rw_member(ca, c, i)
- if (bch2_copygc_start(c, ca)) {
+ for_each_rw_member(ca, c, i) {
+ ret = bch2_dev_allocator_start(ca);
+ if (ret) {
+ bch_err(c, "error starting allocator threads");
percpu_ref_put(&ca->io_ref);
goto err;
}
+ }
- err = "error starting rebalance thread";
- if (bch2_rebalance_start(c))
- goto err;
+ set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
- schedule_delayed_work(&c->pd_controllers_update, 5 * HZ);
+ if (!early) {
+ ret = bch2_fs_read_write_late(c);
+ if (ret)
+ goto err;
+ }
- if (c->state != BCH_FS_STARTING)
- percpu_ref_reinit(&c->writes);
+ percpu_ref_reinit(&c->writes);
+ set_bit(BCH_FS_RW, &c->flags);
- c->state = BCH_FS_RW;
- return NULL;
+ queue_delayed_work(c->journal_reclaim_wq,
+ &c->journal.reclaim_work, 0);
+ return 0;
err:
__bch2_fs_read_only(c);
- return err;
+ return ret;
+}
+
+int bch2_fs_read_write(struct bch_fs *c)
+{
+ return __bch2_fs_read_write(c, false);
+}
+
+int bch2_fs_read_write_early(struct bch_fs *c)
+{
+ lockdep_assert_held(&c->state_lock);
+
+ if (c->opts.read_only)
+ return -EROFS;
+
+ return __bch2_fs_read_write(c, true);
}
/* Filesystem startup/shutdown: */
@@ -435,7 +487,7 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
percpu_free_rwsem(&c->mark_lock);
- free_percpu(c->usage_scratch);
+ kfree(c->usage_scratch);
free_percpu(c->usage[0]);
free_percpu(c->pcpu);
mempool_exit(&c->btree_iters_pool);
@@ -604,6 +656,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_init(&c->btree_reserve_cache_lock);
mutex_init(&c->btree_interior_update_lock);
+ mutex_init(&c->usage_scratch_lock);
+
mutex_init(&c->bio_bounce_pages_lock);
bio_list_init(&c->btree_write_error_list);
@@ -626,7 +680,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
c->journal.write_time = &c->times[BCH_TIME_journal_write];
c->journal.delay_time = &c->times[BCH_TIME_journal_delay];
- c->journal.blocked_time = &c->times[BCH_TIME_journal_blocked];
+ c->journal.blocked_time = &c->times[BCH_TIME_blocked_journal];
c->journal.flush_seq_time = &c->times[BCH_TIME_journal_flush_seq];
bch2_fs_btree_cache_init_early(&c->btree_cache);
@@ -668,7 +722,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
!(c->journal_reclaim_wq = alloc_workqueue("bcache_journal",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
- percpu_ref_init(&c->writes, bch2_writes_disabled, 0, GFP_KERNEL) ||
+ percpu_ref_init(&c->writes, bch2_writes_disabled,
+ PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
mempool_init_kmalloc_pool(&c->btree_reserve_pool, 1,
sizeof(struct btree_reserve)) ||
mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
@@ -742,7 +797,7 @@ const char *bch2_fs_start(struct bch_fs *c)
mutex_lock(&c->state_lock);
- BUG_ON(c->state != BCH_FS_STARTING);
+ BUG_ON(test_bit(BCH_FS_STARTED, &c->flags));
mutex_lock(&c->sb_lock);
@@ -776,9 +831,12 @@ const char *bch2_fs_start(struct bch_fs *c)
if (c->opts.read_only) {
bch2_fs_read_only(c);
} else {
- err = bch2_fs_read_write(c);
- if (err)
+ if (!test_bit(BCH_FS_RW, &c->flags)
+ ? bch2_fs_read_write(c)
+ : bch2_fs_read_write_late(c)) {
+ err = "error going read write";
goto err;
+ }
}
set_bit(BCH_FS_STARTED, &c->flags);
@@ -882,6 +940,7 @@ static void bch2_dev_free(struct bch_dev *ca)
free_percpu(ca->io_done);
bioset_exit(&ca->replica_set);
bch2_dev_buckets_free(ca);
+ kfree(ca->sb_read_scratch);
bch2_time_stats_exit(&ca->io_latency[WRITE]);
bch2_time_stats_exit(&ca->io_latency[READ]);
@@ -995,6 +1054,7 @@ static struct bch_dev *__bch2_dev_alloc(struct bch_fs *c,
0, GFP_KERNEL) ||
percpu_ref_init(&ca->io_ref, bch2_dev_io_ref_complete,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
+ !(ca->sb_read_scratch = kmalloc(4096, GFP_KERNEL)) ||
bch2_dev_buckets_alloc(c, ca) ||
bioset_init(&ca->replica_set, 4,
offsetof(struct bch_write_bio, bio), 0) ||
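
The super.c rework replaces the BCH_FS_RO/BCH_FS_RW state enum checks with a BCH_FS_RW flag bit and splits going read-write into two phases: an early phase that only brings up the allocator (used during recovery, before the filesystem has fully started) and a late phase that starts the gc, copygc and rebalance threads. A standalone model of that control flow (plain C; fs_read_write(), start_allocator() and start_background() are hypothetical stand-ins for __bch2_fs_read_write() and the thread-start calls, not real bcachefs functions):

	#include <stdbool.h>
	#include <stdio.h>

	struct fs { bool rw; };

	static int start_allocator(struct fs *fs)  { (void)fs; return 0; } /* stub */
	static int start_background(struct fs *fs) { (void)fs; return 0; } /* gc/copygc/rebalance stub */

	static int fs_read_write(struct fs *fs, bool early)
	{
		int ret;

		if (fs->rw)
			return 0;		/* already rw: nothing to do */

		ret = start_allocator(fs);	/* always needed to go rw */
		if (ret)
			return ret;

		if (!early) {			/* recovery defers these */
			ret = start_background(fs);
			if (ret)
				return ret;
		}

		fs->rw = true;
		return 0;
	}

	int main(void)
	{
		struct fs fs = { 0 };
		printf("early: %d\n", fs_read_write(&fs, true));  /* goes rw, no bg threads */
		printf("again: %d\n", fs_read_write(&fs, false)); /* already rw: no-op */
		return 0;
	}

Note the model returns early when already read-write without starting the background threads; that is exactly why the bch2_fs_start() hunk above calls bch2_fs_read_write_late() explicitly when recovery has already taken the filesystem rw.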
diff --git a/libbcachefs/super.h b/libbcachefs/super.h
index 231bc529..9bb672c4 100644
--- a/libbcachefs/super.h
+++ b/libbcachefs/super.h
@@ -217,7 +217,10 @@ struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
bool bch2_fs_emergency_read_only(struct bch_fs *);
void bch2_fs_read_only(struct bch_fs *);
-const char *bch2_fs_read_write(struct bch_fs *);
+
+int __bch2_fs_read_write(struct bch_fs *, bool);
+int bch2_fs_read_write(struct bch_fs *);
+int bch2_fs_read_write_early(struct bch_fs *);
void bch2_fs_stop(struct bch_fs *);
diff --git a/libbcachefs/super_types.h b/libbcachefs/super_types.h
index ebb238aa..6277be42 100644
--- a/libbcachefs/super_types.h
+++ b/libbcachefs/super_types.h
@@ -10,6 +10,7 @@ struct bch_sb_handle {
unsigned have_layout:1;
unsigned have_bio:1;
unsigned fs_sb:1;
+ u64 seq;
};
struct bch_devs_mask {
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index b56db15d..a6d70ce5 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -288,7 +288,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
compressed_sectors_compressed = 0,
compressed_sectors_uncompressed = 0;
- if (!bch2_fs_running(c))
+ if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
@@ -481,7 +481,7 @@ STORE(__bch2_fs)
BCH_DEBUG_PARAMS()
#undef BCH_DEBUG_PARAM
- if (!bch2_fs_running(c))
+ if (!test_bit(BCH_FS_STARTED, &c->flags))
return -EPERM;
/* Debugging: */
diff --git a/libbcachefs/tests.c b/libbcachefs/tests.c
index 0b4a1143..c9362af5 100644
--- a/libbcachefs/tests.c
+++ b/libbcachefs/tests.c
@@ -27,57 +27,63 @@ static void delete_test_keys(struct bch_fs *c)
static void test_delete(struct bch_fs *c, u64 nr)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_i_cookie k;
int ret;
bkey_cookie_init(&k.k_i);
- bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, k.k.p,
- BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p,
+ BTREE_ITER_INTENT);
- ret = bch2_btree_iter_traverse(&iter);
+ ret = bch2_btree_iter_traverse(iter);
BUG_ON(ret);
- ret = bch2_btree_insert_at(c, NULL, NULL, 0,
- BTREE_INSERT_ENTRY(&iter, &k.k_i));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &k.k_i));
+ ret = bch2_trans_commit(&trans, NULL, NULL, 0);
BUG_ON(ret);
pr_info("deleting once");
- ret = bch2_btree_delete_at(&iter, 0);
+ ret = bch2_btree_delete_at(&trans, iter, 0);
BUG_ON(ret);
pr_info("deleting twice");
- ret = bch2_btree_delete_at(&iter, 0);
+ ret = bch2_btree_delete_at(&trans, iter, 0);
BUG_ON(ret);
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
}
static void test_delete_written(struct bch_fs *c, u64 nr)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_i_cookie k;
int ret;
bkey_cookie_init(&k.k_i);
- bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS, k.k.p,
- BTREE_ITER_INTENT);
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p,
+ BTREE_ITER_INTENT);
- ret = bch2_btree_iter_traverse(&iter);
+ ret = bch2_btree_iter_traverse(iter);
BUG_ON(ret);
- ret = bch2_btree_insert_at(c, NULL, NULL, 0,
- BTREE_INSERT_ENTRY(&iter, &k.k_i));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &k.k_i));
+ ret = bch2_trans_commit(&trans, NULL, NULL, 0);
BUG_ON(ret);
bch2_journal_flush_all_pins(&c->journal);
- ret = bch2_btree_delete_at(&iter, 0);
+ ret = bch2_btree_delete_at(&trans, iter, 0);
BUG_ON(ret);
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
}
static void test_iterate(struct bch_fs *c, u64 nr)
@@ -414,26 +420,29 @@ static void rand_mixed(struct bch_fs *c, u64 nr)
u64 i;
for (i = 0; i < nr; i++) {
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
- bch2_btree_iter_init(&iter, c, BTREE_ID_DIRENTS,
- POS(0, test_rand()), 0);
+ bch2_trans_init(&trans, c);
- k = bch2_btree_iter_peek(&iter);
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS,
+ POS(0, test_rand()), 0);
+
+ k = bch2_btree_iter_peek(iter);
if (!(i & 3) && k.k) {
struct bkey_i_cookie k;
bkey_cookie_init(&k.k_i);
- k.k.p = iter.pos;
+ k.k.p = iter->pos;
- ret = bch2_btree_insert_at(c, NULL, NULL, 0,
- BTREE_INSERT_ENTRY(&iter, &k.k_i));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &k.k_i));
+ ret = bch2_trans_commit(&trans, NULL, NULL, 0);
BUG_ON(ret);
}
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
}
}
@@ -456,7 +465,8 @@ static void rand_delete(struct bch_fs *c, u64 nr)
static void seq_insert(struct bch_fs *c, u64 nr)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_i_cookie insert;
int ret;
@@ -464,18 +474,22 @@ static void seq_insert(struct bch_fs *c, u64 nr)
bkey_cookie_init(&insert.k_i);
- for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN,
- BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k) {
- insert.k.p = iter.pos;
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN,
+ BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+ for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
+ insert.k.p = iter->pos;
- ret = bch2_btree_insert_at(c, NULL, NULL, 0,
- BTREE_INSERT_ENTRY(&iter, &insert.k_i));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &insert.k_i));
+ ret = bch2_trans_commit(&trans, NULL, NULL, 0);
BUG_ON(ret);
if (++i == nr)
break;
}
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
}
static void seq_lookup(struct bch_fs *c, u64 nr)
@@ -490,21 +504,26 @@ static void seq_lookup(struct bch_fs *c, u64 nr)
static void seq_overwrite(struct bch_fs *c, u64 nr)
{
- struct btree_iter iter;
+ struct btree_trans trans;
+ struct btree_iter *iter;
struct bkey_s_c k;
int ret;
- for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN,
- BTREE_ITER_INTENT, k) {
+ bch2_trans_init(&trans, c);
+
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, POS_MIN,
+ BTREE_ITER_INTENT);
+
+ for_each_btree_key_continue(iter, 0, k) {
struct bkey_i_cookie u;
bkey_reassemble(&u.k_i, k);
- ret = bch2_btree_insert_at(c, NULL, NULL, 0,
- BTREE_INSERT_ENTRY(&iter, &u.k_i));
+ bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &u.k_i));
+ ret = bch2_trans_commit(&trans, NULL, NULL, 0);
BUG_ON(ret);
}
- bch2_btree_iter_unlock(&iter);
+ bch2_trans_exit(&trans);
}
static void seq_delete(struct bch_fs *c, u64 nr)
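
Every conversion in tests.c above is the same mechanical rewrite: an on-stack btree_iter plus a one-shot bch2_btree_insert_at() call becomes a btree_trans that owns its iterators and batches updates until commit. Condensed to its skeleton (kernel-style sketch using the names and argument shapes from the hunks above; not a standalone program):

	struct btree_trans trans;
	struct btree_iter *iter;
	struct bkey_i_cookie k;
	int ret;

	bkey_cookie_init(&k.k_i);
	bch2_trans_init(&trans, c);

	iter = bch2_trans_get_iter(&trans, BTREE_ID_DIRENTS, k.k.p,
				   BTREE_ITER_INTENT);

	/* queue the update in the transaction, then commit it: */
	bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &k.k_i));
	ret = bch2_trans_commit(&trans, NULL, NULL, 0);

	bch2_trans_exit(&trans);	/* releases every iterator the trans owns */

bch2_btree_delete_at() likewise gains the trans as its first argument, since deletes now go through the same transaction machinery.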
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index 4a4dba72..b204b53b 100644
--- a/libbcachefs/xattr.c
+++ b/libbcachefs/xattr.c
@@ -179,7 +179,7 @@ int bch2_xattr_set(struct btree_trans *trans, u64 inum,
memcpy(xattr->v.x_name, name, namelen);
memcpy(xattr_val(&xattr->v), value, size);
- ret = __bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
+ ret = bch2_hash_set(trans, bch2_xattr_hash_desc, hash_info,
inum, &xattr->k_i,
(flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
(flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));