summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- cmd_migrate.c 3
-rw-r--r-- libbcachefs/alloc_background.c 153
-rw-r--r-- libbcachefs/alloc_background.h 5
-rw-r--r-- libbcachefs/alloc_foreground.c 2
-rw-r--r-- libbcachefs/btree_gc.c 7
-rw-r--r-- libbcachefs/btree_types.h 2
-rw-r--r-- libbcachefs/buckets.c 6
-rw-r--r-- libbcachefs/buckets.h 6
-rw-r--r-- libbcachefs/ec.c 5
-rw-r--r-- libbcachefs/ec.h 2
-rw-r--r-- libbcachefs/fs-io.c 2
-rw-r--r-- libbcachefs/fs.c 2
-rw-r--r-- libbcachefs/io.c 16
-rw-r--r-- libbcachefs/io.h 6
-rw-r--r-- libbcachefs/move.c 7
-rw-r--r-- libbcachefs/recovery.c 38
-rw-r--r-- libbcachefs/super.c 65
-rw-r--r-- libbcachefs/super.h 1
19 files changed, 173 insertions, 157 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index d4dc4ead..59bf491a 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-0568ed488651273d01891c3481613dd652677edb
+26c226917f0455877387c1a325282e67e3283f54
diff --git a/cmd_migrate.c b/cmd_migrate.c
index 998275a0..797c51e0 100644
--- a/cmd_migrate.c
+++ b/cmd_migrate.c
@@ -596,8 +596,7 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
darray_free(s.extents);
genradix_free(&s.hardlinks);
- bool wrote;
- bch2_alloc_write(c, false, &wrote);
+ bch2_alloc_write(c, false);
}
static void find_superblock_space(ranges extents, struct dev_opts *dev)
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 9aa0b42b..54096e83 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -209,10 +209,25 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
static int bch2_alloc_read_fn(struct bch_fs *c, enum btree_id id,
unsigned level, struct bkey_s_c k)
{
- if (!level)
- bch2_mark_key(c, k, 0, 0, NULL, 0,
- BTREE_TRIGGER_ALLOC_READ|
- BTREE_TRIGGER_NOATOMIC);
+ struct bch_dev *ca;
+ struct bucket *g;
+ struct bkey_alloc_unpacked u;
+
+ if (level || k.k->type != KEY_TYPE_alloc)
+ return 0;
+
+ ca = bch_dev_bkey_exists(c, k.k->p.inode);
+ g = __bucket(ca, k.k->p.offset, 0);
+ u = bch2_alloc_unpack(k);
+
+ g->_mark.gen = u.gen;
+ g->_mark.data_type = u.data_type;
+ g->_mark.dirty_sectors = u.dirty_sectors;
+ g->_mark.cached_sectors = u.cached_sectors;
+ g->io_time[READ] = u.read_time;
+ g->io_time[WRITE] = u.write_time;
+ g->oldest_gen = u.oldest_gen;
+ g->gen_valid = 1;
return 0;
}
@@ -223,8 +238,11 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
unsigned i;
int ret = 0;
+ down_read(&c->gc_lock);
ret = bch2_btree_and_journal_walk(c, journal_keys, BTREE_ID_ALLOC,
NULL, bch2_alloc_read_fn);
+ up_read(&c->gc_lock);
+
if (ret) {
bch_err(c, "error reading alloc info: %i", ret);
return ret;
@@ -253,12 +271,6 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
return 0;
}
-enum alloc_write_ret {
- ALLOC_WROTE,
- ALLOC_NOWROTE,
- ALLOC_END,
-};
-
static int bch2_alloc_write_key(struct btree_trans *trans,
struct btree_iter *iter,
unsigned flags)
@@ -288,26 +300,17 @@ retry:
old_u = bch2_alloc_unpack(k);
- if (iter->pos.inode >= c->sb.nr_devices ||
- !c->devs[iter->pos.inode])
- return ALLOC_END;
-
percpu_down_read(&c->mark_lock);
ca = bch_dev_bkey_exists(c, iter->pos.inode);
ba = bucket_array(ca);
- if (iter->pos.offset >= ba->nbuckets) {
- percpu_up_read(&c->mark_lock);
- return ALLOC_END;
- }
-
g = &ba->b[iter->pos.offset];
m = READ_ONCE(g->mark);
new_u = alloc_mem_to_key(g, m);
percpu_up_read(&c->mark_lock);
if (!bkey_alloc_unpacked_cmp(old_u, new_u))
- return ALLOC_NOWROTE;
+ return 0;
a = bkey_alloc_init(&alloc_key.k);
a->k.p = iter->pos;
@@ -325,50 +328,55 @@ err:
return ret;
}
-int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
+int bch2_dev_alloc_write(struct bch_fs *c, struct bch_dev *ca, unsigned flags)
{
struct btree_trans trans;
struct btree_iter *iter;
- struct bch_dev *ca;
- unsigned i;
+ u64 first_bucket, nbuckets;
int ret = 0;
+ percpu_down_read(&c->mark_lock);
+ first_bucket = bucket_array(ca)->first_bucket;
+ nbuckets = bucket_array(ca)->nbuckets;
+ percpu_up_read(&c->mark_lock);
+
BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
- iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
+ iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
+ POS(ca->dev_idx, first_bucket),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- for_each_rw_member(ca, c, i) {
- unsigned first_bucket;
+ while (iter->pos.offset < nbuckets) {
+ bch2_trans_cond_resched(&trans);
- percpu_down_read(&c->mark_lock);
- first_bucket = bucket_array(ca)->first_bucket;
- percpu_up_read(&c->mark_lock);
+ ret = bch2_alloc_write_key(&trans, iter, flags);
+ if (ret)
+ break;
+ bch2_btree_iter_next_slot(iter);
+ }
- bch2_btree_iter_set_pos(iter, POS(i, first_bucket));
+ bch2_trans_exit(&trans);
- while (1) {
- bch2_trans_cond_resched(&trans);
+ return ret;
+}
- ret = bch2_alloc_write_key(&trans, iter, flags);
- if (ret < 0 || ret == ALLOC_END)
- break;
- if (ret == ALLOC_WROTE)
- *wrote = true;
- bch2_btree_iter_next_slot(iter);
- }
+int bch2_alloc_write(struct bch_fs *c, unsigned flags)
+{
+ struct bch_dev *ca;
+ unsigned i;
+ int ret = 0;
- if (ret < 0) {
+ for_each_rw_member(ca, c, i) {
+ ret = bch2_dev_alloc_write(c, ca, flags); /* keep the per-device error so it is checked and returned */
+ if (ret) {
percpu_ref_put(&ca->io_ref);
break;
}
}
- bch2_trans_exit(&trans);
-
- return ret < 0 ? ret : 0;
+ return ret;
}
/* Bucket IO clocks: */
@@ -481,6 +489,53 @@ static void bch2_bucket_clock_init(struct bch_fs *c, int rw)
mutex_init(&clock->lock);
}
+int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
+ size_t bucket_nr, int rw)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
+ struct btree_iter *iter;
+ struct bucket *g;
+ struct bkey_i_alloc *a;
+ struct bkey_alloc_unpacked u;
+ u16 *time;
+ int ret = 0;
+
+ iter = bch2_trans_get_iter(trans, BTREE_ID_ALLOC, POS(dev, bucket_nr),
+ BTREE_ITER_CACHED|
+ BTREE_ITER_CACHED_NOFILL|
+ BTREE_ITER_INTENT);
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
+ a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+ ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ goto out;
+
+ percpu_down_read(&c->mark_lock);
+ g = bucket(ca, bucket_nr);
+ u = alloc_mem_to_key(g, READ_ONCE(g->mark));
+ percpu_up_read(&c->mark_lock);
+
+ bkey_alloc_init(&a->k_i);
+ a->k.p = iter->pos;
+
+ time = rw == READ ? &u.read_time : &u.write_time;
+ if (*time == c->bucket_clock[rw].hand)
+ goto out;
+
+ *time = c->bucket_clock[rw].hand;
+
+ bch2_alloc_pack(a, u);
+
+ ret = bch2_trans_update(trans, iter, &a->k_i, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL, 0);
+out:
+ bch2_trans_iter_put(trans, iter);
+ return ret;
+}
+
/* Background allocator thread: */
/*
@@ -1259,18 +1314,6 @@ void bch2_recalc_capacity(struct bch_fs *c)
c->bucket_size_max = bucket_size_max;
- if (c->capacity) {
- bch2_io_timer_add(&c->io_clock[READ],
- &c->bucket_clock[READ].rescale);
- bch2_io_timer_add(&c->io_clock[WRITE],
- &c->bucket_clock[WRITE].rescale);
- } else {
- bch2_io_timer_del(&c->io_clock[READ],
- &c->bucket_clock[READ].rescale);
- bch2_io_timer_del(&c->io_clock[WRITE],
- &c->bucket_clock[WRITE].rescale);
- }
-
/* Wake up case someone was waiting for buckets */
closure_wake_up(&c->freelist_wait);
}
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 4f462696..870714ff 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -28,6 +28,8 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
void bch2_alloc_pack(struct bkey_i_alloc *,
const struct bkey_alloc_unpacked);
+int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
+
static inline struct bkey_alloc_unpacked
alloc_mem_to_key(struct bucket *g, struct bucket_mark m)
{
@@ -93,7 +95,8 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
void bch2_dev_allocator_stop(struct bch_dev *);
int bch2_dev_allocator_start(struct bch_dev *);
-int bch2_alloc_write(struct bch_fs *, unsigned, bool *);
+int bch2_dev_alloc_write(struct bch_fs *, struct bch_dev *, unsigned);
+int bch2_alloc_write(struct bch_fs *, unsigned);
void bch2_fs_allocator_background_init(struct bch_fs *);
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 4a048828..7a92e3d5 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -309,8 +309,6 @@ out:
.dev = ca->dev_idx,
};
- bucket_io_clock_reset(c, ca, bucket, READ);
- bucket_io_clock_reset(c, ca, bucket, WRITE);
spin_unlock(&ob->lock);
if (c->blocked_allocate_open_bucket) {
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 2aa8140a..e8c1e752 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -37,9 +37,11 @@
static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
{
+ preempt_disable();
write_seqcount_begin(&c->gc_pos_lock);
c->gc_pos = new_pos;
write_seqcount_end(&c->gc_pos_lock);
+ preempt_enable();
}
static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
@@ -568,6 +570,7 @@ static int bch2_gc_done(struct bch_fs *c,
fsck_err(c, _msg ": got %llu, should be %llu" \
, ##__VA_ARGS__, dst->_f, src->_f); \
dst->_f = src->_f; \
+ ret = 1; \
}
#define copy_stripe_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
@@ -578,6 +581,7 @@ static int bch2_gc_done(struct bch_fs *c,
dst->_f, src->_f); \
dst->_f = src->_f; \
dst->dirty = true; \
+ ret = 1; \
}
#define copy_bucket_field(_f) \
if (dst->b[b].mark._f != src->b[b].mark._f) { \
@@ -588,6 +592,7 @@ static int bch2_gc_done(struct bch_fs *c,
bch2_data_types[dst->b[b].mark.data_type],\
dst->b[b].mark._f, src->b[b].mark._f); \
dst->b[b]._mark._f = src->b[b].mark._f; \
+ ret = 1; \
}
#define copy_dev_field(_f, _msg, ...) \
copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
@@ -1394,7 +1399,7 @@ static int bch2_gc_thread(void *arg)
#else
ret = bch2_gc_gens(c);
#endif
- if (ret)
+ if (ret < 0)
bch_err(c, "btree gc failed: %i", ret);
debug_check_no_locks_held();
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 683b416e..c1717b7c 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -602,7 +602,6 @@ enum btree_trigger_flags {
__BTREE_TRIGGER_GC,
__BTREE_TRIGGER_BUCKET_INVALIDATE,
- __BTREE_TRIGGER_ALLOC_READ,
__BTREE_TRIGGER_NOATOMIC,
};
@@ -614,7 +613,6 @@ enum btree_trigger_flags {
#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC)
#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
-#define BTREE_TRIGGER_ALLOC_READ (1U << __BTREE_TRIGGER_ALLOC_READ)
#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
static inline bool btree_node_type_needs_gc(enum btree_node_type type)
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 79711435..c3fc3abb 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -254,6 +254,7 @@ void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
BUG_ON(idx >= 2);
+ preempt_disable();
write_seqcount_begin(&c->usage_lock);
acc_u64s_percpu((u64 *) c->usage_base,
@@ -261,6 +262,7 @@ void bch2_fs_usage_acc_to_base(struct bch_fs *c, unsigned idx)
percpu_memset(c->usage[idx], 0, u64s * sizeof(u64));
write_seqcount_end(&c->usage_lock);
+ preempt_enable();
}
void bch2_fs_usage_to_text(struct printbuf *out,
@@ -482,6 +484,7 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_wake_allocator(ca);
}
+__flatten
void bch2_dev_usage_from_buckets(struct bch_fs *c)
{
struct bch_dev *ca;
@@ -755,8 +758,7 @@ static int bch2_mark_alloc(struct bch_fs *c,
}
}));
- if (!(flags & BTREE_TRIGGER_ALLOC_READ))
- bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc);
+ bch2_dev_usage_update(c, ca, fs_usage, old_m, m, gc);
g->io_time[READ] = u.read_time;
g->io_time[WRITE] = u.write_time;
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 653f6761..a3873bec 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -58,12 +58,6 @@ static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
return __bucket(ca, b, false);
}
-static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca,
- size_t b, int rw)
-{
- bucket(ca, b)->io_time[rw] = c->bucket_clock[rw].hand;
-}
-
static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw)
{
return c->bucket_clock[rw].hand - g->io_time[rw];
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 5514f653..eac750ad 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -1448,7 +1448,7 @@ static int __bch2_stripe_write_key(struct btree_trans *trans,
return 0;
}
-int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
+int bch2_stripes_write(struct bch_fs *c, unsigned flags)
{
struct btree_trans trans;
struct btree_iter *iter;
@@ -1476,8 +1476,6 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags, bool *wrote)
if (ret)
break;
-
- *wrote = true;
}
bch2_trans_exit(&trans);
@@ -1497,7 +1495,6 @@ static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
bch2_mark_key(c, k, 0, 0, NULL, 0,
- BTREE_TRIGGER_ALLOC_READ|
BTREE_TRIGGER_NOATOMIC);
if (ret)
return ret;
diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h
index f8fc3d61..6db16cf7 100644
--- a/libbcachefs/ec.h
+++ b/libbcachefs/ec.h
@@ -156,7 +156,7 @@ void bch2_ec_flush_new_stripes(struct bch_fs *);
struct journal_keys;
int bch2_stripes_read(struct bch_fs *, struct journal_keys *);
-int bch2_stripes_write(struct bch_fs *, unsigned, bool *);
+int bch2_stripes_write(struct bch_fs *, unsigned);
int bch2_ec_mem_alloc(struct bch_fs *, bool);
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 60684380..4ceeafcf 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -868,7 +868,7 @@ retry:
if (bkey_extent_is_allocation(k.k))
bch2_add_page_sectors(&rbio->bio, k);
- bch2_read_extent(c, rbio, k, offset_into_extent, flags);
+ bch2_read_extent(trans, rbio, k, offset_into_extent, flags);
if (flags & BCH_READ_LAST_FRAGMENT)
break;
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index 5c80142e..6a9820e8 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -1527,8 +1527,6 @@ got_sb:
if (ret)
goto err_put_super;
- sb->s_bdi->congested_fn = bch2_congested;
- sb->s_bdi->congested_data = c;
sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
for_each_online_member(ca, c, i) {
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 5c9c3cf5..0a4b4eed 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -7,6 +7,7 @@
*/
#include "bcachefs.h"
+#include "alloc_background.h"
#include "alloc_foreground.h"
#include "bkey_on_stack.h"
#include "bset.h"
@@ -1635,7 +1636,7 @@ retry:
goto out;
}
- ret = __bch2_read_extent(c, rbio, bvec_iter, k, 0, failed, flags);
+ ret = __bch2_read_extent(&trans, rbio, bvec_iter, k, 0, failed, flags);
if (ret == READ_RETRY)
goto retry;
if (ret)
@@ -1692,7 +1693,7 @@ retry:
bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
swap(bvec_iter.bi_size, bytes);
- ret = __bch2_read_extent(c, rbio, bvec_iter, k,
+ ret = __bch2_read_extent(&trans, rbio, bvec_iter, k,
offset_into_extent, failed, flags);
switch (ret) {
case READ_RETRY:
@@ -2020,11 +2021,12 @@ err:
return ret;
}
-int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
+int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bvec_iter iter, struct bkey_s_c k,
unsigned offset_into_extent,
struct bch_io_failures *failed, unsigned flags)
{
+ struct bch_fs *c = trans->c;
struct extent_ptr_decoded pick;
struct bch_read_bio *rbio = NULL;
struct bch_dev *ca;
@@ -2192,9 +2194,9 @@ get_bio:
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
- rcu_read_lock();
- bucket_io_clock_reset(c, ca, PTR_BUCKET_NR(ca, &pick.ptr), READ);
- rcu_read_unlock();
+ if (pick.ptr.cached)
+ bch2_bucket_io_time_reset(trans, pick.ptr.dev,
+ PTR_BUCKET_NR(ca, &pick.ptr), READ);
if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
bio_inc_remaining(&orig->bio);
@@ -2336,7 +2338,7 @@ retry:
if (rbio->bio.bi_iter.bi_size == bytes)
flags |= BCH_READ_LAST_FRAGMENT;
- bch2_read_extent(c, rbio, k, offset_into_extent, flags);
+ bch2_read_extent(&trans, rbio, k, offset_into_extent, flags);
if (flags & BCH_READ_LAST_FRAGMENT)
break;
diff --git a/libbcachefs/io.h b/libbcachefs/io.h
index ded468d7..e6aac594 100644
--- a/libbcachefs/io.h
+++ b/libbcachefs/io.h
@@ -136,17 +136,17 @@ enum bch_read_flags {
BCH_READ_IN_RETRY = 1 << 7,
};
-int __bch2_read_extent(struct bch_fs *, struct bch_read_bio *,
+int __bch2_read_extent(struct btree_trans *, struct bch_read_bio *,
struct bvec_iter, struct bkey_s_c, unsigned,
struct bch_io_failures *, unsigned);
-static inline void bch2_read_extent(struct bch_fs *c,
+static inline void bch2_read_extent(struct btree_trans *trans,
struct bch_read_bio *rbio,
struct bkey_s_c k,
unsigned offset_into_extent,
unsigned flags)
{
- __bch2_read_extent(c, rbio, rbio->bio.bi_iter, k,
+ __bch2_read_extent(trans, rbio, rbio->bio.bi_iter, k,
offset_into_extent, NULL, flags);
}
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 1ffb14a2..62dcac79 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -415,7 +415,7 @@ static void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
atomic_read(&ctxt->write_sectors) != sectors_pending);
}
-static int bch2_move_extent(struct bch_fs *c,
+static int bch2_move_extent(struct btree_trans *trans,
struct moving_context *ctxt,
struct write_point_specifier wp,
struct bch_io_opts io_opts,
@@ -424,6 +424,7 @@ static int bch2_move_extent(struct bch_fs *c,
enum data_cmd data_cmd,
struct data_opts data_opts)
{
+ struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
struct moving_io *io;
const union bch_extent_entry *entry;
@@ -490,7 +491,7 @@ static int bch2_move_extent(struct bch_fs *c,
* ctxt when doing wakeup
*/
closure_get(&ctxt->cl);
- bch2_read_extent(c, &io->rbio, k, 0,
+ bch2_read_extent(trans, &io->rbio, k, 0,
BCH_READ_NODECODE|
BCH_READ_LAST_FRAGMENT);
return 0;
@@ -608,7 +609,7 @@ peek:
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
- ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k,
+ ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
data_cmd, data_opts);
if (ret2) {
if (ret2 == -ENOMEM) {
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 6e829bf0..d70fa968 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -845,9 +845,11 @@ static int verify_superblock_clean(struct bch_fs *c,
}
mustfix_fsck_err_on(j->read_clock != clean->read_clock, c,
- "superblock read clock doesn't match journal after clean shutdown");
+ "superblock read clock %u doesn't match journal %u after clean shutdown",
+ clean->read_clock, j->read_clock);
mustfix_fsck_err_on(j->write_clock != clean->write_clock, c,
- "superblock read clock doesn't match journal after clean shutdown");
+ "superblock write clock %u doesn't match journal %u after clean shutdown",
+ clean->write_clock, j->write_clock);
for (i = 0; i < BTREE_ID_NR; i++) {
char buf1[200], buf2[200];
@@ -961,7 +963,7 @@ int bch2_fs_recovery(struct bch_fs *c)
const char *err = "cannot allocate memory";
struct bch_sb_field_clean *clean = NULL;
u64 journal_seq;
- bool wrote = false, write_sb = false;
+ bool write_sb = false, need_write_alloc = false;
int ret;
if (c->sb.clean)
@@ -1090,8 +1092,10 @@ int bch2_fs_recovery(struct bch_fs *c)
bch_info(c, "starting metadata mark and sweep");
err = "error in mark and sweep";
ret = bch2_gc(c, &c->journal_keys, true, true);
- if (ret)
+ if (ret < 0)
goto err;
+ if (ret)
+ need_write_alloc = true;
bch_verbose(c, "mark and sweep done");
}
@@ -1101,8 +1105,10 @@ int bch2_fs_recovery(struct bch_fs *c)
bch_info(c, "starting mark and sweep");
err = "error in mark and sweep";
ret = bch2_gc(c, &c->journal_keys, true, false);
- if (ret)
+ if (ret < 0)
goto err;
+ if (ret)
+ need_write_alloc = true;
bch_verbose(c, "mark and sweep done");
}
@@ -1126,7 +1132,7 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
bch_verbose(c, "journal replay done");
- if (!c->opts.nochanges) {
+ if (need_write_alloc && !c->opts.nochanges) {
/*
* note that even when filesystem was clean there might be work
* to do here, if we ran gc (because of fsck) which recalculated
@@ -1134,8 +1140,8 @@ int bch2_fs_recovery(struct bch_fs *c)
*/
bch_verbose(c, "writing allocation info");
err = "error writing out alloc info";
- ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?:
- bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote);
+ ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW) ?:
+ bch2_alloc_write(c, BTREE_INSERT_LAZY_RW);
if (ret) {
bch_err(c, "error writing alloc info");
goto err;
@@ -1281,6 +1287,20 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_fs_journal_start(&c->journal, 1, &journal);
bch2_journal_set_replay_done(&c->journal);
+ err = "error going read-write";
+ ret = bch2_fs_read_write_early(c);
+ if (ret)
+ goto err;
+
+ /*
+ * Write out the superblock and journal buckets, now that we can do
+ * btree updates
+ */
+ err = "error writing alloc info";
+ ret = bch2_alloc_write(c, 0);
+ if (ret)
+ goto err;
+
bch2_inode_init(c, &root_inode, 0, 0,
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
root_inode.bi_inum = BCACHEFS_ROOT_INO;
@@ -1289,7 +1309,7 @@ int bch2_fs_initialize(struct bch_fs *c)
err = "error creating root directory";
ret = bch2_btree_insert(c, BTREE_ID_INODES,
&packed_inode.inode.k_i,
- NULL, NULL, BTREE_INSERT_LAZY_RW);
+ NULL, NULL, 0);
if (ret)
goto err;
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index c873b671..7f301fa6 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -149,44 +149,6 @@ struct bch_fs *bch2_uuid_to_fs(uuid_le uuid)
return c;
}
-int bch2_congested(void *data, int bdi_bits)
-{
- struct bch_fs *c = data;
- struct backing_dev_info *bdi;
- struct bch_dev *ca;
- unsigned i;
- int ret = 0;
-
- rcu_read_lock();
- if (bdi_bits & (1 << WB_sync_congested)) {
- /* Reads - check all devices: */
- for_each_readable_member(ca, c, i) {
- bdi = ca->disk_sb.bdev->bd_bdi;
-
- if (bdi_congested(bdi, bdi_bits)) {
- ret = 1;
- break;
- }
- }
- } else {
- const struct bch_devs_mask *devs =
- bch2_target_to_mask(c, c->opts.foreground_target) ?:
- &c->rw_devs[BCH_DATA_user];
-
- for_each_member_device_rcu(ca, c, i, devs) {
- bdi = ca->disk_sb.bdev->bd_bdi;
-
- if (bdi_congested(bdi, bdi_bits)) {
- ret = 1;
- break;
- }
- }
- }
- rcu_read_unlock();
-
- return ret;
-}
-
/* Filesystem RO/RW: */
/*
@@ -207,9 +169,7 @@ int bch2_congested(void *data, int bdi_bits)
static void __bch2_fs_read_only(struct bch_fs *c)
{
struct bch_dev *ca;
- bool wrote = false;
unsigned i, clean_passes = 0;
- int ret;
bch2_rebalance_stop(c);
bch2_copygc_stop(c);
@@ -228,20 +188,6 @@ static void __bch2_fs_read_only(struct bch_fs *c)
if (!test_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags))
goto nowrote_alloc;
- bch_verbose(c, "writing alloc info");
- /*
- * This should normally just be writing the bucket read/write clocks:
- */
- ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
- bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
- bch_verbose(c, "writing alloc info complete");
-
- if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
- bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
-
- if (ret)
- goto nowrote_alloc;
-
bch_verbose(c, "flushing journal and stopping allocators");
bch2_journal_flush_all_pins(&c->journal);
@@ -278,6 +224,9 @@ nowrote_alloc:
for_each_member_device(ca, c, i)
bch2_dev_allocator_stop(ca);
+ bch2_io_timer_del(&c->io_clock[READ], &c->bucket_clock[READ].rescale);
+ bch2_io_timer_del(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale);
+
clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
@@ -454,6 +403,9 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
bch2_dev_allocator_add(c, ca);
bch2_recalc_capacity(c);
+ bch2_io_timer_add(&c->io_clock[READ], &c->bucket_clock[READ].rescale);
+ bch2_io_timer_add(&c->io_clock[WRITE], &c->bucket_clock[WRITE].rescale);
+
for_each_rw_member(ca, c, i) {
ret = bch2_dev_allocator_start(ca);
if (ret) {
@@ -1701,6 +1653,11 @@ have_slot:
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
+ err = "alloc write failed";
+ ret = bch2_dev_alloc_write(c, ca, 0);
+ if (ret)
+ goto err;
+
if (ca->mi.state == BCH_MEMBER_STATE_RW) {
err = __bch2_dev_read_write(c, ca);
if (err)
diff --git a/libbcachefs/super.h b/libbcachefs/super.h
index 048ffec6..02c81f35 100644
--- a/libbcachefs/super.h
+++ b/libbcachefs/super.h
@@ -199,7 +199,6 @@ static inline struct bch_devs_mask bch2_online_devs(struct bch_fs *c)
struct bch_fs *bch2_bdev_to_fs(struct block_device *);
struct bch_fs *bch2_uuid_to_fs(uuid_le);
-int bch2_congested(void *, int);
bool bch2_dev_state_allowed(struct bch_fs *, struct bch_dev *,
enum bch_member_state, int);