Diffstat (limited to 'libbcachefs/alloc_background.c')
-rw-r--r--	libbcachefs/alloc_background.c	255
1 file changed, 203 insertions(+), 52 deletions(-)
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index ce42202f..f246319b 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -128,6 +128,34 @@ static inline void put_alloc_field(struct bkey_i_alloc *a, void **p,
*p += bytes;
}
+struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
+{
+ struct bkey_alloc_unpacked ret = { .gen = a->gen };
+ const void *d = a->data;
+ unsigned idx = 0;
+
+#define x(_name, _bits) ret._name = get_alloc_field(a, &d, idx++);
+ BCH_ALLOC_FIELDS()
+#undef x
+ return ret;
+}
+
+static void bch2_alloc_pack(struct bkey_i_alloc *dst,
+ const struct bkey_alloc_unpacked src)
+{
+ unsigned idx = 0;
+ void *d = dst->v.data;
+
+ dst->v.fields = 0;
+ dst->v.gen = src.gen;
+
+#define x(_name, _bits) put_alloc_field(dst, &d, idx++, src._name);
+ BCH_ALLOC_FIELDS()
+#undef x
+
+ set_bkey_val_bytes(&dst->k, (void *) d - (void *) &dst->v);
+}
+
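/*
 * Editor's sketch (not part of the patch): bch2_alloc_unpack() and
 * bch2_alloc_pack() above are both generated from the BCH_ALLOC_FIELDS()
 * x-macro, so the field list is written once and the two directions
 * cannot drift apart. A minimal self-contained analogue of the pattern,
 * with a hypothetical field list and a simple little-endian byte
 * encoding standing in for the real variable-width fields:
 */
#include <stdint.h>

#define DEMO_FIELDS()		\
	x(read_time, 16)	\
	x(write_time, 16)

struct demo_unpacked {
#define x(_name, _bits) uint64_t _name;
	DEMO_FIELDS()
#undef x
};

static void put_bits(unsigned char **d, uint64_t v, unsigned bits)
{
	for (unsigned i = 0; i < bits; i += 8)
		*(*d)++ = (v >> i) & 0xff;
}

static uint64_t get_bits(const unsigned char **d, unsigned bits)
{
	uint64_t v = 0;

	for (unsigned i = 0; i < bits; i += 8)
		v |= (uint64_t) *(*d)++ << i;
	return v;
}

static void demo_pack(unsigned char *buf, const struct demo_unpacked *src)
{
	unsigned char *d = buf;
#define x(_name, _bits) put_bits(&d, src->_name, _bits);
	DEMO_FIELDS()
#undef x
}

static struct demo_unpacked demo_unpack(const unsigned char *buf)
{
	const unsigned char *d = buf;
	struct demo_unpacked ret;
#define x(_name, _bits) ret._name = get_bits(&d, _bits);
	DEMO_FIELDS()
#undef x
	return ret;
}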
static unsigned bch_alloc_val_u64s(const struct bch_alloc *a)
{
unsigned i, bytes = offsetof(struct bch_alloc, data);
@@ -173,15 +201,24 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
static void __alloc_read_key(struct bucket *g, const struct bch_alloc *a)
{
const void *d = a->data;
- unsigned idx = 0;
+ unsigned idx = 0, data_type, dirty_sectors, cached_sectors;
+ struct bucket_mark m;
- g->_mark.gen = a->gen;
- g->gen_valid = 1;
g->io_time[READ] = get_alloc_field(a, &d, idx++);
g->io_time[WRITE] = get_alloc_field(a, &d, idx++);
- g->_mark.data_type = get_alloc_field(a, &d, idx++);
- g->_mark.dirty_sectors = get_alloc_field(a, &d, idx++);
- g->_mark.cached_sectors = get_alloc_field(a, &d, idx++);
+ data_type = get_alloc_field(a, &d, idx++);
+ dirty_sectors = get_alloc_field(a, &d, idx++);
+ cached_sectors = get_alloc_field(a, &d, idx++);
+ g->oldest_gen = get_alloc_field(a, &d, idx++);
+
+ bucket_cmpxchg(g, m, ({
+ m.gen = a->gen;
+ m.data_type = data_type;
+ m.dirty_sectors = dirty_sectors;
+ m.cached_sectors = cached_sectors;
+ }));
+
+ g->gen_valid = 1;
}
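/*
 * Editor's sketch (not part of the patch): bucket_cmpxchg() above is a
 * retry loop around an atomic compare-and-swap, so that gen, data_type
 * and the sector counts are published as a single unit. A hypothetical
 * C11 analogue of that read-modify-write pattern:
 */
#include <stdatomic.h>
#include <stdint.h>

union mark {
	struct {
		uint16_t gen;
		uint16_t data_type;
		uint32_t dirty_sectors;
	};
	uint64_t v;		/* all fields fit one atomic word */
};

static void mark_update(_Atomic uint64_t *m, uint16_t gen)
{
	union mark old, new;

	old.v = atomic_load(m);
	do {
		new = old;
		new.gen = gen;		/* modify a private copy... */
		new.dirty_sectors = 0;
	} while (!atomic_compare_exchange_weak(m, &old.v, new.v));
	/* ...and install it only if nobody raced with us */
}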
static void __alloc_write_key(struct bkey_i_alloc *a, struct bucket *g,
@@ -199,6 +236,7 @@ static void __alloc_write_key(struct bkey_i_alloc *a, struct bucket *g,
put_alloc_field(a, &d, idx++, m.data_type);
put_alloc_field(a, &d, idx++, m.dirty_sectors);
put_alloc_field(a, &d, idx++, m.cached_sectors);
+ put_alloc_field(a, &d, idx++, g->oldest_gen);
set_bkey_val_bytes(&a->k, (void *) d - (void *) &a->v);
}
@@ -315,6 +353,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_USE_ALLOC_RESERVE|
+ BTREE_INSERT_NOMARK|
flags,
BTREE_INSERT_ENTRY(iter, &a->k_i));
if (ret)
@@ -358,7 +397,8 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
? 0
: bch2_btree_insert_at(c, NULL, NULL,
BTREE_INSERT_NOFAIL|
- BTREE_INSERT_JOURNAL_REPLAY,
+ BTREE_INSERT_JOURNAL_REPLAY|
+ BTREE_INSERT_NOMARK,
BTREE_INSERT_ENTRY(&iter, k));
err:
bch2_btree_iter_unlock(&iter);
@@ -824,6 +864,142 @@ static inline long next_alloc_bucket(struct bch_dev *ca)
return -1;
}
+/*
+ * returns sequence number of most recent journal entry that updated this
+ * bucket:
+ */
+static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
+{
+ if (m.journal_seq_valid) {
+ u64 journal_seq = atomic64_read(&c->journal.seq);
+ u64 bucket_seq = journal_seq;
+
+ bucket_seq &= ~((u64) U16_MAX);
+ bucket_seq |= m.journal_seq;
+
+ if (bucket_seq > journal_seq)
+ bucket_seq -= 1 << 16;
+
+ return bucket_seq;
+ } else {
+ return 0;
+ }
+}
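/*
 * Editor's sketch (not part of the patch): the mark only stores the low
 * 16 bits of the sequence number, so the full value is recovered by
 * splicing those bits into the current journal sequence and stepping
 * back one 16-bit epoch if the result would lie in the future. E.g.
 * with journal_seq = 0x20005 and m.journal_seq = 0xfff0, splicing gives
 * 0x2fff0 > 0x20005, so 1 << 16 is subtracted and 0x1fff0 is recovered.
 * The same arithmetic as a standalone, hypothetical helper:
 */
#include <stdint.h>

static uint64_t splice_low16(uint64_t journal_seq, uint16_t low16)
{
	uint64_t seq = (journal_seq & ~(uint64_t) 0xffff) | low16;

	if (seq > journal_seq)		/* low bits predate this epoch */
		seq -= 1 << 16;
	return seq;
}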
+
+static int bch2_invalidate_one_bucket2(struct bch_fs *c, struct bch_dev *ca,
+ struct btree_iter *iter,
+ u64 *journal_seq, unsigned flags)
+{
+#if 0
+ __BKEY_PADDED(k, BKEY_ALLOC_VAL_U64s_MAX) alloc_key;
+#else
+ /* hack: */
+ __BKEY_PADDED(k, 8) alloc_key;
+#endif
+ struct bkey_i_alloc *a;
+ struct bkey_alloc_unpacked u;
+ struct bucket_mark m;
+ struct bkey_s_c k;
+ bool invalidating_cached_data;
+ size_t b;
+ int ret;
+
+ BUG_ON(!ca->alloc_heap.used ||
+ !ca->alloc_heap.data[0].nr);
+ b = ca->alloc_heap.data[0].bucket;
+
+ /* first, put on free_inc and mark as owned by allocator: */
+ percpu_down_read_preempt_disable(&c->mark_lock);
+ spin_lock(&c->freelist_lock);
+
+ verify_not_on_freelist(c, ca, b);
+
+ BUG_ON(!fifo_push(&ca->free_inc, b));
+
+ bch2_mark_alloc_bucket(c, ca, b, true, gc_pos_alloc(c, NULL), 0);
+ m = bucket(ca, b)->mark;
+
+ spin_unlock(&c->freelist_lock);
+ percpu_up_read_preempt_enable(&c->mark_lock);
+
+ bch2_btree_iter_cond_resched(iter);
+
+ BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
+
+ bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
+retry:
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = btree_iter_err(k);
+ if (ret)
+ return ret;
+
+ if (k.k && k.k->type == KEY_TYPE_alloc)
+ u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
+ else
+ memset(&u, 0, sizeof(u));
+
+ invalidating_cached_data = u.cached_sectors != 0;
+
+ //BUG_ON(u.dirty_sectors);
+ u.data_type = 0;
+ u.dirty_sectors = 0;
+ u.cached_sectors = 0;
+ u.read_time = c->bucket_clock[READ].hand;
+ u.write_time = c->bucket_clock[WRITE].hand;
+ u.gen++;
+
+ a = bkey_alloc_init(&alloc_key.k);
+ a->k.p = iter->pos;
+ bch2_alloc_pack(a, u);
+
+ ret = bch2_btree_insert_at(c, NULL,
+ invalidating_cached_data ? journal_seq : NULL,
+ BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOCHECK_RW|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE|
+ BTREE_INSERT_USE_ALLOC_RESERVE|
+ flags,
+ BTREE_INSERT_ENTRY(iter, &a->k_i));
+ if (ret == -EINTR)
+ goto retry;
+
+ if (!ret) {
+ /* remove from alloc_heap: */
+ struct alloc_heap_entry e, *top = ca->alloc_heap.data;
+
+ top->bucket++;
+ top->nr--;
+
+ if (!top->nr)
+ heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
+
+ /*
+ * Make sure we flush the last journal entry that updated this
+ * bucket (i.e. deleting the last reference) before writing to
+ * this bucket again:
+ */
+ *journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
+ } else {
+ size_t b2;
+
+ /* remove from free_inc: */
+ percpu_down_read_preempt_disable(&c->mark_lock);
+ spin_lock(&c->freelist_lock);
+
+ bch2_mark_alloc_bucket(c, ca, b, false,
+ gc_pos_alloc(c, NULL), 0);
+
+ BUG_ON(!fifo_pop_back(&ca->free_inc, b2));
+ BUG_ON(b != b2);
+
+ spin_unlock(&c->freelist_lock);
+ percpu_up_read_preempt_enable(&c->mark_lock);
+ }
+
+ return ret;
+}
+
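/*
 * Editor's sketch (not part of the patch): the function above follows an
 * optimistic-update pattern: the bucket is pushed onto free_inc and
 * marked allocator-owned first, the btree update is retried across
 * -EINTR lock restarts, and only a hard failure rolls the side effect
 * back by popping the bucket off again. The shape of that pattern, with
 * hypothetical names:
 */
#include <assert.h>
#include <errno.h>
#include <stddef.h>

struct fifo { size_t data[64]; unsigned nr; };

static void fifo_push(struct fifo *f, size_t v)	{ f->data[f->nr++] = v; }
static size_t fifo_pop_back(struct fifo *f)	{ return f->data[--f->nr]; }

/* stand-in for the transactional btree update; always succeeds here */
static int update_index(size_t b) { (void) b; return 0; }

static int invalidate_one(struct fifo *free_inc, size_t b)
{
	int ret;

	fifo_push(free_inc, b);			/* optimistic side effect */

	do
		ret = update_index(b);
	while (ret == -EINTR);			/* lock restart: just retry */

	if (ret) {				/* hard failure: roll back */
		size_t b2 = fifo_pop_back(free_inc);

		assert(b2 == b);
		(void) b2;
	}
	return ret;
}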
static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t bucket, u64 *flush_seq)
{
@@ -844,18 +1020,7 @@ static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
percpu_up_read_preempt_enable(&c->mark_lock);
- if (m.journal_seq_valid) {
- u64 journal_seq = atomic64_read(&c->journal.seq);
- u64 bucket_seq = journal_seq;
-
- bucket_seq &= ~((u64) U16_MAX);
- bucket_seq |= m.journal_seq;
-
- if (bucket_seq > journal_seq)
- bucket_seq -= 1 << 16;
-
- *flush_seq = max(*flush_seq, bucket_seq);
- }
+ *flush_seq = max(*flush_seq, bucket_journal_seq(c, m));
return m.cached_sectors != 0;
}
@@ -868,7 +1033,6 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
struct btree_iter iter;
u64 journal_seq = 0;
int ret = 0;
- long b;
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
@@ -876,14 +1040,11 @@ static int bch2_invalidate_buckets(struct bch_fs *c, struct bch_dev *ca)
/* Only use nowait if we've already invalidated at least one bucket: */
while (!ret &&
!fifo_full(&ca->free_inc) &&
- (b = next_alloc_bucket(ca)) >= 0) {
- bool must_flush =
- bch2_invalidate_one_bucket(c, ca, b, &journal_seq);
-
- ret = __bch2_alloc_write_key(c, ca, b, &iter,
- must_flush ? &journal_seq : NULL,
- !fifo_empty(&ca->free_inc) ? BTREE_INSERT_NOWAIT : 0);
- }
+ ca->alloc_heap.used)
+ ret = bch2_invalidate_one_bucket2(c, ca, &iter, &journal_seq,
+ BTREE_INSERT_GC_LOCK_HELD|
+ (!fifo_empty(&ca->free_inc)
+ ? BTREE_INSERT_NOWAIT : 0));
bch2_btree_iter_unlock(&iter);
@@ -1305,24 +1466,16 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
return 0;
}
-static void flush_held_btree_writes(struct bch_fs *c)
+static bool flush_done(struct bch_fs *c)
{
struct bucket_table *tbl;
struct rhash_head *pos;
struct btree *b;
- bool nodes_blocked;
+ bool nodes_unwritten;
size_t i;
- struct closure cl;
-
- closure_init_stack(&cl);
-
- clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
again:
- pr_debug("flushing dirty btree nodes");
cond_resched();
- closure_wait(&c->btree_interior_update_wait, &cl);
-
- nodes_blocked = false;
+ nodes_unwritten = false;
rcu_read_lock();
for_each_cached_btree(b, c, tbl, i, pos)
@@ -1334,24 +1487,25 @@ again:
six_unlock_read(&b->lock);
goto again;
} else {
- nodes_blocked = true;
+ nodes_unwritten = true;
}
}
rcu_read_unlock();
- if (c->btree_roots_dirty)
+ if (c->btree_roots_dirty) {
bch2_journal_meta(&c->journal);
-
- if (nodes_blocked) {
- closure_sync(&cl);
goto again;
}
- closure_wake_up(&c->btree_interior_update_wait);
- closure_sync(&cl);
+ return !nodes_unwritten &&
+ !bch2_btree_interior_updates_nr_pending(c);
+}
- closure_wait_event(&c->btree_interior_update_wait,
- !bch2_btree_interior_updates_nr_pending(c));
+static void flush_held_btree_writes(struct bch_fs *c)
+{
+ clear_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags);
+
+ closure_wait_event(&c->btree_interior_update_wait, flush_done(c));
}
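/*
 * Editor's sketch (not part of the patch): the refactor above replaces a
 * hand-rolled closure wait loop with closure_wait_event(), i.e. "sleep
 * until this predicate holds, re-evaluating it on every wakeup". The
 * same shape with POSIX primitives and hypothetical names:
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waitq = PTHREAD_COND_INITIALIZER;
static int nodes_unwritten;

static bool demo_flush_done(void)
{
	return nodes_unwritten == 0;	/* predicate, evaluated under lock */
}

static void demo_flush_held_writes(void)
{
	pthread_mutex_lock(&lock);
	while (!demo_flush_done())	/* re-check after every wakeup */
		pthread_cond_wait(&waitq, &lock);
	pthread_mutex_unlock(&lock);
}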
static void allocator_start_issue_discards(struct bch_fs *c)
@@ -1470,7 +1624,6 @@ not_enough:
&journal_seq);
fifo_push(&ca->free[RESERVE_BTREE], bu);
- bucket_set_dirty(ca, bu);
}
}
@@ -1517,7 +1670,6 @@ int bch2_fs_allocator_start(struct bch_fs *c)
{
struct bch_dev *ca;
unsigned i;
- bool wrote;
int ret;
down_read(&c->gc_lock);
@@ -1536,8 +1688,7 @@ int bch2_fs_allocator_start(struct bch_fs *c)
}
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
-
- return bch2_alloc_write(c, false, &wrote);
+ return 0;
}
void bch2_fs_allocator_background_init(struct bch_fs *c)