Diffstat (limited to 'libbcachefs/alloc.c')
-rw-r--r--	libbcachefs/alloc.c	92
1 file changed, 55 insertions(+), 37 deletions(-)
diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c
index 44f9479e..ac2c7d1f 100644
--- a/libbcachefs/alloc.c
+++ b/libbcachefs/alloc.c
@@ -223,7 +223,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
if (a.k->p.offset >= ca->mi.nbuckets)
return;
- lg_local_lock(&c->usage_lock);
+ percpu_down_read_preempt_disable(&c->usage_lock);
g = bucket(ca, a.k->p.offset);
bucket_cmpxchg(g, new, ({
@@ -237,7 +237,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
if (a.v->fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
g->io_time[WRITE] = get_alloc_field(&d, 2);
- lg_local_unlock(&c->usage_lock);
+ percpu_up_read_preempt_enable(&c->usage_lock);
}
int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
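Note: the two hunks above swap the deleted lglock ("local/global lock") reader calls for the percpu_rw_semaphore API that replaced lglocks in mainline. A minimal sketch of the reader-side pattern the new calls follow, assuming a kernel that still provides the preempt-disabling variants (they were later removed upstream); usage_lock here is a stand-in for c->usage_lock:

	#include <linux/percpu-rwsem.h>

	static struct percpu_rw_semaphore usage_lock;	/* percpu_init_rwsem() at setup */

	static void reader(void)
	{
		/* fast path is a per-cpu counter bump, no shared cacheline;
		 * preemption stays off across the critical section */
		percpu_down_read_preempt_disable(&usage_lock);
		/* ... read/cmpxchg bucket marks ... */
		percpu_up_read_preempt_enable(&usage_lock);
	}

A writer in percpu_down_write() forces readers onto a slow path until it releases the semaphore, which is what keeps the read side cheap enough for the bucket-mark hot path.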
@@ -288,7 +288,7 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
size_t b, struct btree_iter *iter,
- u64 *journal_seq)
+ u64 *journal_seq, bool nowait)
{
struct bucket_mark m;
__BKEY_PADDED(k, DIV_ROUND_UP(sizeof(struct bch_alloc), 8)) alloc_key;
@@ -296,6 +296,13 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
struct bkey_i_alloc *a;
u8 *d;
int ret;
+ unsigned flags = BTREE_INSERT_ATOMIC|
+ BTREE_INSERT_NOFAIL|
+ BTREE_INSERT_USE_RESERVE|
+ BTREE_INSERT_USE_ALLOC_RESERVE;
+
+ if (nowait)
+ flags |= BTREE_INSERT_NOWAIT;
bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
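Note: __bch2_alloc_write_key() now builds the insert flags up front so that BTREE_INSERT_NOWAIT can be added conditionally; per the XXX comment this patch removes further down, NOWAIT makes the update return an error where it would otherwise block on the btree update or journal_res_get. A condensed sketch of how the loop body consumes the flag word (same names as the hunks around it):

	ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq, flags,
				   BTREE_INSERT_ENTRY(iter, &a->k_i));
	/* with NOWAIT set, ret can now mean "would have blocked"
	 * rather than the call sleeping */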
@@ -304,7 +311,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
if (ret)
break;
- lg_local_lock(&c->usage_lock);
+ percpu_down_read_preempt_disable(&c->usage_lock);
g = bucket(ca, b);
/* read mark under btree node lock: */
@@ -320,14 +327,9 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
put_alloc_field(&d, 2, g->io_time[READ]);
if (a->v.fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
put_alloc_field(&d, 2, g->io_time[WRITE]);
- lg_local_unlock(&c->usage_lock);
-
- ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq,
- BTREE_INSERT_ATOMIC|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_USE_ALLOC_RESERVE|
- BTREE_INSERT_NOWAIT,
+ percpu_up_read_preempt_enable(&c->usage_lock);
+
+ ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq, flags,
BTREE_INSERT_ENTRY(iter, &a->k_i));
bch2_btree_iter_cond_resched(iter);
} while (ret == -EINTR);
@@ -352,7 +354,8 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS_MIN,
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter, NULL);
+ ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter,
+ NULL, false);
bch2_btree_iter_unlock(&iter);
return ret;
}
@@ -372,7 +375,8 @@ int bch2_alloc_write(struct bch_fs *c)
down_read(&ca->bucket_lock);
for_each_set_bit(bucket, ca->buckets_dirty, ca->mi.nbuckets) {
- ret = __bch2_alloc_write_key(c, ca, bucket, &iter, NULL);
+ ret = __bch2_alloc_write_key(c, ca, bucket, &iter,
+ NULL, false);
if (ret)
break;
@@ -583,15 +587,20 @@ static void bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
{
struct bucket_mark m;
+ percpu_down_read_preempt_disable(&c->usage_lock);
spin_lock(&c->freelist_lock);
+
if (!bch2_invalidate_bucket(c, ca, bucket, &m)) {
spin_unlock(&c->freelist_lock);
+ percpu_up_read_preempt_enable(&c->usage_lock);
return;
}
verify_not_on_freelist(c, ca, bucket);
BUG_ON(!fifo_push(&ca->free_inc, bucket));
+
spin_unlock(&c->freelist_lock);
+ percpu_up_read_preempt_enable(&c->usage_lock);
/* gc lock held: */
bucket_io_clock_reset(c, ca, bucket, READ);
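Note: bch2_invalidate_one_bucket() now takes c->usage_lock outside c->freelist_lock and releases them in reverse order. A plausible reason for this nesting: the percpu rwsem read side can block while a writer holds the semaphore, so it must be taken before, never inside, the spinlock. The resulting lock order, sketched:

	percpu_down_read_preempt_disable(&c->usage_lock);	/* outer: may block */
	spin_lock(&c->freelist_lock);				/* inner: atomic */

	/* ... invalidate the bucket, push it onto ca->free_inc ... */

	spin_unlock(&c->freelist_lock);
	percpu_up_read_preempt_enable(&c->usage_lock);

The same outer/inner pairing appears again in __bch2_open_bucket_put() below.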
@@ -812,7 +821,8 @@ static void sort_free_inc(struct bch_fs *c, struct bch_dev *ca)
}
static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
- u64 *journal_seq, size_t nr)
+ u64 *journal_seq, size_t nr,
+ bool nowait)
{
struct btree_iter iter;
int ret = 0;
@@ -820,14 +830,12 @@ static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
- /*
- * XXX: if ca->nr_invalidated != 0, just return if we'd block doing the
- * btree update or journal_res_get
- */
+ /* Only use nowait if we've already invalidated at least one bucket: */
while (ca->nr_invalidated < min(nr, fifo_used(&ca->free_inc))) {
size_t b = fifo_idx_entry(&ca->free_inc, ca->nr_invalidated);
- ret = __bch2_alloc_write_key(c, ca, b, &iter, journal_seq);
+ ret = __bch2_alloc_write_key(c, ca, b, &iter, journal_seq,
+ nowait && ca->nr_invalidated);
if (ret)
break;
@@ -835,7 +843,9 @@ static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
}
bch2_btree_iter_unlock(&iter);
- return ret;
+
+ /* If we used NOWAIT, don't return the error: */
+ return ca->nr_invalidated ? 0 : ret;
}
static bool __push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket)
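Note: the invalidate loop above turns the removed XXX comment into real behavior: the first key is always written blocking, guaranteeing forward progress, and only once ca->nr_invalidated is nonzero may later writes use NOWAIT and bail out; the final return then suppresses the error whenever any progress was made. A simplified sketch of the policy, with have_work() and write_one_key() as hypothetical stand-ins for the fifo walk and __bch2_alloc_write_key():

	size_t done = 0;	/* stands in for ca->nr_invalidated */
	int ret = 0;

	while (have_work()) {
		ret = write_one_key(journal_seq, nowait && done);
		if (ret)
			break;		/* first iteration never uses NOWAIT */
		done++;
	}
	return done ? 0 : ret;		/* made progress: not an error */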
@@ -943,7 +953,8 @@ static int bch2_allocator_thread(void *arg)
fifo_used(&ca->free_inc));
journal_seq = 0;
- ret = bch2_invalidate_free_inc(c, ca, &journal_seq, SIZE_MAX);
+ ret = bch2_invalidate_free_inc(c, ca, &journal_seq,
+ SIZE_MAX, true);
if (ret) {
bch_err(ca, "error invalidating buckets: %i", ret);
goto stop;
@@ -1077,11 +1088,15 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+ percpu_down_read_preempt_disable(&c->usage_lock);
spin_lock(&ob->lock);
+
bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr),
false, gc_pos_alloc(c, ob), 0);
ob->valid = false;
+
spin_unlock(&ob->lock);
+ percpu_up_read_preempt_enable(&c->usage_lock);
spin_lock(&c->freelist_lock);
ob->freelist = c->open_buckets_freelist;
@@ -1151,6 +1166,7 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
long bucket;
spin_lock(&c->freelist_lock);
+
if (may_alloc_partial &&
ca->open_buckets_partial_nr) {
int ret = ca->open_buckets_partial[--ca->open_buckets_partial_nr];
@@ -1202,7 +1218,6 @@ out:
ob = bch2_open_bucket_alloc(c);
spin_lock(&ob->lock);
- lg_local_lock(&c->usage_lock);
buckets = bucket_array(ca);
ob->valid = true;
@@ -1215,8 +1230,6 @@ out:
bucket_io_clock_reset(c, ca, bucket, READ);
bucket_io_clock_reset(c, ca, bucket, WRITE);
-
- lg_local_unlock(&c->usage_lock);
spin_unlock(&ob->lock);
spin_unlock(&c->freelist_lock);
@@ -1296,7 +1309,6 @@ static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
if (nr_ptrs_effective >= nr_replicas)
return ALLOC_SUCCESS;
- rcu_read_lock();
devs_sorted = bch2_wp_alloc_list(c, wp, devs);
for (i = 0; i < devs_sorted.nr; i++) {
@@ -1337,7 +1349,6 @@ static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
break;
}
}
- rcu_read_unlock();
EBUG_ON(reserve == RESERVE_MOVINGGC &&
ret != ALLOC_SUCCESS &&
@@ -1422,8 +1433,13 @@ static int open_bucket_add_buckets(struct bch_fs *c,
struct closure *cl)
{
struct bch_devs_mask devs = c->rw_devs[wp->type];
+ const struct bch_devs_mask *t;
struct open_bucket *ob;
unsigned i;
+ int ret;
+
+ percpu_down_read_preempt_disable(&c->usage_lock);
+ rcu_read_lock();
/* Don't allocate from devices we already have pointers to: */
for (i = 0; i < devs_have->nr; i++)
@@ -1432,17 +1448,16 @@ static int open_bucket_add_buckets(struct bch_fs *c,
writepoint_for_each_ptr_all(wp, ob, i)
__clear_bit(ob->ptr.dev, devs.d);
- if (target) {
- const struct bch_devs_mask *t;
+ t = bch2_target_to_mask(c, target);
+ if (t)
+ bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
- rcu_read_lock();
- t = bch2_target_to_mask(c, target);
- if (t)
- bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
- rcu_read_unlock();
- }
+ ret = bch2_bucket_alloc_set(c, wp, nr_replicas, reserve, &devs, cl);
- return bch2_bucket_alloc_set(c, wp, nr_replicas, reserve, &devs, cl);
+ rcu_read_unlock();
+ percpu_up_read_preempt_enable(&c->usage_lock);
+
+ return ret;
}
static struct write_point *__writepoint_find(struct hlist_head *head,
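Note: open_bucket_add_buckets() now holds c->usage_lock and the RCU read lock across device selection and the call into bch2_bucket_alloc_set(), which is why the narrower rcu_read_lock()/lg_local_lock() pairs inside bch2_bucket_alloc_set() and the bucket-allocation path could be deleted in the hunks above. The RCU rule being applied: a pointer returned by an RCU-protected lookup is only valid while the read-side section is open, so the section must cover every use of it. With the names from this hunk:

	rcu_read_lock();
	t = bch2_target_to_mask(c, target);	/* RCU-protected lookup */
	if (t)
		bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);

	ret = bch2_bucket_alloc_set(c, wp, nr_replicas, reserve, &devs, cl);
	rcu_read_unlock();			/* t must not be used past here */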
@@ -1980,10 +1995,12 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
if (!is_available_bucket(m) || m.cached_sectors)
continue;
+ percpu_down_read_preempt_disable(&c->usage_lock);
bch2_mark_alloc_bucket(c, ca, bu, true,
gc_pos_alloc(c, NULL),
BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
BCH_BUCKET_MARK_GC_LOCK_HELD);
+ percpu_up_read_preempt_enable(&c->usage_lock);
fifo_push(&ca->free_inc, bu);
ca->nr_invalidated++;
@@ -2051,7 +2068,8 @@ not_enough:
for_each_rw_member(ca, c, dev_iter) {
ret = bch2_invalidate_free_inc(c, ca, &journal_seq,
- ca->free[RESERVE_BTREE].size);
+ ca->free[RESERVE_BTREE].size,
+ false);
if (ret) {
percpu_ref_put(&ca->io_ref);
return ret;