Diffstat (limited to 'libbcachefs/alloc.c')
-rw-r--r-- | libbcachefs/alloc.c | 92 |
1 file changed, 55 insertions, 37 deletions
diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c
index 44f9479e..ac2c7d1f 100644
--- a/libbcachefs/alloc.c
+++ b/libbcachefs/alloc.c
@@ -223,7 +223,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
 	if (a.k->p.offset >= ca->mi.nbuckets)
 		return;
 
-	lg_local_lock(&c->usage_lock);
+	percpu_down_read_preempt_disable(&c->usage_lock);
 	g = bucket(ca, a.k->p.offset);
 	bucket_cmpxchg(g, new, ({
@@ -237,7 +237,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
 	if (a.v->fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
 		g->io_time[WRITE] = get_alloc_field(&d, 2);
 
-	lg_local_unlock(&c->usage_lock);
+	percpu_up_read_preempt_enable(&c->usage_lock);
 }
 
 int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
@@ -288,7 +288,7 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
 
 static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 				  size_t b, struct btree_iter *iter,
-				  u64 *journal_seq)
+				  u64 *journal_seq, bool nowait)
 {
 	struct bucket_mark m;
 	__BKEY_PADDED(k, DIV_ROUND_UP(sizeof(struct bch_alloc), 8)) alloc_key;
@@ -296,6 +296,13 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 	struct bkey_i_alloc *a;
 	u8 *d;
 	int ret;
+	unsigned flags = BTREE_INSERT_ATOMIC|
+		BTREE_INSERT_NOFAIL|
+		BTREE_INSERT_USE_RESERVE|
+		BTREE_INSERT_USE_ALLOC_RESERVE;
+
+	if (nowait)
+		flags |= BTREE_INSERT_NOWAIT;
 
 	bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
 
@@ -304,7 +311,7 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 		if (ret)
 			break;
 
-		lg_local_lock(&c->usage_lock);
+		percpu_down_read_preempt_disable(&c->usage_lock);
 		g = bucket(ca, b);
 
 		/* read mark under btree node lock: */
@@ -320,14 +327,9 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 			put_alloc_field(&d, 2, g->io_time[READ]);
 		if (a->v.fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
 			put_alloc_field(&d, 2, g->io_time[WRITE]);
-		lg_local_unlock(&c->usage_lock);
-
-		ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq,
-					   BTREE_INSERT_ATOMIC|
-					   BTREE_INSERT_NOFAIL|
-					   BTREE_INSERT_USE_RESERVE|
-					   BTREE_INSERT_USE_ALLOC_RESERVE|
-					   BTREE_INSERT_NOWAIT,
+		percpu_up_read_preempt_enable(&c->usage_lock);
+
+		ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq, flags,
					   BTREE_INSERT_ENTRY(iter, &a->k_i));
 		bch2_btree_iter_cond_resched(iter);
 	} while (ret == -EINTR);
@@ -352,7 +354,8 @@ int bch2_alloc_replay_key(struct bch_fs *c, struct bpos pos)
 	bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS_MIN,
 			     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
-	ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter, NULL);
+	ret = __bch2_alloc_write_key(c, ca, pos.offset, &iter,
+				     NULL, false);
 	bch2_btree_iter_unlock(&iter);
 	return ret;
 }
@@ -372,7 +375,8 @@ int bch2_alloc_write(struct bch_fs *c)
 		down_read(&ca->bucket_lock);
 		for_each_set_bit(bucket, ca->buckets_dirty, ca->mi.nbuckets) {
-			ret = __bch2_alloc_write_key(c, ca, bucket, &iter, NULL);
+			ret = __bch2_alloc_write_key(c, ca, bucket, &iter,
+						     NULL, false);
 			if (ret)
 				break;
 
@@ -583,15 +587,20 @@ static void bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
 {
 	struct bucket_mark m;
 
+	percpu_down_read_preempt_disable(&c->usage_lock);
 	spin_lock(&c->freelist_lock);
+
 	if (!bch2_invalidate_bucket(c, ca, bucket, &m)) {
 		spin_unlock(&c->freelist_lock);
+		percpu_up_read_preempt_enable(&c->usage_lock);
 		return;
 	}
 
 	verify_not_on_freelist(c, ca, bucket);
 	BUG_ON(!fifo_push(&ca->free_inc, bucket));
+
 	spin_unlock(&c->freelist_lock);
+	percpu_up_read_preempt_enable(&c->usage_lock);
 
 	/* gc lock held: */
 	bucket_io_clock_reset(c, ca, bucket, READ);
@@ -812,7 +821,8 @@ static void sort_free_inc(struct bch_fs *c, struct bch_dev *ca)
 }
 
 static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
-				    u64 *journal_seq, size_t nr)
+				    u64 *journal_seq, size_t nr,
+				    bool nowait)
 {
 	struct btree_iter iter;
 	int ret = 0;
@@ -820,14 +830,12 @@ static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
 	bch2_btree_iter_init(&iter, c, BTREE_ID_ALLOC, POS(ca->dev_idx, 0),
 			     BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
-	/*
-	 * XXX: if ca->nr_invalidated != 0, just return if we'd block doing the
-	 * btree update or journal_res_get
-	 */
+	/* Only use nowait if we've already invalidated at least one bucket: */
 	while (ca->nr_invalidated < min(nr, fifo_used(&ca->free_inc))) {
 		size_t b = fifo_idx_entry(&ca->free_inc, ca->nr_invalidated);
 
-		ret = __bch2_alloc_write_key(c, ca, b, &iter, journal_seq);
+		ret = __bch2_alloc_write_key(c, ca, b, &iter, journal_seq,
+					     nowait && ca->nr_invalidated);
 		if (ret)
 			break;
 
@@ -835,7 +843,9 @@ static int bch2_invalidate_free_inc(struct bch_fs *c, struct bch_dev *ca,
 	}
 
 	bch2_btree_iter_unlock(&iter);
-	return ret;
+
+	/* If we used NOWAIT, don't return the error: */
+	return ca->nr_invalidated ? 0 : ret;
 }
 
 static bool __push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t bucket)
@@ -943,7 +953,8 @@ static int bch2_allocator_thread(void *arg)
 			 fifo_used(&ca->free_inc));
 
 		journal_seq = 0;
-		ret = bch2_invalidate_free_inc(c, ca, &journal_seq, SIZE_MAX);
+		ret = bch2_invalidate_free_inc(c, ca, &journal_seq,
+					       SIZE_MAX, true);
 		if (ret) {
 			bch_err(ca, "error invalidating buckets: %i", ret);
 			goto stop;
@@ -1077,11 +1088,15 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
 {
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
 
+	percpu_down_read_preempt_disable(&c->usage_lock);
 	spin_lock(&ob->lock);
+
 	bch2_mark_alloc_bucket(c, ca, PTR_BUCKET_NR(ca, &ob->ptr),
 			       false, gc_pos_alloc(c, ob), 0);
 	ob->valid = false;
+
 	spin_unlock(&ob->lock);
+	percpu_up_read_preempt_enable(&c->usage_lock);
 
 	spin_lock(&c->freelist_lock);
 	ob->freelist = c->open_buckets_freelist;
@@ -1151,6 +1166,7 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 	long bucket;
 
 	spin_lock(&c->freelist_lock);
+
 	if (may_alloc_partial && ca->open_buckets_partial_nr) {
 		int ret = ca->open_buckets_partial[--ca->open_buckets_partial_nr];
 
@@ -1202,7 +1218,6 @@ out:
 	ob = bch2_open_bucket_alloc(c);
 
 	spin_lock(&ob->lock);
-	lg_local_lock(&c->usage_lock);
 	buckets = bucket_array(ca);
 
 	ob->valid = true;
@@ -1215,8 +1230,6 @@ out:
 	bucket_io_clock_reset(c, ca, bucket, READ);
 	bucket_io_clock_reset(c, ca, bucket, WRITE);
-
-	lg_local_unlock(&c->usage_lock);
 	spin_unlock(&ob->lock);
 	spin_unlock(&c->freelist_lock);
@@ -1296,7 +1309,6 @@ static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
 	if (nr_ptrs_effective >= nr_replicas)
 		return ALLOC_SUCCESS;
 
-	rcu_read_lock();
 	devs_sorted = bch2_wp_alloc_list(c, wp, devs);
 
 	for (i = 0; i < devs_sorted.nr; i++) {
@@ -1337,7 +1349,6 @@ static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
 			break;
 		}
 	}
-	rcu_read_unlock();
 
 	EBUG_ON(reserve == RESERVE_MOVINGGC &&
 		ret != ALLOC_SUCCESS &&
@@ -1422,8 +1433,13 @@ static int open_bucket_add_buckets(struct bch_fs *c,
 				   struct closure *cl)
 {
 	struct bch_devs_mask devs = c->rw_devs[wp->type];
+	const struct bch_devs_mask *t;
 	struct open_bucket *ob;
 	unsigned i;
+	int ret;
+
+	percpu_down_read_preempt_disable(&c->usage_lock);
+	rcu_read_lock();
 
 	/* Don't allocate from devices we already have pointers to: */
 	for (i = 0; i < devs_have->nr; i++)
@@ -1432,17 +1448,16 @@ static int open_bucket_add_buckets(struct bch_fs *c,
 	writepoint_for_each_ptr_all(wp, ob, i)
 		__clear_bit(ob->ptr.dev, devs.d);
 
-	if (target) {
-		const struct bch_devs_mask *t;
+	t = bch2_target_to_mask(c, target);
+	if (t)
+		bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
 
-		rcu_read_lock();
-		t = bch2_target_to_mask(c, target);
-		if (t)
-			bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
-		rcu_read_unlock();
-	}
+	ret = bch2_bucket_alloc_set(c, wp, nr_replicas, reserve, &devs, cl);
 
-	return bch2_bucket_alloc_set(c, wp, nr_replicas, reserve, &devs, cl);
+	rcu_read_unlock();
+	percpu_up_read_preempt_enable(&c->usage_lock);
+
+	return ret;
 }
 
 static struct write_point *__writepoint_find(struct hlist_head *head,
@@ -1980,10 +1995,12 @@ static int __bch2_fs_allocator_start(struct bch_fs *c)
 			if (!is_available_bucket(m) || m.cached_sectors)
 				continue;
 
+			percpu_down_read_preempt_disable(&c->usage_lock);
 			bch2_mark_alloc_bucket(c, ca, bu, true,
 					gc_pos_alloc(c, NULL),
 					BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE|
 					BCH_BUCKET_MARK_GC_LOCK_HELD);
+			percpu_up_read_preempt_enable(&c->usage_lock);
 
 			fifo_push(&ca->free_inc, bu);
 			ca->nr_invalidated++;
@@ -2051,7 +2068,8 @@ not_enough:
 	for_each_rw_member(ca, c, dev_iter) {
 		ret = bch2_invalidate_free_inc(c, ca, &journal_seq,
-					       ca->free[RESERVE_BTREE].size);
+					       ca->free[RESERVE_BTREE].size,
+					       false);
 		if (ret) {
 			percpu_ref_put(&ca->io_ref);
 			return ret;
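
Editorial note, not part of the patch: the recurring change above swaps the old lglock read side (lg_local_lock/lg_local_unlock) for a percpu rwsem whose read side also disables preemption (percpu_down_read_preempt_disable/percpu_up_read_preempt_enable), taken briefly around bucket-mark updates and ordered outside freelist_lock and ob->lock. The sketch below is a minimal userspace analogue of that read-mostly pattern, with a pthread rwlock standing in for the percpu rwsem; every name in it is illustrative and none of it is bcachefs code.

/*
 * Userspace sketch only: frequent, short read-side sections around a
 * per-bucket update, with a rare exclusive writer.  Build with
 * "cc -pthread sketch.c".
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct bucket_mark { unsigned gen; unsigned dirty_sectors; };

static pthread_rwlock_t usage_lock = PTHREAD_RWLOCK_INITIALIZER;
static _Atomic unsigned long nr_marked;
static struct bucket_mark buckets[16];

/* Frequent path: mark one bucket under the read side of usage_lock. */
static void mark_bucket(size_t b, unsigned sectors)
{
        pthread_rwlock_rdlock(&usage_lock);   /* ~ percpu_down_read_preempt_disable() */
        buckets[b].dirty_sectors += sectors;
        atomic_fetch_add(&nr_marked, 1);
        pthread_rwlock_unlock(&usage_lock);   /* ~ percpu_up_read_preempt_enable() */
}

/* Rare path: exclusive access, e.g. swapping out the whole bucket array. */
static void resize_buckets(void)
{
        pthread_rwlock_wrlock(&usage_lock);
        /* ... replace the array while no readers are inside ... */
        pthread_rwlock_unlock(&usage_lock);
}

int main(void)
{
        mark_bucket(3, 8);
        resize_buckets();
        printf("marked %lu buckets\n", atomic_load(&nr_marked));
        return 0;
}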