Diffstat (limited to 'libbcachefs/buckets.c')
-rw-r--r--	libbcachefs/buckets.c	205
1 file changed, 187 insertions(+), 18 deletions(-)
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 8899e3c6..2dbe7d37 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -67,6 +67,7 @@
 #include "btree_gc.h"
 #include "buckets.h"
 #include "error.h"
+#include "movinggc.h"
 
 #include <linux/preempt.h>
 #include <trace/events/bcachefs.h>
@@ -147,12 +148,16 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c)
 {
 	u16 last_seq_ondisk = c->journal.last_seq_ondisk;
 	struct bch_dev *ca;
+	struct bucket_array *buckets;
 	struct bucket *g;
 	struct bucket_mark m;
 	unsigned i;
 
-	for_each_member_device(ca, c, i)
-		for_each_bucket(g, ca) {
+	for_each_member_device(ca, c, i) {
+		down_read(&ca->bucket_lock);
+		buckets = bucket_array(ca);
+
+		for_each_bucket(g, buckets) {
 			bucket_cmpxchg(g, m, ({
 				if (!m.journal_seq_valid ||
 				    bucket_needs_journal_commit(m, last_seq_ondisk))
@@ -161,6 +166,8 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c)
 				m.journal_seq_valid = 0;
 			}));
 		}
+		up_read(&ca->bucket_lock);
+	}
 }
 
 #define bch2_usage_add(_acc, _stats)					\
@@ -319,20 +326,17 @@ void bch2_fs_usage_apply(struct bch_fs *c,
 }
 
 static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
-				  struct bucket *g, struct bucket_mark old,
-				  struct bucket_mark new)
+				  struct bucket_mark old, struct bucket_mark new)
 {
 	struct bch_dev_usage *dev_usage;
 
-	BUG_ON((g - ca->buckets) < ca->mi.first_bucket ||
-	       (g - ca->buckets) >= ca->mi.nbuckets);
+	lockdep_assert_held(&c->usage_lock);
 
 	bch2_fs_inconsistent_on(old.data_type && new.data_type &&
 				old.data_type != new.data_type, c,
 		"different types of data in same bucket: %u, %u",
 		old.data_type, new.data_type);
 
-	preempt_disable();
 	dev_usage = this_cpu_ptr(ca->usage_percpu);
 
 	dev_usage->buckets[bucket_type(old)]--;
@@ -347,7 +351,6 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 	dev_usage->sectors[new.data_type] += new.dirty_sectors;
 	dev_usage->sectors[BCH_DATA_CACHED] +=
 		(int) new.cached_sectors - (int) old.cached_sectors;
-	preempt_enable();
 
 	if (!is_available_bucket(old) && is_available_bucket(new))
 		bch2_wake_allocator(ca);
@@ -359,16 +362,19 @@
 ({								\
 	struct bucket_mark _old = bucket_cmpxchg(g, new, expr);	\
 								\
-	bch2_dev_usage_update(c, ca, g, _old, new);		\
+	bch2_dev_usage_update(c, ca, _old, new);		\
 	_old;							\
 })
 
 bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
-			    struct bucket *g, struct bucket_mark *old)
+			    size_t b, struct bucket_mark *old)
 {
+	struct bucket *g;
 	struct bucket_mark new;
 
 	lg_local_lock(&c->usage_lock);
+	g = bucket(ca, b);
+
 	*old = bucket_data_cmpxchg(c, ca, g, new, ({
 		if (!is_available_bucket(new)) {
 			lg_local_unlock(&c->usage_lock);
@@ -385,20 +391,22 @@ bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
 	lg_local_unlock(&c->usage_lock);
 
 	if (!old->owned_by_allocator && old->cached_sectors)
-		trace_invalidate(ca, bucket_to_sector(ca, g - ca->buckets),
+		trace_invalidate(ca, bucket_to_sector(ca, b),
 				 old->cached_sectors);
 	return true;
 }
 
 bool bch2_mark_alloc_bucket_startup(struct bch_fs *c, struct bch_dev *ca,
-				    struct bucket *g)
+				    size_t b)
 {
+	struct bucket *g;
 	struct bucket_mark new, old;
 
 	lg_local_lock(&c->usage_lock);
+	g = bucket(ca, b);
+
 	old = bucket_data_cmpxchg(c, ca, g, new, ({
-		if (new.touched_this_mount ||
-		    !is_available_bucket(new)) {
+		if (!is_startup_available_bucket(new)) {
 			lg_local_unlock(&c->usage_lock);
 			return false;
 		}
@@ -412,12 +420,15 @@ bool bch2_mark_alloc_bucket_startup(struct bch_fs *c, struct bch_dev *ca,
 }
 
 void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
-			    struct bucket *g, bool owned_by_allocator,
+			    size_t b, bool owned_by_allocator,
 			    struct gc_pos pos, unsigned flags)
 {
+	struct bucket *g;
 	struct bucket_mark old, new;
 
 	lg_local_lock(&c->usage_lock);
+	g = bucket(ca, b);
+
 	if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
 	    gc_will_visit(c, pos)) {
 		lg_local_unlock(&c->usage_lock);
@@ -448,15 +459,18 @@ do {								\
 } while (0)
 
 void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
-			       struct bucket *g, enum bch_data_type type,
+			       size_t b, enum bch_data_type type,
 			       unsigned sectors, struct gc_pos pos,
 			       unsigned flags)
 {
+	struct bucket *g;
 	struct bucket_mark old, new;
 
 	BUG_ON(!type);
 
 	lg_local_lock(&c->usage_lock);
+	g = bucket(ca, b);
+
 	if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
 	    gc_will_visit(c, pos)) {
 		lg_local_unlock(&c->usage_lock);
@@ -502,7 +516,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
 	struct bucket_mark old, new;
 	unsigned saturated;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-	struct bucket *g = ca->buckets + PTR_BUCKET_NR(ca, ptr);
+	struct bucket *g = PTR_BUCKET(ca, ptr);
 	enum bch_data_type data_type = type == S_META
 		? BCH_DATA_BTREE : BCH_DATA_USER;
 	u64 v;
@@ -584,7 +598,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
 			      old.counter,
 			      new.counter)) != old.counter);
 
-	bch2_dev_usage_update(c, ca, g, old, new);
+	bch2_dev_usage_update(c, ca, old, new);
 
 	BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
 	       bucket_became_unavailable(c, old, new));
@@ -810,3 +824,158 @@ int bch2_disk_reservation_get(struct bch_fs *c,
 
 	return bch2_disk_reservation_add(c, res, sectors, flags);
 }
+
+/* Startup/shutdown: */
+
+static void buckets_free_rcu(struct rcu_head *rcu)
+{
+	struct bucket_array *buckets =
+		container_of(rcu, struct bucket_array, rcu);
+
+	kvpfree(buckets,
+		sizeof(struct bucket_array) +
+		buckets->nbuckets * sizeof(struct bucket));
+}
+
+int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
+{
+	struct bucket_array *buckets = NULL, *old_buckets;
+	unsigned long *buckets_dirty = NULL;
+	u8 *oldest_gens = NULL;
+	alloc_fifo	free[RESERVE_NR];
+	alloc_fifo	free_inc;
+	alloc_heap	alloc_heap;
+	copygc_heap	copygc_heap;
+
+	size_t btree_reserve	= DIV_ROUND_UP(BTREE_NODE_RESERVE,
+			     ca->mi.bucket_size / c->opts.btree_node_size);
+	/* XXX: these should be tunable */
+	size_t reserve_none	= max_t(size_t, 4, ca->mi.nbuckets >> 9);
+	size_t copygc_reserve	= max_t(size_t, 16, ca->mi.nbuckets >> 7);
+	size_t free_inc_reserve	= copygc_reserve / 2;
+	bool resize = ca->buckets != NULL,
+	     start_copygc = ca->copygc_thread != NULL;
+	int ret = -ENOMEM;
+	unsigned i;
+
+	memset(&free,		0, sizeof(free));
+	memset(&free_inc,	0, sizeof(free_inc));
+	memset(&alloc_heap,	0, sizeof(alloc_heap));
+	memset(&copygc_heap,	0, sizeof(copygc_heap));
+
+	if (!(buckets		= kvpmalloc(sizeof(struct bucket_array) +
+					    nbuckets * sizeof(struct bucket),
+					    GFP_KERNEL|__GFP_ZERO)) ||
+	    !(oldest_gens	= kvpmalloc(nbuckets * sizeof(u8),
+					    GFP_KERNEL|__GFP_ZERO)) ||
+	    !(buckets_dirty	= kvpmalloc(BITS_TO_LONGS(nbuckets) *
+					    sizeof(unsigned long),
+					    GFP_KERNEL|__GFP_ZERO)) ||
+	    !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
+	    !init_fifo(&free[RESERVE_MOVINGGC],
+		       copygc_reserve, GFP_KERNEL) ||
+	    !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
+	    !init_fifo(&free_inc,	free_inc_reserve, GFP_KERNEL) ||
+	    !init_heap(&alloc_heap,	free_inc_reserve, GFP_KERNEL) ||
+	    !init_heap(&copygc_heap,	copygc_reserve, GFP_KERNEL))
+		goto err;
+
+	buckets->first_bucket	= ca->mi.first_bucket;
+	buckets->nbuckets	= nbuckets;
+
+	bch2_copygc_stop(ca);
+
+	down_write(&c->gc_lock);
+	down_write(&ca->bucket_lock);
+	lg_global_lock(&c->usage_lock);
+
+	old_buckets = bucket_array(ca);
+
+	if (resize) {
+		size_t n = min(buckets->nbuckets, old_buckets->nbuckets);
+
+		memcpy(buckets->b,
+		       old_buckets->b,
+		       n * sizeof(struct bucket));
+		memcpy(oldest_gens,
+		       ca->oldest_gens,
+		       n * sizeof(u8));
+		memcpy(buckets_dirty,
+		       ca->buckets_dirty,
+		       BITS_TO_LONGS(n) * sizeof(unsigned long));
+	}
+
+	rcu_assign_pointer(ca->buckets, buckets);
+	buckets = old_buckets;
+
+	swap(ca->oldest_gens, oldest_gens);
+	swap(ca->buckets_dirty, buckets_dirty);
+
+	lg_global_unlock(&c->usage_lock);
+
+	spin_lock(&c->freelist_lock);
+	for (i = 0; i < RESERVE_NR; i++) {
+		fifo_move(&free[i], &ca->free[i]);
+		swap(ca->free[i], free[i]);
+	}
+	fifo_move(&free_inc, &ca->free_inc);
+	swap(ca->free_inc, free_inc);
+	spin_unlock(&c->freelist_lock);
+
+	/* with gc lock held, alloc_heap can't be in use: */
+	swap(ca->alloc_heap, alloc_heap);
+
+	/* and we shut down copygc: */
+	swap(ca->copygc_heap, copygc_heap);
+
+	nbuckets = ca->mi.nbuckets;
+
+	up_write(&ca->bucket_lock);
+	up_write(&c->gc_lock);
+
+	if (start_copygc &&
+	    bch2_copygc_start(c, ca))
+		bch_err(ca, "error restarting copygc thread");
+
+	ret = 0;
+err:
+	free_heap(&copygc_heap);
+	free_heap(&alloc_heap);
+	free_fifo(&free_inc);
+	for (i = 0; i < RESERVE_NR; i++)
+		free_fifo(&free[i]);
+	kvpfree(buckets_dirty,
+		BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
+	kvpfree(oldest_gens,
+		nbuckets * sizeof(u8));
+	if (buckets)
+		call_rcu(&buckets->rcu, buckets_free_rcu);
+
+	return ret;
+}
+
+void bch2_dev_buckets_free(struct bch_dev *ca)
+{
+	unsigned i;
+
+	free_heap(&ca->copygc_heap);
+	free_heap(&ca->alloc_heap);
+	free_fifo(&ca->free_inc);
+	for (i = 0; i < RESERVE_NR; i++)
+		free_fifo(&ca->free[i]);
+	kvpfree(ca->buckets_dirty,
+		BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
+	kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
+	kvpfree(ca->buckets, sizeof(struct bucket_array) +
+		ca->mi.nbuckets * sizeof(struct bucket));
+
+	free_percpu(ca->usage_percpu);
+}
+
+int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
+{
+	if (!(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)))
+		return -ENOMEM;
+
+	return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
+}
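
Note: the patch switches the marking interfaces from passing a struct bucket * to passing a bucket index (size_t b), looked up via bucket() after taking c->usage_lock, so that bch2_dev_buckets_resize() can publish a differently sized bucket_array at runtime. The bucket() and bucket_array() helpers live in buckets.h and are not part of this diff; the sketch below reconstructs how they plausibly look given the locking visible in the patch (rcu_assign_pointer() under usage_lock + bucket_lock, old array freed via call_rcu()). Field and lock names follow the diff; everything else is an assumption, not the tree's exact code.

/*
 * Reconstruction (assumed, not from this diff) of the buckets.h helpers.
 * ca->buckets would be declared __rcu for this to pass sparse checking.
 */
struct bucket_array {
	struct rcu_head		rcu;
	u64			first_bucket;
	u64			nbuckets;
	struct bucket		b[];	/* flexible array, kvpmalloc'd in resize */
};

/*
 * Readers hold ca->bucket_lock, c->usage_lock, or rcu_read_lock(); the
 * resize path takes the write side of all of them before publishing a
 * new array with rcu_assign_pointer().
 */
static inline struct bucket_array *bucket_array(struct bch_dev *ca)
{
	return rcu_dereference_check(ca->buckets,
				     lockdep_is_held(&ca->bucket_lock));
}

static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
{
	struct bucket_array *buckets = bucket_array(ca);

	/* takes over the bounds check dropped from bch2_dev_usage_update() */
	BUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
	return buckets->b + b;
}

This also explains why bch2_bucket_seq_cleanup() now brackets its loop with down_read(&ca->bucket_lock)/up_read(): for_each_bucket() must walk one stable snapshot of the array, which the resize path can no longer free underneath it.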