Diffstat (limited to 'libbcachefs/buckets.c')
-rw-r--r--  libbcachefs/buckets.c  205
1 file changed, 187 insertions, 18 deletions
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 8899e3c6..2dbe7d37 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -67,6 +67,7 @@
#include "btree_gc.h"
#include "buckets.h"
#include "error.h"
+#include "movinggc.h"
#include <linux/preempt.h>
#include <trace/events/bcachefs.h>
@@ -147,12 +148,16 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c)
{
u16 last_seq_ondisk = c->journal.last_seq_ondisk;
struct bch_dev *ca;
+ struct bucket_array *buckets;
struct bucket *g;
struct bucket_mark m;
unsigned i;
- for_each_member_device(ca, c, i)
- for_each_bucket(g, ca) {
+ for_each_member_device(ca, c, i) {
+ down_read(&ca->bucket_lock);
+ buckets = bucket_array(ca);
+
+ for_each_bucket(g, buckets) {
bucket_cmpxchg(g, m, ({
if (!m.journal_seq_valid ||
bucket_needs_journal_commit(m, last_seq_ondisk))
@@ -161,6 +166,8 @@ void bch2_bucket_seq_cleanup(struct bch_fs *c)
m.journal_seq_valid = 0;
}));
}
+ up_read(&ca->bucket_lock);
+ }
}
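/*
 * Note: the cleanup loop above now takes ca->bucket_lock for reading and
 * walks bucket_array(ca), because ca->buckets can be reallocated by
 * bch2_dev_buckets_resize() below.  The accessors it relies on live in
 * buckets.h and are assumed to look roughly like this (sketch, not part
 * of this diff):
 */
static inline struct bucket_array *bucket_array(struct bch_dev *ca)
{
	/* stable while holding ca->bucket_lock, c->usage_lock or RCU */
	return rcu_dereference_check(ca->buckets,
				     lockdep_is_held(&ca->bucket_lock));
}

static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
{
	struct bucket_array *buckets = bucket_array(ca);

	EBUG_ON(b < buckets->first_bucket || b >= buckets->nbuckets);
	return buckets->b + b;
}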
#define bch2_usage_add(_acc, _stats) \
@@ -319,20 +326,17 @@ void bch2_fs_usage_apply(struct bch_fs *c,
}
static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
- struct bucket *g, struct bucket_mark old,
- struct bucket_mark new)
+ struct bucket_mark old, struct bucket_mark new)
{
struct bch_dev_usage *dev_usage;
- BUG_ON((g - ca->buckets) < ca->mi.first_bucket ||
- (g - ca->buckets) >= ca->mi.nbuckets);
+ lockdep_assert_held(&c->usage_lock);
bch2_fs_inconsistent_on(old.data_type && new.data_type &&
old.data_type != new.data_type, c,
"different types of data in same bucket: %u, %u",
old.data_type, new.data_type);
- preempt_disable();
dev_usage = this_cpu_ptr(ca->usage_percpu);
dev_usage->buckets[bucket_type(old)]--;
@@ -347,7 +351,6 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
dev_usage->sectors[new.data_type] += new.dirty_sectors;
dev_usage->sectors[BCH_DATA_CACHED] +=
(int) new.cached_sectors - (int) old.cached_sectors;
- preempt_enable();
if (!is_available_bucket(old) && is_available_bucket(new))
bch2_wake_allocator(ca);
@@ -359,16 +362,19 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
({ \
struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
\
- bch2_dev_usage_update(c, ca, g, _old, new); \
+ bch2_dev_usage_update(c, ca, _old, new); \
_old; \
})
bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
- struct bucket *g, struct bucket_mark *old)
+ size_t b, struct bucket_mark *old)
{
+ struct bucket *g;
struct bucket_mark new;
lg_local_lock(&c->usage_lock);
+ g = bucket(ca, b);
+
*old = bucket_data_cmpxchg(c, ca, g, new, ({
if (!is_available_bucket(new)) {
lg_local_unlock(&c->usage_lock);
@@ -385,20 +391,22 @@ bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
lg_local_unlock(&c->usage_lock);
if (!old->owned_by_allocator && old->cached_sectors)
- trace_invalidate(ca, bucket_to_sector(ca, g - ca->buckets),
+ trace_invalidate(ca, bucket_to_sector(ca, b),
old->cached_sectors);
return true;
}
bool bch2_mark_alloc_bucket_startup(struct bch_fs *c, struct bch_dev *ca,
- struct bucket *g)
+ size_t b)
{
+ struct bucket *g;
struct bucket_mark new, old;
lg_local_lock(&c->usage_lock);
+ g = bucket(ca, b);
+
old = bucket_data_cmpxchg(c, ca, g, new, ({
- if (new.touched_this_mount ||
- !is_available_bucket(new)) {
+ if (!is_startup_available_bucket(new)) {
lg_local_unlock(&c->usage_lock);
return false;
}
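/*
 * The open-coded "touched_this_mount || !is_available_bucket()" test is
 * replaced by is_startup_available_bucket(); the helper itself is not in
 * this diff, but from the removed lines it is presumably just:
 */
static inline bool is_startup_available_bucket(struct bucket_mark mark)
{
	return !mark.touched_this_mount && is_available_bucket(mark);
}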
@@ -412,12 +420,15 @@ bool bch2_mark_alloc_bucket_startup(struct bch_fs *c, struct bch_dev *ca,
}
void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
- struct bucket *g, bool owned_by_allocator,
+ size_t b, bool owned_by_allocator,
struct gc_pos pos, unsigned flags)
{
+ struct bucket *g;
struct bucket_mark old, new;
lg_local_lock(&c->usage_lock);
+ g = bucket(ca, b);
+
if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
gc_will_visit(c, pos)) {
lg_local_unlock(&c->usage_lock);
@@ -448,15 +459,18 @@ do { \
} while (0)
void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
- struct bucket *g, enum bch_data_type type,
+ size_t b, enum bch_data_type type,
unsigned sectors, struct gc_pos pos,
unsigned flags)
{
+ struct bucket *g;
struct bucket_mark old, new;
BUG_ON(!type);
lg_local_lock(&c->usage_lock);
+ g = bucket(ca, b);
+
if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
gc_will_visit(c, pos)) {
lg_local_unlock(&c->usage_lock);
@@ -502,7 +516,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
struct bucket_mark old, new;
unsigned saturated;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- struct bucket *g = ca->buckets + PTR_BUCKET_NR(ca, ptr);
+ struct bucket *g = PTR_BUCKET(ca, ptr);
enum bch_data_type data_type = type == S_META
? BCH_DATA_BTREE : BCH_DATA_USER;
u64 v;
@@ -584,7 +598,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
old.counter,
new.counter)) != old.counter);
- bch2_dev_usage_update(c, ca, g, old, new);
+ bch2_dev_usage_update(c, ca, old, new);
BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
bucket_became_unavailable(c, old, new));
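/*
 * bch2_mark_pointer() now goes through PTR_BUCKET() instead of indexing
 * ca->buckets directly; the helper is assumed to wrap the bucket index
 * lookup from buckets.h, roughly (sketch, not part of this diff):
 */
static inline struct bucket *PTR_BUCKET(struct bch_dev *ca,
					const struct bch_extent_ptr *ptr)
{
	return bucket(ca, PTR_BUCKET_NR(ca, ptr));
}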
@@ -810,3 +824,158 @@ int bch2_disk_reservation_get(struct bch_fs *c,
return bch2_disk_reservation_add(c, res, sectors, flags);
}
+
+/* Startup/shutdown: */
+
+static void buckets_free_rcu(struct rcu_head *rcu)
+{
+ struct bucket_array *buckets =
+ container_of(rcu, struct bucket_array, rcu);
+
+ kvpfree(buckets,
+ sizeof(struct bucket_array) +
+ buckets->nbuckets * sizeof(struct bucket));
+}
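/*
 * The array and its size are handed to kvpfree() together; kvpmalloc()/
 * kvpfree() are assumed to be the usual bcachefs helpers that use kmalloc
 * for small allocations and fall back to vmalloc for large ones, which is
 * why every free site passes the allocation size.  Rough sketch only, not
 * the actual util.h implementation:
 */
static inline void kvpfree(void *p, size_t size)
{
	if (size < PAGE_SIZE)
		kfree(p);
	else
		vfree(p);
}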
+
+int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
+{
+ struct bucket_array *buckets = NULL, *old_buckets;
+ unsigned long *buckets_dirty = NULL;
+ u8 *oldest_gens = NULL;
+ alloc_fifo free[RESERVE_NR];
+ alloc_fifo free_inc;
+ alloc_heap alloc_heap;
+ copygc_heap copygc_heap;
+
+ size_t btree_reserve = DIV_ROUND_UP(BTREE_NODE_RESERVE,
+ ca->mi.bucket_size / c->opts.btree_node_size);
+ /* XXX: these should be tunable */
+ size_t reserve_none = max_t(size_t, 4, ca->mi.nbuckets >> 9);
+ size_t copygc_reserve = max_t(size_t, 16, ca->mi.nbuckets >> 7);
+ size_t free_inc_reserve = copygc_reserve / 2;
+ bool resize = ca->buckets != NULL,
+ start_copygc = ca->copygc_thread != NULL;
+ int ret = -ENOMEM;
+ unsigned i;
+
+ memset(&free, 0, sizeof(free));
+ memset(&free_inc, 0, sizeof(free_inc));
+ memset(&alloc_heap, 0, sizeof(alloc_heap));
+ memset(&copygc_heap, 0, sizeof(copygc_heap));
+
+ if (!(buckets = kvpmalloc(sizeof(struct bucket_array) +
+ nbuckets * sizeof(struct bucket),
+ GFP_KERNEL|__GFP_ZERO)) ||
+ !(oldest_gens = kvpmalloc(nbuckets * sizeof(u8),
+ GFP_KERNEL|__GFP_ZERO)) ||
+ !(buckets_dirty = kvpmalloc(BITS_TO_LONGS(nbuckets) *
+ sizeof(unsigned long),
+ GFP_KERNEL|__GFP_ZERO)) ||
+ !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
+ !init_fifo(&free[RESERVE_MOVINGGC],
+ copygc_reserve, GFP_KERNEL) ||
+ !init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
+ !init_fifo(&free_inc, free_inc_reserve, GFP_KERNEL) ||
+ !init_heap(&alloc_heap, free_inc_reserve, GFP_KERNEL) ||
+ !init_heap(&copygc_heap, copygc_reserve, GFP_KERNEL))
+ goto err;
+
+ buckets->first_bucket = ca->mi.first_bucket;
+ buckets->nbuckets = nbuckets;
+
+ bch2_copygc_stop(ca);
+
+ down_write(&c->gc_lock);
+ down_write(&ca->bucket_lock);
+ lg_global_lock(&c->usage_lock);
+
+ old_buckets = bucket_array(ca);
+
+ if (resize) {
+ size_t n = min(buckets->nbuckets, old_buckets->nbuckets);
+
+ memcpy(buckets->b,
+ old_buckets->b,
+ n * sizeof(struct bucket));
+ memcpy(oldest_gens,
+ ca->oldest_gens,
+ n * sizeof(u8));
+ memcpy(buckets_dirty,
+ ca->buckets_dirty,
+ BITS_TO_LONGS(n) * sizeof(unsigned long));
+ }
+
+ rcu_assign_pointer(ca->buckets, buckets);
+ buckets = old_buckets;
+
+ swap(ca->oldest_gens, oldest_gens);
+ swap(ca->buckets_dirty, buckets_dirty);
+
+ lg_global_unlock(&c->usage_lock);
+
+ spin_lock(&c->freelist_lock);
+ for (i = 0; i < RESERVE_NR; i++) {
+ fifo_move(&free[i], &ca->free[i]);
+ swap(ca->free[i], free[i]);
+ }
+ fifo_move(&free_inc, &ca->free_inc);
+ swap(ca->free_inc, free_inc);
+ spin_unlock(&c->freelist_lock);
+
+ /* with gc lock held, alloc_heap can't be in use: */
+ swap(ca->alloc_heap, alloc_heap);
+
+ /* and we shut down copygc: */
+ swap(ca->copygc_heap, copygc_heap);
+
+ nbuckets = ca->mi.nbuckets;
+
+ up_write(&ca->bucket_lock);
+ up_write(&c->gc_lock);
+
+ if (start_copygc &&
+ bch2_copygc_start(c, ca))
+ bch_err(ca, "error restarting copygc thread");
+
+ ret = 0;
+err:
+ free_heap(&copygc_heap);
+ free_heap(&alloc_heap);
+ free_fifo(&free_inc);
+ for (i = 0; i < RESERVE_NR; i++)
+ free_fifo(&free[i]);
+ kvpfree(buckets_dirty,
+ BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
+ kvpfree(oldest_gens,
+ nbuckets * sizeof(u8));
+ if (buckets)
+ call_rcu(&buckets->rcu, buckets_free_rcu);
+
+ return ret;
+}
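/*
 * The resize publishes the new array with rcu_assign_pointer() while
 * holding c->usage_lock, ca->bucket_lock and c->gc_lock, and frees the
 * old one through call_rcu(), so a reader that holds none of those locks
 * must bracket its access with rcu_read_lock().  Hypothetical reader
 * sketch (example_bucket_gen() is not a real helper in this tree):
 */
static u8 example_bucket_gen(struct bch_dev *ca, size_t b)
{
	struct bucket_array *buckets;
	struct bucket_mark m;

	rcu_read_lock();
	buckets = rcu_dereference(ca->buckets);
	m = READ_ONCE(buckets->b[b].mark);
	rcu_read_unlock();

	return m.gen;
}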
+
+void bch2_dev_buckets_free(struct bch_dev *ca)
+{
+ unsigned i;
+
+ free_heap(&ca->copygc_heap);
+ free_heap(&ca->alloc_heap);
+ free_fifo(&ca->free_inc);
+ for (i = 0; i < RESERVE_NR; i++)
+ free_fifo(&ca->free[i]);
+ kvpfree(ca->buckets_dirty,
+ BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
+ kvpfree(ca->oldest_gens, ca->mi.nbuckets * sizeof(u8));
+ kvpfree(ca->buckets, sizeof(struct bucket_array) +
+ ca->mi.nbuckets * sizeof(struct bucket));
+
+ free_percpu(ca->usage_percpu);
+}
+
+int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca)
+{
+ if (!(ca->usage_percpu = alloc_percpu(struct bch_dev_usage)))
+ return -ENOMEM;
+
+ return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets);
+}