Diffstat (limited to 'libbcachefs/buckets.c')
-rw-r--r--  libbcachefs/buckets.c  350
1 file changed, 190 insertions(+), 160 deletions(-)
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 6fdbb464..b73002de 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -101,9 +101,41 @@ static void bch2_fs_stats_verify(struct bch_fs *c)
stats.online_reserved);
}
+static void bch2_dev_stats_verify(struct bch_dev *ca)
+{
+ struct bch_dev_usage stats =
+ __bch2_dev_usage_read(ca);
+ u64 n = ca->mi.nbuckets - ca->mi.first_bucket;
+
+ BUG_ON(stats.buckets[S_META] > n);
+ BUG_ON(stats.buckets[S_DIRTY] > n);
+ BUG_ON(stats.buckets_cached > n);
+ BUG_ON(stats.buckets_alloc > n);
+ BUG_ON(stats.buckets_unavailable > n);
+}
+
+static void bch2_disk_reservations_verify(struct bch_fs *c, int flags)
+{
+ if (!(flags & BCH_DISK_RESERVATION_NOFAIL)) {
+ u64 used = __bch2_fs_sectors_used(c);
+ u64 cached = 0;
+ u64 avail = atomic64_read(&c->sectors_available);
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ cached += per_cpu_ptr(c->usage_percpu, cpu)->available_cache;
+
+ if (used + avail + cached > c->capacity)
+ panic("used %llu avail %llu cached %llu capacity %llu\n",
+ used, avail, cached, c->capacity);
+ }
+}
+
#else
static void bch2_fs_stats_verify(struct bch_fs *c) {}
+static void bch2_dev_stats_verify(struct bch_dev *ca) {}
+static void bch2_disk_reservations_verify(struct bch_fs *c, int flags) {}
#endif
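
As a reader's aid (this sketch is not part of the patch): the new bch2_disk_reservations_verify() asserts that sectors are never double-counted between on-disk usage, the global sectors_available pool, and the per-CPU available_cache entries. A minimal standalone restatement of that invariant, using a plain array in place of the percpu machinery and toy types:

    #include <assert.h>
    #include <stdint.h>

    #define TOY_NR_CPUS 4

    /* Toy stand-ins for the fields the verify hook reads; not bcachefs types. */
    struct toy_fs {
    	uint64_t capacity;                     /* c->capacity                   */
    	uint64_t used;                         /* __bch2_fs_sectors_used(c)     */
    	uint64_t sectors_available;            /* c->sectors_available          */
    	uint64_t available_cache[TOY_NR_CPUS]; /* usage_percpu->available_cache */
    };

    /* Same check as bch2_disk_reservations_verify() without NOFAIL set:
     * sectors handed out plus sectors still reservable must fit on disk. */
    static void toy_reservations_verify(const struct toy_fs *fs)
    {
    	uint64_t cached = 0;

    	for (int cpu = 0; cpu < TOY_NR_CPUS; cpu++)
    		cached += fs->available_cache[cpu];

    	assert(fs->used + fs->sectors_available + cached <= fs->capacity);
    }
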
@@ -171,11 +203,9 @@ struct bch_dev_usage __bch2_dev_usage_read(struct bch_dev *ca)
return bch2_usage_read_raw(ca->usage_percpu);
}
-struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
+struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
{
- return bch2_usage_read_cached(ca->fs,
- ca->usage_cached,
- ca->usage_percpu);
+ return bch2_usage_read_cached(c, ca->usage_cached, ca->usage_percpu);
}
struct bch_fs_usage
@@ -208,6 +238,11 @@ static inline int is_cached_bucket(struct bucket_mark m)
!m.dirty_sectors && !!m.cached_sectors;
}
+static inline int is_unavailable_bucket(struct bucket_mark m)
+{
+ return !is_available_bucket(m);
+}
+
static inline enum s_alloc bucket_type(struct bucket_mark m)
{
return is_meta_bucket(m) ? S_META : S_DIRTY;
@@ -256,12 +291,15 @@ void bch2_fs_usage_apply(struct bch_fs *c,
memset(stats, 0, sizeof(*stats));
}
-static void bch2_dev_usage_update(struct bch_dev *ca,
- struct bucket_mark old, struct bucket_mark new)
+static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
+ struct bucket *g, struct bucket_mark old,
+ struct bucket_mark new)
{
- struct bch_fs *c = ca->fs;
struct bch_dev_usage *dev_usage;
+ BUG_ON((g - ca->buckets) < ca->mi.first_bucket ||
+ (g - ca->buckets) >= ca->mi.nbuckets);
+
bch2_fs_inconsistent_on(old.data_type && new.data_type &&
old.data_type != new.data_type, c,
"different types of metadata in same bucket: %u, %u",
@@ -270,38 +308,44 @@ static void bch2_dev_usage_update(struct bch_dev *ca,
preempt_disable();
dev_usage = this_cpu_ptr(ca->usage_percpu);
- dev_usage->sectors_cached +=
- (int) new.cached_sectors - (int) old.cached_sectors;
-
- dev_usage->sectors[bucket_type(old)] -= old.dirty_sectors;
- dev_usage->sectors[bucket_type(new)] += new.dirty_sectors;
-
+ dev_usage->buckets[S_META] +=
+ is_meta_bucket(new) - is_meta_bucket(old);
+ dev_usage->buckets[S_DIRTY] +=
+ is_dirty_bucket(new) - is_dirty_bucket(old);
+ dev_usage->buckets_cached +=
+ is_cached_bucket(new) - is_cached_bucket(old);
dev_usage->buckets_alloc +=
(int) new.owned_by_allocator - (int) old.owned_by_allocator;
+ dev_usage->buckets_unavailable +=
+ is_unavailable_bucket(new) - is_unavailable_bucket(old);
- dev_usage->buckets[S_META] += is_meta_bucket(new) - is_meta_bucket(old);
- dev_usage->buckets[S_DIRTY] += is_dirty_bucket(new) - is_dirty_bucket(old);
- dev_usage->buckets_cached += is_cached_bucket(new) - is_cached_bucket(old);
+ dev_usage->sectors[bucket_type(old)] -= old.dirty_sectors;
+ dev_usage->sectors[bucket_type(new)] += new.dirty_sectors;
+ dev_usage->sectors_cached +=
+ (int) new.cached_sectors - (int) old.cached_sectors;
preempt_enable();
if (!is_available_bucket(old) && is_available_bucket(new))
bch2_wake_allocator(ca);
+
+ bch2_dev_stats_verify(ca);
}
-#define bucket_data_cmpxchg(ca, g, new, expr) \
+#define bucket_data_cmpxchg(c, ca, g, new, expr) \
({ \
struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
\
- bch2_dev_usage_update(ca, _old, new); \
+ bch2_dev_usage_update(c, ca, g, _old, new); \
_old; \
})
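
A note on the shape of bucket_data_cmpxchg() (sketch, not from the patch): bucket_cmpxchg() retries a compare-and-exchange on the whole packed bucket_mark until the update lands, and the resulting (old, new) pair is what bch2_dev_usage_update() uses to move per-device counters by deltas such as is_dirty_bucket(new) - is_dirty_bucket(old). A reduced single-file version of that pattern, with a hypothetical mark layout rather than the real one:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Toy mark packed into one 64-bit word so it can be swapped atomically. */
    union toy_mark {
    	struct {
    		uint32_t dirty_sectors;
    		uint8_t  owned_by_allocator;
    	};
    	uint64_t v;
    };

    struct toy_bucket    { _Atomic uint64_t mark; };
    struct toy_dev_usage { int64_t buckets_dirty, buckets_alloc; };

    static bool toy_is_dirty(union toy_mark m) { return m.dirty_sectors != 0; }

    /* Shape of bucket_cmpxchg(): reload, apply the update, retry until the
     * compare-and-exchange succeeds; return the mark that was replaced. */
    static union toy_mark toy_mark_alloc(struct toy_bucket *g, union toy_mark *new)
    {
    	union toy_mark old;
    	uint64_t expected = atomic_load(&g->mark);

    	do {
    		old.v = expected;
    		*new = old;
    		new->owned_by_allocator = 1;
    	} while (!atomic_compare_exchange_weak(&g->mark, &expected, new->v));

    	return old;
    }

    /* Shape of bch2_dev_usage_update(): counters move by the difference in
     * bucket state, so concurrent updates on different CPUs stay consistent. */
    static void toy_usage_update(struct toy_dev_usage *u,
    			     union toy_mark old, union toy_mark new)
    {
    	u->buckets_dirty += (int) toy_is_dirty(new) - (int) toy_is_dirty(old);
    	u->buckets_alloc += (int) new.owned_by_allocator -
    			    (int) old.owned_by_allocator;
    }
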
-bool bch2_invalidate_bucket(struct bch_dev *ca, struct bucket *g,
- struct bucket_mark *old)
+bool bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
+ struct bucket *g, struct bucket_mark *old)
{
struct bucket_mark new;
- *old = bucket_data_cmpxchg(ca, g, new, ({
+ lg_local_lock(&c->usage_lock);
+ *old = bucket_data_cmpxchg(c, ca, g, new, ({
if (!is_available_bucket(new))
return false;
@@ -312,6 +356,7 @@ bool bch2_invalidate_bucket(struct bch_dev *ca, struct bucket *g,
new.dirty_sectors = 0;
new.gen++;
}));
+ lg_local_unlock(&c->usage_lock);
if (!old->owned_by_allocator && old->cached_sectors)
trace_invalidate(ca, bucket_to_sector(ca, g - ca->buckets),
@@ -319,11 +364,13 @@ bool bch2_invalidate_bucket(struct bch_dev *ca, struct bucket *g,
return true;
}
-bool bch2_mark_alloc_bucket_startup(struct bch_dev *ca, struct bucket *g)
+bool bch2_mark_alloc_bucket_startup(struct bch_fs *c, struct bch_dev *ca,
+ struct bucket *g)
{
struct bucket_mark new, old;
- old = bucket_data_cmpxchg(ca, g, new, ({
+ lg_local_lock(&c->usage_lock);
+ old = bucket_data_cmpxchg(c, ca, g, new, ({
if (new.touched_this_mount ||
!is_available_bucket(new))
return false;
@@ -331,37 +378,32 @@ bool bch2_mark_alloc_bucket_startup(struct bch_dev *ca, struct bucket *g)
new.owned_by_allocator = 1;
new.touched_this_mount = 1;
}));
+ lg_local_unlock(&c->usage_lock);
return true;
}
-void bch2_mark_free_bucket(struct bch_dev *ca, struct bucket *g)
+void bch2_mark_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
+ struct bucket *g, bool owned_by_allocator,
+ struct gc_pos pos, unsigned flags)
{
struct bucket_mark old, new;
- old = bucket_data_cmpxchg(ca, g, new, ({
- new.touched_this_mount = 1;
- new.owned_by_allocator = 0;
- new.data_type = 0;
- new.cached_sectors = 0;
- new.dirty_sectors = 0;
- }));
-
- BUG_ON(bucket_became_unavailable(ca->fs, old, new));
-}
-
-void bch2_mark_alloc_bucket(struct bch_dev *ca, struct bucket *g,
- bool owned_by_allocator)
-{
- struct bucket_mark old, new;
+ lg_local_lock(&c->usage_lock);
+ if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
+ gc_will_visit(c, pos)) {
+ lg_local_unlock(&c->usage_lock);
+ return;
+ }
- old = bucket_data_cmpxchg(ca, g, new, ({
+ old = bucket_data_cmpxchg(c, ca, g, new, ({
new.touched_this_mount = 1;
new.owned_by_allocator = owned_by_allocator;
}));
+ lg_local_unlock(&c->usage_lock);
BUG_ON(!owned_by_allocator && !old.owned_by_allocator &&
- ca->fs->gc_pos.phase == GC_PHASE_DONE);
+ c->gc_pos.phase == GC_PHASE_DONE);
}
#define saturated_add(ca, dst, src, max) \
@@ -377,41 +419,49 @@ do { \
} \
} while (0)
-void bch2_mark_metadata_bucket(struct bch_dev *ca, struct bucket *g,
- enum bucket_data_type type,
- bool may_make_unavailable)
+void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
+ struct bucket *g, enum bucket_data_type type,
+ struct gc_pos pos, unsigned flags)
{
struct bucket_mark old, new;
BUG_ON(!type);
- old = bucket_data_cmpxchg(ca, g, new, ({
+ lg_local_lock(&c->usage_lock);
+ if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
+ gc_will_visit(c, pos)) {
+ lg_local_unlock(&c->usage_lock);
+ return;
+ }
+
+ old = bucket_data_cmpxchg(c, ca, g, new, ({
saturated_add(ca, new.dirty_sectors, ca->mi.bucket_size,
GC_MAX_SECTORS_USED);
new.data_type = type;
new.touched_this_mount = 1;
}));
+ lg_local_unlock(&c->usage_lock);
if (old.data_type != type &&
(old.data_type ||
old.cached_sectors ||
old.dirty_sectors))
- bch_err(ca->fs, "bucket %zu has multiple types of data (%u, %u)",
+ bch_err(c, "bucket %zu has multiple types of data (%u, %u)",
g - ca->buckets, old.data_type, new.data_type);
- BUG_ON(!may_make_unavailable &&
- bucket_became_unavailable(ca->fs, old, new));
+ BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
+ bucket_became_unavailable(c, old, new));
}
/* Reverting this until the copygc + compression issue is fixed: */
-static int __disk_sectors(const union bch_extent_crc *crc, unsigned sectors)
+static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors)
{
if (!sectors)
return 0;
- return max(1U, DIV_ROUND_UP(sectors * crc_compressed_size(NULL, crc),
- crc_uncompressed_size(NULL, crc)));
+ return max(1U, DIV_ROUND_UP(sectors * crc.compressed_size,
+ crc.uncompressed_size));
}
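
Worked example (assumed numbers, not from the patch): __disk_sectors() scales a sector count by the extent's compression ratio, rounding up and never returning zero for nonzero input, so 8 sectors compressed 8:3 account as DIV_ROUND_UP(8 * 3, 8) = 3 disk sectors. A standalone restatement with a toy CRC type:

    #include <stdint.h>

    /* Toy CRC carrying only the two fields the calculation needs; the real
     * bch_extent_crc_unpacked has more. */
    struct toy_crc {
    	unsigned compressed_size;
    	unsigned uncompressed_size;
    };

    /* Same arithmetic as __disk_sectors(): round up, clamp to at least 1. */
    static unsigned toy_disk_sectors(struct toy_crc crc, unsigned sectors)
    {
    	unsigned scaled;

    	if (!sectors)
    		return 0;

    	scaled = (sectors * crc.compressed_size + crc.uncompressed_size - 1) /
    		 crc.uncompressed_size;

    	return scaled > 1 ? scaled : 1;
    }
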
/*
@@ -420,12 +470,12 @@ static int __disk_sectors(const union bch_extent_crc *crc, unsigned sectors)
* that with the gc pos seqlock held.
*/
static void bch2_mark_pointer(struct bch_fs *c,
- struct bkey_s_c_extent e,
- const union bch_extent_crc *crc,
- const struct bch_extent_ptr *ptr,
- s64 sectors, enum s_alloc type,
- struct bch_fs_usage *stats,
- u64 journal_seq, unsigned flags)
+ struct bkey_s_c_extent e,
+ const struct bch_extent_ptr *ptr,
+ struct bch_extent_crc_unpacked crc,
+ s64 sectors, enum s_alloc type,
+ struct bch_fs_usage *stats,
+ u64 journal_seq, unsigned flags)
{
struct bucket_mark old, new;
unsigned saturated;
@@ -435,7 +485,7 @@ static void bch2_mark_pointer(struct bch_fs *c,
? BUCKET_BTREE : BUCKET_DATA;
u64 v;
- if (crc_compression_type(crc)) {
+ if (crc.compression_type) {
unsigned old_sectors, new_sectors;
if (sectors > 0) {
@@ -512,13 +562,13 @@ static void bch2_mark_pointer(struct bch_fs *c,
old.counter,
new.counter)) != old.counter);
- bch2_dev_usage_update(ca, old, new);
+ bch2_dev_usage_update(c, ca, g, old, new);
if (old.data_type != data_type &&
(old.data_type ||
old.cached_sectors ||
old.dirty_sectors))
- bch_err(ca->fs, "bucket %zu has multiple types of data (%u, %u)",
+ bch_err(c, "bucket %zu has multiple types of data (%u, %u)",
g - ca->buckets, old.data_type, new.data_type);
BUG_ON(!(flags & BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE) &&
@@ -535,71 +585,12 @@ static void bch2_mark_pointer(struct bch_fs *c,
}
}
-static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c_extent e,
- s64 sectors, bool metadata,
- struct bch_fs_usage *stats,
- u64 journal_seq, unsigned flags)
-{
- const struct bch_extent_ptr *ptr;
- const union bch_extent_crc *crc;
- enum s_alloc type = metadata ? S_META : S_DIRTY;
- unsigned replicas = 0;
-
- BUG_ON(metadata && bkey_extent_is_cached(e.k));
- BUG_ON(!sectors);
-
- extent_for_each_ptr_crc(e, ptr, crc) {
- bch2_mark_pointer(c, e, crc, ptr, sectors, type,
- stats, journal_seq, flags);
- replicas += !ptr->cached;
- }
-
- BUG_ON(replicas >= BCH_REPLICAS_MAX);
-
- if (replicas)
- stats->s[replicas - 1].data[type] += sectors;
-}
-
-void __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
- s64 sectors, bool metadata,
- struct bch_fs_usage *stats,
- u64 journal_seq, unsigned flags)
-{
- switch (k.k->type) {
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED:
- bch2_mark_extent(c, bkey_s_c_to_extent(k), sectors, metadata,
- stats, journal_seq, flags);
- break;
- case BCH_RESERVATION: {
- struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
-
- if (r.v->nr_replicas)
- stats->s[r.v->nr_replicas - 1].persistent_reserved += sectors;
- break;
- }
- }
-}
-
-void bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
- s64 sectors, bool metadata, unsigned flags)
-{
- struct bch_fs_usage stats = { 0 };
-
- __bch2_mark_key(c, k, sectors, metadata, &stats, 0,
- flags|BCH_BUCKET_MARK_MAY_MAKE_UNAVAILABLE);
-
- preempt_disable();
- bch2_usage_add(this_cpu_ptr(c->usage_percpu), &stats);
- preempt_enable();
-}
-
void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
- s64 sectors, bool metadata, struct gc_pos gc_pos,
- struct bch_fs_usage *stats, u64 journal_seq)
+ s64 sectors, bool metadata,
+ struct gc_pos pos,
+ struct bch_fs_usage *stats,
+ u64 journal_seq, unsigned flags)
{
- unsigned flags = gc_will_visit(c, gc_pos)
- ? BCH_BUCKET_MARK_GC_WILL_VISIT : 0;
/*
* synchronization w.r.t. GC:
*
@@ -614,69 +605,104 @@ void bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
* To know whether we should mark a given reference (GC either isn't
* running, or has already marked references at this position) we
* construct a total order for everything GC walks. Then, we can simply
- * compare the position of the reference we're marking - @gc_pos - with
+ * compare the position of the reference we're marking - @pos - with
* GC's current position. If GC is going to mark this reference, GC's
- * current position will be less than @gc_pos; if GC's current position
- * is greater than @gc_pos GC has either already walked this position,
- * or isn't running.
+ * current position will be less than @pos; if GC's current position is
+ * greater than @pos GC has either already walked this position, or
+ * isn't running.
*
* To avoid racing with GC's position changing, we have to deal with
* - GC's position being set to GC_POS_MIN when GC starts:
* usage_lock guards against this
- * - GC's position overtaking @gc_pos: we guard against this with
+ * - GC's position overtaking @pos: we guard against this with
* whatever lock protects the data structure the reference lives in
* (e.g. the btree node lock, or the relevant allocator lock).
*/
+
lg_local_lock(&c->usage_lock);
- __bch2_mark_key(c, k, sectors, metadata, stats, journal_seq, flags);
- bch2_fs_stats_verify(c);
+ if (!(flags & BCH_BUCKET_MARK_GC_LOCK_HELD) &&
+ gc_will_visit(c, pos))
+ flags |= BCH_BUCKET_MARK_GC_WILL_VISIT;
+
+ switch (k.k->type) {
+ case BCH_EXTENT:
+ case BCH_EXTENT_CACHED: {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ const struct bch_extent_ptr *ptr;
+ struct bch_extent_crc_unpacked crc;
+ enum s_alloc type = metadata ? S_META : S_DIRTY;
+ unsigned replicas = 0;
+
+ BUG_ON(metadata && bkey_extent_is_cached(e.k));
+ BUG_ON(!sectors);
+
+ extent_for_each_ptr_crc(e, ptr, crc) {
+ bch2_mark_pointer(c, e, ptr, crc, sectors, type,
+ stats, journal_seq, flags);
+ replicas += !ptr->cached;
+ }
+
+ BUG_ON(replicas >= BCH_REPLICAS_MAX);
+
+ if (replicas)
+ stats->s[replicas - 1].data[type] += sectors;
+ break;
+ }
+ case BCH_RESERVATION: {
+ struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+
+ if (r.v->nr_replicas)
+ stats->s[r.v->nr_replicas - 1].persistent_reserved += sectors;
+ break;
+ }
+ }
lg_local_unlock(&c->usage_lock);
}
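
The synchronization comment above rests on a total order over everything GC walks: a reference at @pos only needs to be accounted here if GC is not going to reach it. A reduced sketch of that comparison, using a hypothetical two-field position instead of the real struct gc_pos (which also encodes "done / not running" in the position itself rather than a separate flag):

    #include <stdbool.h>
    #include <stdint.h>

    /* Toy total order over everything GC walks: coarse phase first, then a
     * position within the phase. Hypothetical fields; the real gc_pos differs. */
    struct toy_gc_pos {
    	unsigned phase;
    	uint64_t pos;
    };

    static int toy_gc_pos_cmp(struct toy_gc_pos a, struct toy_gc_pos b)
    {
    	if (a.phase != b.phase)
    		return a.phase < b.phase ? -1 : 1;
    	if (a.pos != b.pos)
    		return a.pos < b.pos ? -1 : 1;
    	return 0;
    }

    /* gc_will_visit()-style check: GC will still mark a reference at @pos iff
     * it is running and its current position hasn't passed @pos yet; in that
     * case the mark functions above return early instead of double-counting. */
    static bool toy_gc_will_visit(struct toy_gc_pos gc_cur, struct toy_gc_pos pos,
    			      bool gc_running)
    {
    	return gc_running && toy_gc_pos_cmp(gc_cur, pos) < 0;
    }
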
-static u64 __recalc_sectors_available(struct bch_fs *c)
-{
- return c->capacity - bch2_fs_sectors_used(c);
-}
+/* Disk reservations: */
-/* Used by gc when it's starting: */
-void bch2_recalc_sectors_available(struct bch_fs *c)
+static u64 __recalc_sectors_available(struct bch_fs *c)
{
+ u64 avail;
int cpu;
- lg_global_lock(&c->usage_lock);
-
for_each_possible_cpu(cpu)
per_cpu_ptr(c->usage_percpu, cpu)->available_cache = 0;
- atomic64_set(&c->sectors_available,
- __recalc_sectors_available(c));
+ avail = c->capacity - bch2_fs_sectors_used(c);
+ avail <<= RESERVE_FACTOR;
+ avail /= (1 << RESERVE_FACTOR) + 1;
+ return avail;
+}
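
The reworked __recalc_sectors_available() above no longer hands all free space to reservations: avail = free * 2^RESERVE_FACTOR / (2^RESERVE_FACTOR + 1), so 1/(2^RESERVE_FACTOR + 1) of the free sectors stay back as headroom. A small worked example, assuming RESERVE_FACTOR = 6 purely for the arithmetic (its real value is defined elsewhere in bcachefs):

    #include <stdint.h>
    #include <stdio.h>

    #define TOY_RESERVE_FACTOR 6	/* assumed value, for illustration only */

    /* Same shape as the new __recalc_sectors_available(), minus the percpu
     * cache reset and locking. */
    static uint64_t toy_recalc_sectors_available(uint64_t capacity, uint64_t used)
    {
    	uint64_t avail = capacity - used;

    	avail <<= TOY_RESERVE_FACTOR;
    	avail /= (1 << TOY_RESERVE_FACTOR) + 1;
    	return avail;
    }

    int main(void)
    {
    	uint64_t capacity = 2097152;	/* 1 GiB of 512-byte sectors */
    	uint64_t used     = 1048576;	/* half of it allocated      */

    	/* 1048576 free sectors * 64/65 = 1032444 reservable sectors;
    	 * the remaining ~1.5% stays as headroom. */
    	printf("%llu\n",
    	       (unsigned long long) toy_recalc_sectors_available(capacity, used));
    	return 0;
    }
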
+
+/* Used by gc when it's starting: */
+void bch2_recalc_sectors_available(struct bch_fs *c)
+{
+ lg_global_lock(&c->usage_lock);
+ atomic64_set(&c->sectors_available, __recalc_sectors_available(c));
lg_global_unlock(&c->usage_lock);
}
-void bch2_disk_reservation_put(struct bch_fs *c,
- struct disk_reservation *res)
+void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
{
- if (res->sectors) {
- lg_local_lock(&c->usage_lock);
- this_cpu_sub(c->usage_percpu->online_reserved,
- res->sectors);
+ lg_local_lock(&c->usage_lock);
+ this_cpu_sub(c->usage_percpu->online_reserved,
+ res->sectors);
- bch2_fs_stats_verify(c);
- lg_local_unlock(&c->usage_lock);
+ bch2_fs_stats_verify(c);
+ lg_local_unlock(&c->usage_lock);
- res->sectors = 0;
- }
+ res->sectors = 0;
}
#define SECTORS_CACHE 1024
-int bch2_disk_reservation_add(struct bch_fs *c,
- struct disk_reservation *res,
- unsigned sectors, int flags)
+int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
+ unsigned sectors, int flags)
{
struct bch_fs_usage *stats;
- u64 old, new, v;
+ u64 old, v, get;
s64 sectors_available;
int ret;
@@ -685,27 +711,29 @@ int bch2_disk_reservation_add(struct bch_fs *c,
lg_local_lock(&c->usage_lock);
stats = this_cpu_ptr(c->usage_percpu);
- if (sectors >= stats->available_cache)
+ if (sectors <= stats->available_cache)
goto out;
v = atomic64_read(&c->sectors_available);
do {
old = v;
- if (old < sectors) {
+ get = min((u64) sectors + SECTORS_CACHE, old);
+
+ if (get < sectors) {
lg_local_unlock(&c->usage_lock);
goto recalculate;
}
-
- new = max_t(s64, 0, old - sectors - SECTORS_CACHE);
} while ((v = atomic64_cmpxchg(&c->sectors_available,
- old, new)) != old);
+ old, old - get)) != old);
+
+ stats->available_cache += get;
- stats->available_cache += old - new;
out:
stats->available_cache -= sectors;
stats->online_reserved += sectors;
res->sectors += sectors;
+ bch2_disk_reservations_verify(c, flags);
bch2_fs_stats_verify(c);
lg_local_unlock(&c->usage_lock);
return 0;
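
To summarize the fast path above (sketch, not from the patch): small requests are served from this CPU's available_cache; otherwise the request plus a SECTORS_CACHE refill is pulled out of the global pool in one cmpxchg, and only if even that much is unavailable do we drop to the recalculate slow path. A single-threaded toy of the same flow, with hypothetical names and no locking or percpu:

    #include <stdbool.h>
    #include <stdint.h>

    #define TOY_SECTORS_CACHE 1024

    struct toy_ctx {
    	uint64_t sectors_available;	/* global pool (c->sectors_available) */
    	uint64_t available_cache;	/* this CPU's cache                   */
    	uint64_t online_reserved;
    };

    /* Mirrors the reworked bch2_disk_reservation_add() fast path: serve from
     * the per-CPU cache when possible, otherwise grab the request plus a
     * refill from the global pool; return false where the real code jumps
     * to the recalculate slow path. */
    static bool toy_reserve(struct toy_ctx *c, uint64_t sectors)
    {
    	if (sectors > c->available_cache) {
    		uint64_t get = sectors + TOY_SECTORS_CACHE;

    		if (get > c->sectors_available)
    			get = c->sectors_available;
    		if (get < sectors)
    			return false;		/* -> recalculate */

    		c->sectors_available -= get;
    		c->available_cache   += get;
    	}

    	c->available_cache -= sectors;
    	c->online_reserved += sectors;
    	return true;
    }
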
@@ -738,6 +766,8 @@ recalculate:
stats->online_reserved += sectors;
res->sectors += sectors;
ret = 0;
+
+ bch2_disk_reservations_verify(c, flags);
} else {
atomic64_set(&c->sectors_available, sectors_available);
ret = -ENOSPC;