summary | refs | log | tree | commit | diff
path: root/fs/bcachefs/disk_accounting.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/disk_accounting.c')
-rw-r--r-- fs/bcachefs/disk_accounting.c | 176
1 files changed, 106 insertions, 70 deletions
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index f0ebf91cd5fd..a99f821c6a1c 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -239,10 +239,12 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
c, accounting_key_junk_at_end,
"junk at end of accounting key");
- bkey_fsck_err_on(bch2_accounting_counters(k.k) != bch2_accounting_type_nr_counters[acc_k.type],
+ const unsigned nr_counters = bch2_accounting_counters(k.k);
+
+ bkey_fsck_err_on(!nr_counters || nr_counters > BCH_ACCOUNTING_MAX_COUNTERS,
c, accounting_key_nr_counters_wrong,
"accounting key with %u counters, should be %u",
- bch2_accounting_counters(k.k), bch2_accounting_type_nr_counters[acc_k.type]);
+ nr_counters, bch2_accounting_type_nr_counters[acc_k.type]);
fsck_err:
return ret;
}
@@ -359,10 +361,13 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun
accounting_pos_cmp, &a.k->p) < acc->k.nr)
return 0;
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, a.k->p);
+
struct accounting_mem_entry n = {
.pos = a.k->p,
.bversion = a.k->bversion,
- .nr_counters = bch2_accounting_counters(a.k),
+ .nr_counters = bch2_accounting_type_nr_counters[acc_k.type],
.v[0] = __alloc_percpu_gfp(n.nr_counters * sizeof(u64),
sizeof(u64), GFP_KERNEL),
};
@@ -878,46 +883,44 @@ int bch2_accounting_read(struct bch_fs *c)
*dst++ = *i;
keys->gap = keys->nr = dst - keys->data;
- guard(percpu_write)(&c->mark_lock);
-
- darray_for_each_reverse(acc->k, i) {
- struct disk_accounting_pos acc_k;
- bpos_to_disk_accounting_pos(&acc_k, i->pos);
+ CLASS(printbuf, underflow_err)();
- u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
- memset(v, 0, sizeof(v));
+ scoped_guard(percpu_write, &c->mark_lock) {
+ darray_for_each_reverse(acc->k, i) {
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, i->pos);
- for (unsigned j = 0; j < i->nr_counters; j++)
- v[j] = percpu_u64_get(i->v[0] + j);
+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+ memset(v, 0, sizeof(v));
- /*
- * If the entry counters are zeroed, it should be treated as
- * nonexistent - it might point to an invalid device.
- *
- * Remove it, so that if it's re-added it gets re-marked in the
- * superblock:
- */
- ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
- ? -BCH_ERR_remove_disk_accounting_entry
- : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters);
-
- if (ret == -BCH_ERR_remove_disk_accounting_entry) {
- free_percpu(i->v[0]);
- free_percpu(i->v[1]);
- darray_remove_item(&acc->k, i);
- ret = 0;
- continue;
- }
+ for (unsigned j = 0; j < i->nr_counters; j++)
+ v[j] = percpu_u64_get(i->v[0] + j);
- if (ret)
- return ret;
- }
+ /*
+ * If the entry counters are zeroed, it should be treated as
+ * nonexistent - it might point to an invalid device.
+ *
+ * Remove it, so that if it's re-added it gets re-marked in the
+ * superblock:
+ */
+ ret = bch2_is_zero(v, sizeof(v[0]) * i->nr_counters)
+ ? -BCH_ERR_remove_disk_accounting_entry
+ : bch2_disk_accounting_validate_late(trans, &acc_k, v, i->nr_counters);
+
+ if (ret == -BCH_ERR_remove_disk_accounting_entry) {
+ free_percpu(i->v[0]);
+ free_percpu(i->v[1]);
+ darray_remove_item(&acc->k, i);
+ ret = 0;
+ continue;
+ }
- eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
- accounting_pos_cmp, NULL);
+ if (ret)
+ return ret;
+ }
- scoped_guard(preempt) {
- struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
+ eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
+ accounting_pos_cmp, NULL);
for (unsigned i = 0; i < acc->k.nr; i++) {
struct disk_accounting_pos k;
@@ -939,27 +942,20 @@ int bch2_accounting_read(struct bch_fs *c)
underflow |= (s64) v[j] < 0;
if (underflow) {
- CLASS(printbuf, buf)();
- bch2_log_msg_start(c, &buf);
-
- prt_printf(&buf, "Accounting underflow for\n");
- bch2_accounting_key_to_text(&buf, &k);
+ if (!underflow_err.pos) {
+ bch2_log_msg_start(c, &underflow_err);
+ prt_printf(&underflow_err, "Accounting underflow for\n");
+ }
+ bch2_accounting_key_to_text(&underflow_err, &k);
for (unsigned j = 0; j < acc->k.data[i].nr_counters; j++)
- prt_printf(&buf, " %lli", v[j]);
-
- bool print = bch2_count_fsck_err(c, accounting_key_underflow, &buf);
- unsigned pos = buf.pos;
- ret = bch2_run_explicit_recovery_pass(c, &buf,
- BCH_RECOVERY_PASS_check_allocations, 0);
- print |= buf.pos != pos;
-
- if (print)
- bch2_print_str(c, KERN_ERR, buf.buf);
- if (ret)
- return ret;
+ prt_printf(&underflow_err, " %lli", v[j]);
+ prt_newline(&underflow_err);
}
+ guard(preempt)();
+ struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage);
+
switch (k.type) {
case BCH_DISK_ACCOUNTING_persistent_reserved:
usage->reserved += v[0] * k.persistent_reserved.nr_replicas;
@@ -986,24 +982,60 @@ int bch2_accounting_read(struct bch_fs *c)
}
}
+ if (underflow_err.pos) {
+ bool print = bch2_count_fsck_err(c, accounting_key_underflow, &underflow_err);
+ unsigned pos = underflow_err.pos;
+ ret = bch2_run_explicit_recovery_pass(c, &underflow_err,
+ BCH_RECOVERY_PASS_check_allocations, 0);
+ print |= underflow_err.pos != pos;
+
+ if (print)
+ bch2_print_str(c, KERN_ERR, underflow_err.buf);
+ if (ret)
+ return ret;
+ }
+
return ret;
}
-int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev)
+int bch2_dev_usage_remove(struct bch_fs *c, struct bch_dev *ca)
{
CLASS(btree_trans, trans)(c);
+
+ struct disk_accounting_pos start;
+ disk_accounting_key_init(start, dev_data_type, .dev = ca->dev_idx);
+
+ struct disk_accounting_pos end;
+ disk_accounting_key_init(end, dev_data_type, .dev = ca->dev_idx, .data_type = U8_MAX);
+
return bch2_btree_write_buffer_flush_sync(trans) ?:
- for_each_btree_key_commit(trans, iter, BTREE_ID_accounting, POS_MIN,
- BTREE_ITER_all_snapshots, k, NULL, NULL, 0, ({
- struct disk_accounting_pos acc;
- bpos_to_disk_accounting_pos(&acc, k.k->p);
-
- acc.type == BCH_DISK_ACCOUNTING_dev_data_type &&
- acc.dev_data_type.dev == dev
- ? bch2_btree_bit_mod_buffered(trans, BTREE_ID_accounting, k.k->p, 0)
- : 0;
- })) ?:
- bch2_btree_write_buffer_flush_sync(trans);
+ commit_do(trans, NULL, NULL, 0, ({
+ struct bkey_s_c k;
+ int ret = 0;
+
+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_accounting,
+ disk_accounting_pos_to_bpos(&start),
+ disk_accounting_pos_to_bpos(&end),
+ BTREE_ITER_all_snapshots, k, ret) {
+ if (k.k->type != KEY_TYPE_accounting)
+ continue;
+
+ struct disk_accounting_pos acc;
+ bpos_to_disk_accounting_pos(&acc, k.k->p);
+
+ const unsigned nr = bch2_accounting_counters(k.k);
+ u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
+ memcpy_u64s_small(v, bkey_s_c_to_accounting(k).v->d, nr);
+
+ bch2_u64s_neg(v, nr);
+
+ ret = bch2_disk_accounting_mod(trans, &acc, v, nr, false);
+ if (ret)
+ break;
+ }
+
+ ret;
+ })) ?: bch2_btree_write_buffer_flush_sync(trans);
}
int bch2_dev_usage_init(struct bch_dev *ca, bool gc)
@@ -1074,13 +1106,17 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
case BCH_DISK_ACCOUNTING_dev_data_type: {
{
guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */
+ const enum bch_data_type data_type = acc_k.dev_data_type.data_type;
struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev);
if (!ca)
continue;
- v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets);
- v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors);
- v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented);
+ v[0] = percpu_u64_get(&ca->usage->d[data_type].buckets);
+ v[1] = percpu_u64_get(&ca->usage->d[data_type].sectors);
+ v[2] = percpu_u64_get(&ca->usage->d[data_type].fragmented);
+
+ if (data_type == BCH_DATA_sb || data_type == BCH_DATA_journal)
+ base.hidden += a.v->d[0] * ca->mi.bucket_size;
}
if (memcmp(a.v->d, v, 3 * sizeof(u64))) {
@@ -1108,7 +1144,7 @@ void bch2_verify_accounting_clean(struct bch_fs *c)
mismatch = true; \
}
- //check(hidden);
+ check(hidden);
check(btree);
check(data);
check(cached);