summaryrefslogtreecommitdiff
path: root/libbcachefs
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2024-01-05 12:38:14 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2024-01-05 13:01:34 -0500
commit799439a88ab7afe99e5052894c20ea77133a1551 (patch)
treea9c5a0bd777e2e0d616bcbf44d8b1f3ef0d4ae1a /libbcachefs
parent605e2311d9cfbc1acc7ba9181a05b8976d42ea46 (diff)
Update bcachefs sources to d267e10a43b2 bcachefs: __bch2_sb_field_to_text()
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'libbcachefs')
-rw-r--r--libbcachefs/alloc_background.c225
-rw-r--r--libbcachefs/alloc_background.h16
-rw-r--r--libbcachefs/backpointers.c3
-rw-r--r--libbcachefs/bcachefs.h26
-rw-r--r--libbcachefs/bcachefs_format.h84
-rw-r--r--libbcachefs/bkey_methods.h80
-rw-r--r--libbcachefs/btree_cache.c6
-rw-r--r--libbcachefs/btree_gc.c15
-rw-r--r--libbcachefs/btree_io.c4
-rw-r--r--libbcachefs/btree_iter.c35
-rw-r--r--libbcachefs/btree_locking.c19
-rw-r--r--libbcachefs/btree_trans_commit.c33
-rw-r--r--libbcachefs/btree_update_interior.c19
-rw-r--r--libbcachefs/buckets.c1044
-rw-r--r--libbcachefs/buckets.h37
-rw-r--r--libbcachefs/chardev.c236
-rw-r--r--libbcachefs/darray.h2
-rw-r--r--libbcachefs/debug.c6
-rw-r--r--libbcachefs/disk_groups.c2
-rw-r--r--libbcachefs/ec.c301
-rw-r--r--libbcachefs/ec.h5
-rw-r--r--libbcachefs/error.c93
-rw-r--r--libbcachefs/extents.h12
-rw-r--r--libbcachefs/fs-common.c36
-rw-r--r--libbcachefs/fs-io.c10
-rw-r--r--libbcachefs/fsck.c16
-rw-r--r--libbcachefs/inode.c72
-rw-r--r--libbcachefs/inode.h15
-rw-r--r--libbcachefs/opts.h12
-rw-r--r--libbcachefs/printbuf.c2
-rw-r--r--libbcachefs/recovery.c40
-rw-r--r--libbcachefs/reflink.c175
-rw-r--r--libbcachefs/reflink.h26
-rw-r--r--libbcachefs/sb-downgrade.c111
-rw-r--r--libbcachefs/sb-downgrade.h3
-rw-r--r--libbcachefs/sb-errors_types.h3
-rw-r--r--libbcachefs/sb-members.c2
-rw-r--r--libbcachefs/snapshot.c31
-rw-r--r--libbcachefs/snapshot.h4
-rw-r--r--libbcachefs/super-io.c54
-rw-r--r--libbcachefs/super-io.h8
-rw-r--r--libbcachefs/super.c70
-rw-r--r--libbcachefs/thread_with_file.c299
-rw-r--r--libbcachefs/thread_with_file.h41
-rw-r--r--libbcachefs/thread_with_file_types.h16
-rw-r--r--libbcachefs/trace.h26
-rw-r--r--libbcachefs/util.c8
-rw-r--r--libbcachefs/util.h13
48 files changed, 1761 insertions, 1635 deletions
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 1a127b0a..a09b9d00 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -749,95 +749,177 @@ static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
return ret;
}
-int bch2_trans_mark_alloc(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_i *new,
- unsigned flags)
+int bch2_trigger_alloc(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
{
struct bch_fs *c = trans->c;
- struct bch_alloc_v4 old_a_convert, *new_a;
- const struct bch_alloc_v4 *old_a;
- u64 old_lru, new_lru;
int ret = 0;
- /*
- * Deletion only happens in the device removal path, with
- * BTREE_TRIGGER_NORUN:
- */
- BUG_ON(new->k.type != KEY_TYPE_alloc_v4);
+ if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
+ "alloc key for invalid device or bucket"))
+ return -EIO;
- old_a = bch2_alloc_to_v4(old, &old_a_convert);
- new_a = &bkey_i_to_alloc_v4(new)->v;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, new.k->p.inode);
- new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
+ struct bch_alloc_v4 old_a_convert;
+ const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
- if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) {
- new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
- new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
- SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
- SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
- }
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
- if (data_type_is_empty(new_a->data_type) &&
- BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
- !bch2_bucket_is_open_safe(c, new->k.p.inode, new->k.p.offset)) {
- new_a->gen++;
- SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
- }
+ new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
- if (old_a->data_type != new_a->data_type ||
- (new_a->data_type == BCH_DATA_free &&
- alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
- ret = bch2_bucket_do_index(trans, old, old_a, false) ?:
- bch2_bucket_do_index(trans, bkey_i_to_s_c(new), new_a, true);
- if (ret)
- return ret;
- }
+ if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) {
+ new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+ new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
+ SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
+ SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
+ }
- if (new_a->data_type == BCH_DATA_cached &&
- !new_a->io_time[READ])
- new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
+ if (data_type_is_empty(new_a->data_type) &&
+ BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
+ !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
+ new_a->gen++;
+ SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
+ }
- old_lru = alloc_lru_idx_read(*old_a);
- new_lru = alloc_lru_idx_read(*new_a);
+ if (old_a->data_type != new_a->data_type ||
+ (new_a->data_type == BCH_DATA_free &&
+ alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
+ ret = bch2_bucket_do_index(trans, old, old_a, false) ?:
+ bch2_bucket_do_index(trans, new.s_c, new_a, true);
+ if (ret)
+ return ret;
+ }
- if (old_lru != new_lru) {
- ret = bch2_lru_change(trans, new->k.p.inode,
- bucket_to_u64(new->k.p),
- old_lru, new_lru);
- if (ret)
- return ret;
- }
+ if (new_a->data_type == BCH_DATA_cached &&
+ !new_a->io_time[READ])
+ new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
- new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a,
- bch_dev_bkey_exists(c, new->k.p.inode));
+ u64 old_lru = alloc_lru_idx_read(*old_a);
+ u64 new_lru = alloc_lru_idx_read(*new_a);
+ if (old_lru != new_lru) {
+ ret = bch2_lru_change(trans, new.k->p.inode,
+ bucket_to_u64(new.k->p),
+ old_lru, new_lru);
+ if (ret)
+ return ret;
+ }
- if (old_a->fragmentation_lru != new_a->fragmentation_lru) {
- ret = bch2_lru_change(trans,
- BCH_LRU_FRAGMENTATION_START,
- bucket_to_u64(new->k.p),
- old_a->fragmentation_lru, new_a->fragmentation_lru);
- if (ret)
- return ret;
+ new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a,
+ bch_dev_bkey_exists(c, new.k->p.inode));
+ if (old_a->fragmentation_lru != new_a->fragmentation_lru) {
+ ret = bch2_lru_change(trans,
+ BCH_LRU_FRAGMENTATION_START,
+ bucket_to_u64(new.k->p),
+ old_a->fragmentation_lru, new_a->fragmentation_lru);
+ if (ret)
+ return ret;
+ }
+
+ if (old_a->gen != new_a->gen) {
+ ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * need to know if we're getting called from the invalidate path or
+ * not:
+ */
+
+ if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
+ old_a->cached_sectors) {
+ ret = bch2_update_cached_sectors_list(trans, new.k->p.inode,
+ -((s64) old_a->cached_sectors));
+ if (ret)
+ return ret;
+ }
}
- if (old_a->gen != new_a->gen) {
- ret = bch2_bucket_gen_update(trans, new->k.p, new_a->gen);
- if (ret)
- return ret;
+ if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
+ struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v;
+ u64 journal_seq = trans->journal_res.seq;
+ u64 bucket_journal_seq = new_a->journal_seq;
+
+ if ((flags & BTREE_TRIGGER_INSERT) &&
+ data_type_is_empty(old_a->data_type) !=
+ data_type_is_empty(new_a->data_type) &&
+ new.k->type == KEY_TYPE_alloc_v4) {
+ struct bch_alloc_v4 *v = bkey_s_to_alloc_v4(new).v;
+
+ /*
+ * If the btree updates referring to a bucket weren't flushed
+ * before the bucket became empty again, then the we don't have
+ * to wait on a journal flush before we can reuse the bucket:
+ */
+ v->journal_seq = bucket_journal_seq =
+ data_type_is_empty(new_a->data_type) &&
+ (journal_seq == v->journal_seq ||
+ bch2_journal_noflush_seq(&c->journal, v->journal_seq))
+ ? 0 : journal_seq;
+ }
+
+ if (!data_type_is_empty(old_a->data_type) &&
+ data_type_is_empty(new_a->data_type) &&
+ bucket_journal_seq) {
+ ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+ c->journal.flushed_seq_ondisk,
+ new.k->p.inode, new.k->p.offset,
+ bucket_journal_seq);
+ if (ret) {
+ bch2_fs_fatal_error(c,
+ "error setting bucket_needs_journal_commit: %i", ret);
+ return ret;
+ }
+ }
+
+ percpu_down_read(&c->mark_lock);
+ if (new_a->gen != old_a->gen)
+ *bucket_gen(ca, new.k->p.offset) = new_a->gen;
+
+ bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
+
+ if (new_a->data_type == BCH_DATA_free &&
+ (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk))
+ closure_wake_up(&c->freelist_wait);
+
+ if (new_a->data_type == BCH_DATA_need_discard &&
+ (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk))
+ bch2_do_discards(c);
+
+ if (old_a->data_type != BCH_DATA_cached &&
+ new_a->data_type == BCH_DATA_cached &&
+ should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
+ bch2_do_invalidates(c);
+
+ if (new_a->data_type == BCH_DATA_need_gc_gens)
+ bch2_do_gc_gens(c);
+ percpu_up_read(&c->mark_lock);
}
- /*
- * need to know if we're getting called from the invalidate path or
- * not:
- */
+ if ((flags & BTREE_TRIGGER_GC) &&
+ (flags & BTREE_TRIGGER_BUCKET_INVALIDATE)) {
+ struct bch_alloc_v4 new_a_convert;
+ const struct bch_alloc_v4 *new_a = bch2_alloc_to_v4(new.s_c, &new_a_convert);
- if ((flags & BTREE_TRIGGER_BUCKET_INVALIDATE) &&
- old_a->cached_sectors) {
- ret = bch2_update_cached_sectors_list(trans, new->k.p.inode,
- -((s64) old_a->cached_sectors));
- if (ret)
- return ret;
+ percpu_down_read(&c->mark_lock);
+ struct bucket *g = gc_bucket(ca, new.k->p.offset);
+
+ bucket_lock(g);
+
+ g->gen_valid = 1;
+ g->gen = new_a->gen;
+ g->data_type = new_a->data_type;
+ g->stripe = new_a->stripe;
+ g->stripe_redundancy = new_a->stripe_redundancy;
+ g->dirty_sectors = new_a->dirty_sectors;
+ g->cached_sectors = new_a->cached_sectors;
+
+ bucket_unlock(g);
+ percpu_up_read(&c->mark_lock);
}
return 0;
@@ -1150,9 +1232,6 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
unsigned i, gens_offset, gens_end_offset;
int ret;
- if (c->sb.version < bcachefs_metadata_version_bucket_gens)
- return 0;
-
bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
k = bch2_btree_iter_peek_slot(bucket_gens_iter);
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 96671f16..e7f7e842 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -182,24 +182,21 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
#define bch2_bkey_ops_alloc ((struct bkey_ops) { \
.key_invalid = bch2_alloc_v1_invalid, \
.val_to_text = bch2_alloc_to_text, \
- .trans_trigger = bch2_trans_mark_alloc, \
- .atomic_trigger = bch2_mark_alloc, \
+ .trigger = bch2_trigger_alloc, \
.min_val_size = 8, \
})
#define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \
.key_invalid = bch2_alloc_v2_invalid, \
.val_to_text = bch2_alloc_to_text, \
- .trans_trigger = bch2_trans_mark_alloc, \
- .atomic_trigger = bch2_mark_alloc, \
+ .trigger = bch2_trigger_alloc, \
.min_val_size = 8, \
})
#define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \
.key_invalid = bch2_alloc_v3_invalid, \
.val_to_text = bch2_alloc_to_text, \
- .trans_trigger = bch2_trans_mark_alloc, \
- .atomic_trigger = bch2_mark_alloc, \
+ .trigger = bch2_trigger_alloc, \
.min_val_size = 16, \
})
@@ -207,8 +204,7 @@ void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.key_invalid = bch2_alloc_v4_invalid, \
.val_to_text = bch2_alloc_to_text, \
.swab = bch2_alloc_v4_swab, \
- .trans_trigger = bch2_trans_mark_alloc, \
- .atomic_trigger = bch2_mark_alloc, \
+ .trigger = bch2_trigger_alloc, \
.min_val_size = 48, \
})
@@ -232,8 +228,8 @@ static inline bool bkey_is_alloc(const struct bkey *k)
int bch2_alloc_read(struct bch_fs *);
-int bch2_trans_mark_alloc(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_i *, unsigned);
+int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
int bch2_check_alloc_info(struct bch_fs *);
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
void bch2_do_discards(struct bch_fs *);
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index a97fc2b6..e358a2ff 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -467,8 +467,7 @@ missing:
prt_printf(&buf, "\nbp pos ");
bch2_bpos_to_text(&buf, bp_iter.pos);
- if (c->sb.version_upgrade_complete < bcachefs_metadata_version_backpointers ||
- c->opts.reconstruct_alloc ||
+ if (c->opts.reconstruct_alloc ||
fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 840f605e..dac383e3 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -425,6 +425,7 @@ BCH_DEBUG_PARAMS_DEBUG()
x(btree_node_merge) \
x(btree_node_sort) \
x(btree_node_read) \
+ x(btree_node_read_done) \
x(btree_interior_update_foreground) \
x(btree_interior_update_total) \
x(btree_gc) \
@@ -464,6 +465,7 @@ enum bch_time_stats {
#include "replicas_types.h"
#include "subvolume_types.h"
#include "super_types.h"
+#include "thread_with_file_types.h"
/* Number of nodes btree coalesce will try to coalesce at once */
#define GC_MERGE_NODES 4U
@@ -478,12 +480,6 @@ enum bch_time_stats {
struct btree;
-struct log_output {
- spinlock_t lock;
- wait_queue_head_t wait;
- struct printbuf buf;
-};
-
enum gc_phase {
GC_PHASE_NOT_RUNNING,
GC_PHASE_START,
@@ -607,9 +603,6 @@ struct bch_dev {
};
/*
- * fsck_done - kill?
- *
- * replace with something more general from enumated fsck passes/errors:
* initial_gc_unfixed
* error
* topology error
@@ -625,7 +618,7 @@ struct bch_dev {
x(going_ro) \
x(write_disable_complete) \
x(clean_shutdown) \
- x(fsck_done) \
+ x(fsck_running) \
x(initial_gc_unfixed) \
x(need_another_gc) \
x(need_delete_dead_snapshots) \
@@ -739,8 +732,8 @@ struct bch_fs {
struct super_block *vfs_sb;
dev_t dev;
char name[40];
- struct log_output *output;
- struct task_struct *output_filter;
+ struct stdio_redirect *stdio;
+ struct task_struct *stdio_filter;
/* ro/rw, add/remove/resize devices: */
struct rw_semaphore state_lock;
@@ -1252,6 +1245,15 @@ static inline bool bch2_dev_exists2(const struct bch_fs *c, unsigned dev)
return dev < c->sb.nr_devices && c->devs[dev];
}
+static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
+{
+ struct stdio_redirect *stdio = c->stdio;
+
+ if (c->stdio_filter && c->stdio_filter != current)
+ stdio = NULL;
+ return stdio;
+}
+
#define BKEY_PADDED_ONSTACK(key, pad) \
struct { struct bkey_i key; __u64 key ## _pad[pad]; }
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index e7a2d25d..0d5ac418 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -1672,73 +1672,41 @@ struct bch_sb_field_downgrade {
#define BCH_VERSION_MINOR(_v) ((__u16) ((_v) & ~(~0U << 10)))
#define BCH_VERSION(_major, _minor) (((_major) << 10)|(_minor) << 0)
-#define RECOVERY_PASS_ALL_FSCK (1ULL << 63)
-
/*
* field 1: version name
* field 2: BCH_VERSION(major, minor)
* field 3: recovery passess required on upgrade
*/
#define BCH_METADATA_VERSIONS() \
- x(bkey_renumber, BCH_VERSION(0, 10), \
- RECOVERY_PASS_ALL_FSCK) \
- x(inode_btree_change, BCH_VERSION(0, 11), \
- RECOVERY_PASS_ALL_FSCK) \
- x(snapshot, BCH_VERSION(0, 12), \
- RECOVERY_PASS_ALL_FSCK) \
- x(inode_backpointers, BCH_VERSION(0, 13), \
- RECOVERY_PASS_ALL_FSCK) \
- x(btree_ptr_sectors_written, BCH_VERSION(0, 14), \
- RECOVERY_PASS_ALL_FSCK) \
- x(snapshot_2, BCH_VERSION(0, 15), \
- BIT_ULL(BCH_RECOVERY_PASS_fs_upgrade_for_subvolumes)| \
- BIT_ULL(BCH_RECOVERY_PASS_initialize_subvolumes)| \
- RECOVERY_PASS_ALL_FSCK) \
- x(reflink_p_fix, BCH_VERSION(0, 16), \
- BIT_ULL(BCH_RECOVERY_PASS_fix_reflink_p)) \
- x(subvol_dirent, BCH_VERSION(0, 17), \
- RECOVERY_PASS_ALL_FSCK) \
- x(inode_v2, BCH_VERSION(0, 18), \
- RECOVERY_PASS_ALL_FSCK) \
- x(freespace, BCH_VERSION(0, 19), \
- RECOVERY_PASS_ALL_FSCK) \
- x(alloc_v4, BCH_VERSION(0, 20), \
- RECOVERY_PASS_ALL_FSCK) \
- x(new_data_types, BCH_VERSION(0, 21), \
- RECOVERY_PASS_ALL_FSCK) \
- x(backpointers, BCH_VERSION(0, 22), \
- RECOVERY_PASS_ALL_FSCK) \
- x(inode_v3, BCH_VERSION(0, 23), \
- RECOVERY_PASS_ALL_FSCK) \
- x(unwritten_extents, BCH_VERSION(0, 24), \
- RECOVERY_PASS_ALL_FSCK) \
- x(bucket_gens, BCH_VERSION(0, 25), \
- BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \
- RECOVERY_PASS_ALL_FSCK) \
- x(lru_v2, BCH_VERSION(0, 26), \
- RECOVERY_PASS_ALL_FSCK) \
- x(fragmentation_lru, BCH_VERSION(0, 27), \
- RECOVERY_PASS_ALL_FSCK) \
- x(no_bps_in_alloc_keys, BCH_VERSION(0, 28), \
- RECOVERY_PASS_ALL_FSCK) \
- x(snapshot_trees, BCH_VERSION(0, 29), \
- RECOVERY_PASS_ALL_FSCK) \
- x(major_minor, BCH_VERSION(1, 0), \
- 0) \
- x(snapshot_skiplists, BCH_VERSION(1, 1), \
- BIT_ULL(BCH_RECOVERY_PASS_check_snapshots)) \
- x(deleted_inodes, BCH_VERSION(1, 2), \
- BIT_ULL(BCH_RECOVERY_PASS_check_inodes)) \
- x(rebalance_work, BCH_VERSION(1, 3), \
- BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \
- x(member_seq, BCH_VERSION(1, 4), \
- 0) \
- x(disk_accounting_v2, BCH_VERSION(1, 5), \
- BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info))
+ x(bkey_renumber, BCH_VERSION(0, 10)) \
+ x(inode_btree_change, BCH_VERSION(0, 11)) \
+ x(snapshot, BCH_VERSION(0, 12)) \
+ x(inode_backpointers, BCH_VERSION(0, 13)) \
+ x(btree_ptr_sectors_written, BCH_VERSION(0, 14)) \
+ x(snapshot_2, BCH_VERSION(0, 15)) \
+ x(reflink_p_fix, BCH_VERSION(0, 16)) \
+ x(subvol_dirent, BCH_VERSION(0, 17)) \
+ x(inode_v2, BCH_VERSION(0, 18)) \
+ x(freespace, BCH_VERSION(0, 19)) \
+ x(alloc_v4, BCH_VERSION(0, 20)) \
+ x(new_data_types, BCH_VERSION(0, 21)) \
+ x(backpointers, BCH_VERSION(0, 22)) \
+ x(inode_v3, BCH_VERSION(0, 23)) \
+ x(unwritten_extents, BCH_VERSION(0, 24)) \
+ x(bucket_gens, BCH_VERSION(0, 25)) \
+ x(lru_v2, BCH_VERSION(0, 26)) \
+ x(fragmentation_lru, BCH_VERSION(0, 27)) \
+ x(no_bps_in_alloc_keys, BCH_VERSION(0, 28)) \
+ x(snapshot_trees, BCH_VERSION(0, 29)) \
+ x(major_minor, BCH_VERSION(1, 0)) \
+ x(snapshot_skiplists, BCH_VERSION(1, 1)) \
+ x(deleted_inodes, BCH_VERSION(1, 2)) \
+ x(rebalance_work, BCH_VERSION(1, 3)) \
+ x(member_seq, BCH_VERSION(1, 4))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
-#define x(t, n, upgrade_passes) bcachefs_metadata_version_##t = n,
+#define x(t, n) bcachefs_metadata_version_##t = n,
BCH_METADATA_VERSIONS()
#undef x
bcachefs_metadata_version_max
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index 912adadf..ee822837 100644
--- a/libbcachefs/bkey_methods.h
+++ b/libbcachefs/bkey_methods.h
@@ -28,10 +28,8 @@ struct bkey_ops {
void (*swab)(struct bkey_s);
bool (*key_normalize)(struct bch_fs *, struct bkey_s);
bool (*key_merge)(struct bch_fs *, struct bkey_s, struct bkey_s_c);
- int (*trans_trigger)(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_i *, unsigned);
- int (*atomic_trigger)(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_s_c, unsigned);
+ int (*trigger)(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
void (*compat)(enum btree_id id, unsigned version,
unsigned big_endian, int write,
struct bkey_s);
@@ -78,82 +76,86 @@ static inline bool bch2_bkey_maybe_mergable(const struct bkey *l, const struct b
bool bch2_bkey_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-static inline int bch2_mark_key(struct btree_trans *trans,
- enum btree_id btree, unsigned level,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
-{
- const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type);
-
- return ops->atomic_trigger
- ? ops->atomic_trigger(trans, btree, level, old, new, flags)
- : 0;
-}
-
enum btree_update_flags {
__BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE = __BTREE_ITER_FLAGS_END,
__BTREE_UPDATE_NOJOURNAL,
__BTREE_UPDATE_KEY_CACHE_RECLAIM,
- __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
-
+ __BTREE_TRIGGER_NORUN,
+ __BTREE_TRIGGER_TRANSACTIONAL,
__BTREE_TRIGGER_INSERT,
__BTREE_TRIGGER_OVERWRITE,
-
__BTREE_TRIGGER_GC,
__BTREE_TRIGGER_BUCKET_INVALIDATE,
- __BTREE_TRIGGER_NOATOMIC,
};
#define BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE (1U << __BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE)
#define BTREE_UPDATE_NOJOURNAL (1U << __BTREE_UPDATE_NOJOURNAL)
#define BTREE_UPDATE_KEY_CACHE_RECLAIM (1U << __BTREE_UPDATE_KEY_CACHE_RECLAIM)
+/* Don't run triggers at all */
#define BTREE_TRIGGER_NORUN (1U << __BTREE_TRIGGER_NORUN)
+/*
+ * If set, we're running transactional triggers as part of a transaction commit:
+ * triggers may generate new updates
+ *
+ * If cleared, and either BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE are set,
+ * we're running atomic triggers during a transaction commit: we have our
+ * journal reservation, we're holding btree node write locks, and we know the
+ * transaction is going to commit (returning an error here is a fatal error,
+ * causing us to go emergency read-only)
+ */
+#define BTREE_TRIGGER_TRANSACTIONAL (1U << __BTREE_TRIGGER_TRANSACTIONAL)
+
+/* @new is entering the btree */
#define BTREE_TRIGGER_INSERT (1U << __BTREE_TRIGGER_INSERT)
+
+/* @old is leaving the btree */
#define BTREE_TRIGGER_OVERWRITE (1U << __BTREE_TRIGGER_OVERWRITE)
+/* We're in gc/fsck: running triggers to recalculate e.g. disk usage */
#define BTREE_TRIGGER_GC (1U << __BTREE_TRIGGER_GC)
+
+/* signal from bucket invalidate path to alloc trigger */
#define BTREE_TRIGGER_BUCKET_INVALIDATE (1U << __BTREE_TRIGGER_BUCKET_INVALIDATE)
-#define BTREE_TRIGGER_NOATOMIC (1U << __BTREE_TRIGGER_NOATOMIC)
-static inline int bch2_trans_mark_key(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_i *new,
- unsigned flags)
+static inline int bch2_key_trigger(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
{
- const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new->k.type);
+ const struct bkey_ops *ops = bch2_bkey_type_ops(old.k->type ?: new.k->type);
- return ops->trans_trigger
- ? ops->trans_trigger(trans, btree_id, level, old, new, flags)
+ return ops->trigger
+ ? ops->trigger(trans, btree, level, old, new, flags)
: 0;
}
-static inline int bch2_trans_mark_old(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, unsigned flags)
+static inline int bch2_key_trigger_old(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old, unsigned flags)
{
struct bkey_i deleted;
bkey_init(&deleted.k);
deleted.k.p = old.k->p;
- return bch2_trans_mark_key(trans, btree_id, level, old, &deleted,
- BTREE_TRIGGER_OVERWRITE|flags);
+ return bch2_key_trigger(trans, btree_id, level, old, bkey_i_to_s(&deleted),
+ BTREE_TRIGGER_OVERWRITE|flags);
}
-static inline int bch2_trans_mark_new(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_i *new, unsigned flags)
+static inline int bch2_key_trigger_new(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s new, unsigned flags)
{
struct bkey_i deleted;
bkey_init(&deleted.k);
- deleted.k.p = new->k.p;
+ deleted.k.p = new.k->p;
- return bch2_trans_mark_key(trans, btree_id, level, bkey_i_to_s_c(&deleted), new,
- BTREE_TRIGGER_INSERT|flags);
+ return bch2_key_trigger(trans, btree_id, level, bkey_i_to_s_c(&deleted), new,
+ BTREE_TRIGGER_INSERT|flags);
}
void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int);
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 9574c8c4..8e2488a4 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -719,12 +719,6 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
if (IS_ERR(b))
return b;
- /*
- * Btree nodes read in from disk should not have the accessed bit set
- * initially, so that linear scans don't thrash the cache:
- */
- clear_btree_node_accessed(b);
-
bkey_copy(&b->key, k);
if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) {
/* raced with another fill: */
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 9f27cb3e..49b4ade7 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -41,6 +41,14 @@
#define DROP_THIS_NODE 10
#define DROP_PREV_NODE 11
+static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
+{
+ return (struct bkey_s) {{{
+ (struct bkey *) k.k,
+ (struct bch_val *) k.v
+ }}};
+}
+
static bool should_restart_for_topology_repair(struct bch_fs *c)
{
return c->opts.fix_errors != FSCK_FIX_no &&
@@ -805,9 +813,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
struct bch_fs *c = trans->c;
struct bkey deleted = KEY(0, 0, 0);
struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
- unsigned flags =
- BTREE_TRIGGER_GC|
- (initial ? BTREE_TRIGGER_NOATOMIC : 0);
int ret = 0;
deleted.p = k->k->p;
@@ -829,7 +834,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
}
ret = commit_do(trans, NULL, NULL, 0,
- bch2_mark_key(trans, btree_id, level, old, *k, flags));
+ bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
fsck_err:
err:
bch_err_fn(c, ret);
@@ -1589,7 +1594,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans,
if (!r->refcount)
new->k.type = KEY_TYPE_deleted;
else
- *bkey_refcount(new) = cpu_to_le64(r->refcount);
+ *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount);
}
fsck_err:
printbuf_exit(&buf);
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 38d27cae..378579bb 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -942,6 +942,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
unsigned ptr_written = btree_ptr_sectors_written(&b->key);
struct printbuf buf = PRINTBUF;
int ret = 0, retry_read = 0, write = READ;
+ u64 start_time = local_clock();
b->version_ondisk = U16_MAX;
/* We might get called multiple times on read retry: */
@@ -1209,6 +1210,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
out:
mempool_free(iter, &c->fill_iter);
printbuf_exit(&buf);
+ bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time);
return retry_read;
fsck_err:
if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
@@ -1645,7 +1647,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
if (sync) {
submit_bio_wait(bio);
-
+ bch2_latency_acct(ca, rb->start_time, READ);
btree_node_read_work(&rb->work);
} else {
submit_bio(bio);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 7e5c797c..6e8e9ba5 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -897,7 +897,8 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
bch2_bkey_buf_reassemble(out, c, k);
- if (flags & BTREE_ITER_PREFETCH)
+ if ((flags & BTREE_ITER_PREFETCH) &&
+ c->opts.btree_node_prefetch)
ret = btree_path_prefetch_j(trans, path, &jiter);
bch2_btree_and_journal_iter_exit(&jiter);
@@ -929,7 +930,8 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
bch2_bkey_buf_unpack(&tmp, c, l->b,
bch2_btree_node_iter_peek(&l->iter, l->b));
- if (flags & BTREE_ITER_PREFETCH) {
+ if ((flags & BTREE_ITER_PREFETCH) &&
+ c->opts.btree_node_prefetch) {
ret = btree_path_prefetch(trans, path);
if (ret)
goto err;
@@ -2816,11 +2818,34 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
return p;
}
+#include "sb-members.h"
+
static inline void check_srcu_held_too_long(struct btree_trans *trans)
{
- WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
- "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
- (jiffies - trans->srcu_lock_time) / HZ);
+ if (trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10)) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "btree node read time:\n");
+ bch2_time_stats_to_text(&buf, &trans->c->times[BCH_TIME_btree_node_read]);
+
+ prt_str(&buf, "btree node read_done time:\n");
+ bch2_time_stats_to_text(&buf, &trans->c->times[BCH_TIME_btree_node_read_done]);
+
+ for_each_member_device(trans->c, ca) {
+ prt_printf(&buf, "device %u read time:\n", ca->dev_idx);
+ bch2_time_stats_to_text(&buf, &ca->io_latency[READ]);
+ }
+
+ struct btree_transaction_stats *s = btree_trans_stats(trans);
+ prt_str(&buf, "transaction duration:\n");
+ bch2_time_stats_to_text(&buf, &s->duration);
+
+ WARN(trans->srcu_held && time_after(jiffies, trans->srcu_lock_time + HZ * 10),
+ "btree trans held srcu lock (delaying memory reclaim) for %lu seconds",
+ (jiffies - trans->srcu_lock_time) / HZ);
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ }
}
void bch2_trans_srcu_unlock(struct btree_trans *trans)
diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c
index 1ed8327a..2d1c95c4 100644
--- a/libbcachefs/btree_locking.c
+++ b/libbcachefs/btree_locking.c
@@ -86,8 +86,14 @@ static noinline void print_cycle(struct printbuf *out, struct lock_graph *g)
prt_printf(out, "Found lock cycle (%u entries):", g->nr);
prt_newline(out);
- for (i = g->g; i < g->g + g->nr; i++)
+ for (i = g->g; i < g->g + g->nr; i++) {
+ struct task_struct *task = READ_ONCE(i->trans->locking_wait.task);
+ if (!task)
+ continue;
+
bch2_btree_trans_to_text(out, i->trans);
+ bch2_prt_task_backtrace(out, task, i == g->g ? 5 : 1);
+ }
}
static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
@@ -144,8 +150,7 @@ static bool lock_graph_remove_non_waiters(struct lock_graph *g)
return false;
}
-static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans,
- unsigned long ip)
+static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
@@ -157,7 +162,7 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans
buf.atomic++;
print_cycle(&buf, g);
- trace_trans_restart_would_deadlock(trans, ip, buf.buf);
+ trace_trans_restart_would_deadlock(trans, buf.buf);
printbuf_exit(&buf);
}
}
@@ -165,7 +170,7 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans
static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
{
if (i == g->g) {
- trace_would_deadlock(g, i->trans, _RET_IP_);
+ trace_would_deadlock(g, i->trans);
return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
} else {
i->trans->lock_must_abort = true;
@@ -222,7 +227,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
prt_printf(&buf, "backtrace:");
prt_newline(&buf);
printbuf_indent_add(&buf, 2);
- bch2_prt_task_backtrace(&buf, trans->locking_wait.task);
+ bch2_prt_task_backtrace(&buf, trans->locking_wait.task, 2);
printbuf_indent_sub(&buf, 2);
prt_newline(&buf);
}
@@ -291,7 +296,7 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
if (cycle)
return -1;
- trace_would_deadlock(&g, trans, _RET_IP_);
+ trace_would_deadlock(&g, trans);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
}
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 3472882b..80505554 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -451,20 +451,15 @@ static int run_one_mem_trigger(struct btree_trans *trans,
if (!btree_node_type_needs_gc(__btree_node_type(i->level, i->btree_id)))
return 0;
- if (old_ops->atomic_trigger == new_ops->atomic_trigger) {
- ret = bch2_mark_key(trans, i->btree_id, i->level,
- old, bkey_i_to_s_c(new),
+ if (old_ops->trigger == new_ops->trigger) {
+ ret = bch2_key_trigger(trans, i->btree_id, i->level,
+ old, bkey_i_to_s(new),
BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
} else {
- struct bkey _deleted = POS_KEY((trans->paths + i->path)->pos);
- struct bkey_s_c deleted = (struct bkey_s_c) { &_deleted, NULL };
-
- ret = bch2_mark_key(trans, i->btree_id, i->level,
- deleted, bkey_i_to_s_c(new),
- BTREE_TRIGGER_INSERT|flags) ?:
- bch2_mark_key(trans, i->btree_id, i->level,
- old, deleted,
- BTREE_TRIGGER_OVERWRITE|flags);
+ ret = bch2_key_trigger_new(trans, i->btree_id, i->level,
+ bkey_i_to_s(new), flags) ?:
+ bch2_key_trigger_old(trans, i->btree_id, i->level,
+ old, flags);
}
return ret;
@@ -482,6 +477,7 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
struct bkey_s_c old = { &old_k, i->old_v };
const struct bkey_ops *old_ops = bch2_bkey_type_ops(old.k->type);
const struct bkey_ops *new_ops = bch2_bkey_type_ops(i->k->k.type);
+ unsigned flags = i->flags|BTREE_TRIGGER_TRANSACTIONAL;
verify_update_old_key(trans, i);
@@ -491,19 +487,18 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
if (!i->insert_trigger_run &&
!i->overwrite_trigger_run &&
- old_ops->trans_trigger == new_ops->trans_trigger) {
+ old_ops->trigger == new_ops->trigger) {
i->overwrite_trigger_run = true;
i->insert_trigger_run = true;
- return bch2_trans_mark_key(trans, i->btree_id, i->level, old, i->k,
- BTREE_TRIGGER_INSERT|
- BTREE_TRIGGER_OVERWRITE|
- i->flags) ?: 1;
+ return bch2_key_trigger(trans, i->btree_id, i->level, old, bkey_i_to_s(i->k),
+ BTREE_TRIGGER_INSERT|
+ BTREE_TRIGGER_OVERWRITE|flags) ?: 1;
} else if (overwrite && !i->overwrite_trigger_run) {
i->overwrite_trigger_run = true;
- return bch2_trans_mark_old(trans, i->btree_id, i->level, old, i->flags) ?: 1;
+ return bch2_key_trigger_old(trans, i->btree_id, i->level, old, flags) ?: 1;
} else if (!overwrite && !i->insert_trigger_run) {
i->insert_trigger_run = true;
- return bch2_trans_mark_new(trans, i->btree_id, i->level, i->k, i->flags) ?: 1;
+ return bch2_key_trigger_new(trans, i->btree_id, i->level, bkey_i_to_s(i->k), flags) ?: 1;
} else {
return 0;
}
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 2a93eb92..44f9dfa2 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -568,7 +568,8 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
for_each_keylist_key(&as->old_keys, k) {
unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
- ret = bch2_trans_mark_old(trans, as->btree_id, level, bkey_i_to_s_c(k), 0);
+ ret = bch2_key_trigger_old(trans, as->btree_id, level, bkey_i_to_s_c(k),
+ BTREE_TRIGGER_TRANSACTIONAL);
if (ret)
return ret;
}
@@ -576,7 +577,8 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
for_each_keylist_key(&as->new_keys, k) {
unsigned level = bkey_i_to_btree_ptr_v2(k)->v.mem_ptr;
- ret = bch2_trans_mark_new(trans, as->btree_id, level, k, 0);
+ ret = bch2_key_trigger_new(trans, as->btree_id, level, bkey_i_to_s(k),
+ BTREE_TRIGGER_TRANSACTIONAL);
if (ret)
return ret;
}
@@ -2156,13 +2158,12 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
int ret;
if (!skip_triggers) {
- ret = bch2_trans_mark_old(trans, b->c.btree_id, b->c.level + 1,
- bkey_i_to_s_c(&b->key), 0);
- if (ret)
- return ret;
-
- ret = bch2_trans_mark_new(trans, b->c.btree_id, b->c.level + 1,
- new_key, 0);
+ ret = bch2_key_trigger_old(trans, b->c.btree_id, b->c.level + 1,
+ bkey_i_to_s_c(&b->key),
+ BTREE_TRIGGER_TRANSACTIONAL) ?:
+ bch2_key_trigger_new(trans, b->c.btree_id, b->c.level + 1,
+ bkey_i_to_s(new_key),
+ BTREE_TRIGGER_TRANSACTIONAL);
if (ret)
return ret;
}
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index c0dac042..67b7e796 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -296,10 +296,10 @@ void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
}
}
-static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
- struct bch_alloc_v4 old,
- struct bch_alloc_v4 new,
- u64 journal_seq, bool gc)
+void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
+ const struct bch_alloc_v4 *old,
+ const struct bch_alloc_v4 *new,
+ u64 journal_seq, bool gc)
{
struct bch_fs_usage *fs_usage;
struct bch_dev_usage *u;
@@ -307,24 +307,24 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
preempt_disable();
fs_usage = fs_usage_ptr(c, journal_seq, gc);
- if (data_type_is_hidden(old.data_type))
+ if (data_type_is_hidden(old->data_type))
fs_usage->hidden -= ca->mi.bucket_size;
- if (data_type_is_hidden(new.data_type))
+ if (data_type_is_hidden(new->data_type))
fs_usage->hidden += ca->mi.bucket_size;
u = dev_usage_ptr(ca, journal_seq, gc);
- u->d[old.data_type].buckets--;
- u->d[new.data_type].buckets++;
+ u->d[old->data_type].buckets--;
+ u->d[new->data_type].buckets++;
- u->d[old.data_type].sectors -= bch2_bucket_sectors_dirty(old);
- u->d[new.data_type].sectors += bch2_bucket_sectors_dirty(new);
+ u->d[old->data_type].sectors -= bch2_bucket_sectors_dirty(*old);
+ u->d[new->data_type].sectors += bch2_bucket_sectors_dirty(*new);
- u->d[BCH_DATA_cached].sectors += new.cached_sectors;
- u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
+ u->d[BCH_DATA_cached].sectors += new->cached_sectors;
+ u->d[BCH_DATA_cached].sectors -= old->cached_sectors;
- u->d[old.data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, old);
- u->d[new.data_type].fragmented += bch2_bucket_sectors_fragmented(ca, new);
+ u->d[old->data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, *old);
+ u->d[new->data_type].fragmented += bch2_bucket_sectors_fragmented(ca, *new);
preempt_enable();
}
@@ -340,13 +340,13 @@ static inline struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
};
}
-static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
- struct bucket old, struct bucket new)
+void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
+ struct bucket *old, struct bucket *new)
{
- bch2_dev_usage_update(c, ca,
- bucket_m_to_alloc(old),
- bucket_m_to_alloc(new),
- 0, true);
+ struct bch_alloc_v4 old_a = bucket_m_to_alloc(*old);
+ struct bch_alloc_v4 new_a = bucket_m_to_alloc(*new);
+
+ bch2_dev_usage_update(c, ca, &old_a, &new_a, 0, true);
}
static inline int __update_replicas(struct bch_fs *c,
@@ -364,9 +364,9 @@ static inline int __update_replicas(struct bch_fs *c,
return 0;
}
-static inline int update_replicas(struct bch_fs *c, struct bkey_s_c k,
- struct bch_replicas_entry_v1 *r, s64 sectors,
- unsigned journal_seq, bool gc)
+int bch2_update_replicas(struct bch_fs *c, struct bkey_s_c k,
+ struct bch_replicas_entry_v1 *r, s64 sectors,
+ unsigned journal_seq, bool gc)
{
struct bch_fs_usage *fs_usage;
int idx, ret = 0;
@@ -413,7 +413,7 @@ static inline int update_cached_sectors(struct bch_fs *c,
bch2_replicas_entry_cached(&r.e, dev);
- return update_replicas(c, k, &r.e, sectors, journal_seq, gc);
+ return bch2_update_replicas(c, k, &r.e, sectors, journal_seq, gc);
}
static int __replicas_deltas_realloc(struct btree_trans *trans, unsigned more,
@@ -496,114 +496,6 @@ int bch2_update_cached_sectors_list(struct btree_trans *trans, unsigned dev, s64
return bch2_update_replicas_list(trans, &r.e, sectors);
}
-int bch2_mark_alloc(struct btree_trans *trans,
- enum btree_id btree, unsigned level,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
-{
- bool gc = flags & BTREE_TRIGGER_GC;
- u64 journal_seq = trans->journal_res.seq;
- u64 bucket_journal_seq;
- struct bch_fs *c = trans->c;
- struct bch_alloc_v4 old_a_convert, new_a_convert;
- const struct bch_alloc_v4 *old_a, *new_a;
- struct bch_dev *ca;
- int ret = 0;
-
- /*
- * alloc btree is read in by bch2_alloc_read, not gc:
- */
- if ((flags & BTREE_TRIGGER_GC) &&
- !(flags & BTREE_TRIGGER_BUCKET_INVALIDATE))
- return 0;
-
- if (bch2_trans_inconsistent_on(!bch2_dev_bucket_exists(c, new.k->p), trans,
- "alloc key for invalid device or bucket"))
- return -EIO;
-
- ca = bch_dev_bkey_exists(c, new.k->p.inode);
-
- old_a = bch2_alloc_to_v4(old, &old_a_convert);
- new_a = bch2_alloc_to_v4(new, &new_a_convert);
-
- bucket_journal_seq = new_a->journal_seq;
-
- if ((flags & BTREE_TRIGGER_INSERT) &&
- data_type_is_empty(old_a->data_type) !=
- data_type_is_empty(new_a->data_type) &&
- new.k->type == KEY_TYPE_alloc_v4) {
- struct bch_alloc_v4 *v = (struct bch_alloc_v4 *) new.v;
-
- EBUG_ON(!journal_seq);
-
- /*
- * If the btree updates referring to a bucket weren't flushed
- * before the bucket became empty again, then the we don't have
- * to wait on a journal flush before we can reuse the bucket:
- */
- v->journal_seq = bucket_journal_seq =
- data_type_is_empty(new_a->data_type) &&
- (journal_seq == v->journal_seq ||
- bch2_journal_noflush_seq(&c->journal, v->journal_seq))
- ? 0 : journal_seq;
- }
-
- if (!data_type_is_empty(old_a->data_type) &&
- data_type_is_empty(new_a->data_type) &&
- bucket_journal_seq) {
- ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
- c->journal.flushed_seq_ondisk,
- new.k->p.inode, new.k->p.offset,
- bucket_journal_seq);
- if (ret) {
- bch2_fs_fatal_error(c,
- "error setting bucket_needs_journal_commit: %i", ret);
- return ret;
- }
- }
-
- percpu_down_read(&c->mark_lock);
- if (!gc && new_a->gen != old_a->gen)
- *bucket_gen(ca, new.k->p.offset) = new_a->gen;
-
- bch2_dev_usage_update(c, ca, *old_a, *new_a, journal_seq, gc);
-
- if (gc) {
- struct bucket *g = gc_bucket(ca, new.k->p.offset);
-
- bucket_lock(g);
-
- g->gen_valid = 1;
- g->gen = new_a->gen;
- g->data_type = new_a->data_type;
- g->stripe = new_a->stripe;
- g->stripe_redundancy = new_a->stripe_redundancy;
- g->dirty_sectors = new_a->dirty_sectors;
- g->cached_sectors = new_a->cached_sectors;
-
- bucket_unlock(g);
- }
- percpu_up_read(&c->mark_lock);
-
- if (new_a->data_type == BCH_DATA_free &&
- (!new_a->journal_seq || new_a->journal_seq < c->journal.flushed_seq_ondisk))
- closure_wake_up(&c->freelist_wait);
-
- if (new_a->data_type == BCH_DATA_need_discard &&
- (!bucket_journal_seq || bucket_journal_seq < c->journal.flushed_seq_ondisk))
- bch2_do_discards(c);
-
- if (old_a->data_type != BCH_DATA_cached &&
- new_a->data_type == BCH_DATA_cached &&
- should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
- bch2_do_invalidates(c);
-
- if (new_a->data_type == BCH_DATA_need_gc_gens)
- bch2_do_gc_gens(c);
-
- return 0;
-}
-
int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
size_t b, enum bch_data_type data_type,
unsigned sectors, struct gc_pos pos,
@@ -652,17 +544,17 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
err:
bucket_unlock(g);
if (!ret)
- bch2_dev_usage_update_m(c, ca, old, new);
+ bch2_dev_usage_update_m(c, ca, &old, &new);
percpu_up_read(&c->mark_lock);
return ret;
}
-static int check_bucket_ref(struct btree_trans *trans,
- struct bkey_s_c k,
- const struct bch_extent_ptr *ptr,
- s64 sectors, enum bch_data_type ptr_data_type,
- u8 b_gen, u8 bucket_data_type,
- u32 bucket_sectors)
+int bch2_check_bucket_ref(struct btree_trans *trans,
+ struct bkey_s_c k,
+ const struct bch_extent_ptr *ptr,
+ s64 sectors, enum bch_data_type ptr_data_type,
+ u8 b_gen, u8 bucket_data_type,
+ u32 bucket_sectors)
{
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
@@ -761,404 +653,6 @@ err:
goto out;
}
-static int mark_stripe_bucket(struct btree_trans *trans,
- struct bkey_s_c k,
- unsigned ptr_idx,
- unsigned flags)
-{
- struct bch_fs *c = trans->c;
- const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
- unsigned nr_data = s->nr_blocks - s->nr_redundant;
- bool parity = ptr_idx >= nr_data;
- enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
- s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
- const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- struct bucket old, new, *g;
- struct printbuf buf = PRINTBUF;
- int ret = 0;
-
- BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
- /* * XXX doesn't handle deletion */
-
- percpu_down_read(&c->mark_lock);
- g = PTR_GC_BUCKET(ca, ptr);
-
- if (g->dirty_sectors ||
- (g->stripe && g->stripe != k.k->p.offset)) {
- bch2_fs_inconsistent(c,
- "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
- ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
- ret = -EINVAL;
- goto err;
- }
-
- bucket_lock(g);
- old = *g;
-
- ret = check_bucket_ref(trans, k, ptr, sectors, data_type,
- g->gen, g->data_type,
- g->dirty_sectors);
- if (ret)
- goto err;
-
- g->data_type = data_type;
- g->dirty_sectors += sectors;
-
- g->stripe = k.k->p.offset;
- g->stripe_redundancy = s->nr_redundant;
- new = *g;
-err:
- bucket_unlock(g);
- if (!ret)
- bch2_dev_usage_update_m(c, ca, old, new);
- percpu_up_read(&c->mark_lock);
- printbuf_exit(&buf);
- return ret;
-}
-
-static int __mark_pointer(struct btree_trans *trans,
- struct bkey_s_c k,
- const struct bch_extent_ptr *ptr,
- s64 sectors, enum bch_data_type ptr_data_type,
- u8 bucket_gen, u8 *bucket_data_type,
- u32 *dirty_sectors, u32 *cached_sectors)
-{
- u32 *dst_sectors = !ptr->cached
- ? dirty_sectors
- : cached_sectors;
- int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
- bucket_gen, *bucket_data_type, *dst_sectors);
-
- if (ret)
- return ret;
-
- *dst_sectors += sectors;
-
- if (!*dirty_sectors && !*cached_sectors)
- *bucket_data_type = 0;
- else if (*bucket_data_type != BCH_DATA_stripe)
- *bucket_data_type = ptr_data_type;
-
- return 0;
-}
-
-static int bch2_mark_pointer(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c k,
- struct extent_ptr_decoded p,
- s64 sectors,
- unsigned flags)
-{
- struct bch_fs *c = trans->c;
- struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
- struct bucket old, new, *g;
- enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
- u8 bucket_data_type;
- int ret = 0;
-
- BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
- percpu_down_read(&c->mark_lock);
- g = PTR_GC_BUCKET(ca, &p.ptr);
- bucket_lock(g);
- old = *g;
-
- bucket_data_type = g->data_type;
- ret = __mark_pointer(trans, k, &p.ptr, sectors,
- data_type, g->gen,
- &bucket_data_type,
- &g->dirty_sectors,
- &g->cached_sectors);
- if (!ret)
- g->data_type = bucket_data_type;
-
- new = *g;
- bucket_unlock(g);
- if (!ret)
- bch2_dev_usage_update_m(c, ca, old, new);
- percpu_up_read(&c->mark_lock);
-
- return ret;
-}
-
-static int bch2_mark_stripe_ptr(struct btree_trans *trans,
- struct bkey_s_c k,
- struct bch_extent_stripe_ptr p,
- enum bch_data_type data_type,
- s64 sectors,
- unsigned flags)
-{
- struct bch_fs *c = trans->c;
- struct bch_replicas_padded r;
- struct gc_stripe *m;
-
- BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
- m = genradix_ptr_alloc(&c->gc_stripes, p.idx, GFP_KERNEL);
- if (!m) {
- bch_err(c, "error allocating memory for gc_stripes, idx %llu",
- (u64) p.idx);
- return -BCH_ERR_ENOMEM_mark_stripe_ptr;
- }
-
- mutex_lock(&c->ec_stripes_heap_lock);
-
- if (!m || !m->alive) {
- mutex_unlock(&c->ec_stripes_heap_lock);
- bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
- (u64) p.idx);
- bch2_inconsistent_error(c);
- return -EIO;
- }
-
- m->block_sectors[p.block] += sectors;
-
- r = m->r;
- mutex_unlock(&c->ec_stripes_heap_lock);
-
- r.e.data_type = data_type;
- update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true);
-
- return 0;
-}
-
-static int __mark_extent(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, unsigned flags)
-{
- u64 journal_seq = trans->journal_res.seq;
- struct bch_fs *c = trans->c;
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- const union bch_extent_entry *entry;
- struct extent_ptr_decoded p;
- struct bch_replicas_padded r;
- enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
- ? BCH_DATA_btree
- : BCH_DATA_user;
- s64 sectors = bkey_is_btree_ptr(k.k)
- ? btree_sectors(c)
- : k.k->size;
- s64 dirty_sectors = 0;
- bool stale;
- int ret;
-
- BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
- r.e.data_type = data_type;
- r.e.nr_devs = 0;
- r.e.nr_required = 1;
-
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- s64 disk_sectors = ptr_disk_sectors(sectors, p);
-
- if (flags & BTREE_TRIGGER_OVERWRITE)
- disk_sectors = -disk_sectors;
-
- ret = bch2_mark_pointer(trans, btree_id, level, k, p, disk_sectors, flags);
- if (ret < 0)
- return ret;
-
- stale = ret > 0;
-
- if (p.ptr.cached) {
- if (!stale) {
- ret = update_cached_sectors(c, k, p.ptr.dev,
- disk_sectors, journal_seq, true);
- if (ret) {
- bch2_fs_fatal_error(c, "%s(): no replicas entry while updating cached sectors",
- __func__);
- return ret;
- }
- }
- } else if (!p.has_ec) {
- dirty_sectors += disk_sectors;
- r.e.devs[r.e.nr_devs++] = p.ptr.dev;
- } else {
- ret = bch2_mark_stripe_ptr(trans, k, p.ec, data_type,
- disk_sectors, flags);
- if (ret)
- return ret;
-
- /*
- * There may be other dirty pointers in this extent, but
- * if so they're not required for mounting if we have an
- * erasure coded pointer in this extent:
- */
- r.e.nr_required = 0;
- }
- }
-
- if (r.e.nr_devs) {
- ret = update_replicas(c, k, &r.e, dirty_sectors, journal_seq, true);
- if (ret) {
- struct printbuf buf = PRINTBUF;
-
- bch2_bkey_val_to_text(&buf, c, k);
- bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
- printbuf_exit(&buf);
- return ret;
- }
- }
-
- return 0;
-}
-
-int bch2_mark_extent(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
-{
- return mem_trigger_run_overwrite_then_insert(__mark_extent, trans, btree_id, level, old, new, flags);
-}
-
-int bch2_mark_stripe(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
-{
- bool gc = flags & BTREE_TRIGGER_GC;
- u64 journal_seq = trans->journal_res.seq;
- struct bch_fs *c = trans->c;
- u64 idx = new.k->p.offset;
- const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
- ? bkey_s_c_to_stripe(old).v : NULL;
- const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
- ? bkey_s_c_to_stripe(new).v : NULL;
- unsigned i;
- int ret;
-
- BUG_ON(gc && old_s);
-
- if (!gc) {
- struct stripe *m = genradix_ptr(&c->stripes, idx);
-
- if (!m) {
- struct printbuf buf1 = PRINTBUF;
- struct printbuf buf2 = PRINTBUF;
-
- bch2_bkey_val_to_text(&buf1, c, old);
- bch2_bkey_val_to_text(&buf2, c, new);
- bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n"
- "old %s\n"
- "new %s", idx, buf1.buf, buf2.buf);
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
- bch2_inconsistent_error(c);
- return -1;
- }
-
- if (!new_s) {
- bch2_stripes_heap_del(c, m, idx);
-
- memset(m, 0, sizeof(*m));
- } else {
- m->sectors = le16_to_cpu(new_s->sectors);
- m->algorithm = new_s->algorithm;
- m->nr_blocks = new_s->nr_blocks;
- m->nr_redundant = new_s->nr_redundant;
- m->blocks_nonempty = 0;
-
- for (i = 0; i < new_s->nr_blocks; i++)
- m->blocks_nonempty += !!stripe_blockcount_get(new_s, i);
-
- if (!old_s)
- bch2_stripes_heap_insert(c, m, idx);
- else
- bch2_stripes_heap_update(c, m, idx);
- }
- } else {
- struct gc_stripe *m =
- genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
-
- if (!m) {
- bch_err(c, "error allocating memory for gc_stripes, idx %llu",
- idx);
- return -BCH_ERR_ENOMEM_mark_stripe;
- }
- /*
- * This will be wrong when we bring back runtime gc: we should
- * be unmarking the old key and then marking the new key
- */
- m->alive = true;
- m->sectors = le16_to_cpu(new_s->sectors);
- m->nr_blocks = new_s->nr_blocks;
- m->nr_redundant = new_s->nr_redundant;
-
- for (i = 0; i < new_s->nr_blocks; i++)
- m->ptrs[i] = new_s->ptrs[i];
-
- bch2_bkey_to_replicas(&m->r.e, new);
-
- /*
- * gc recalculates this field from stripe ptr
- * references:
- */
- memset(m->block_sectors, 0, sizeof(m->block_sectors));
-
- for (i = 0; i < new_s->nr_blocks; i++) {
- ret = mark_stripe_bucket(trans, new, i, flags);
- if (ret)
- return ret;
- }
-
- ret = update_replicas(c, new, &m->r.e,
- ((s64) m->sectors * m->nr_redundant),
- journal_seq, gc);
- if (ret) {
- struct printbuf buf = PRINTBUF;
-
- bch2_bkey_val_to_text(&buf, c, new);
- bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
- printbuf_exit(&buf);
- return ret;
- }
- }
-
- return 0;
-}
-
-static int __mark_reservation(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, unsigned flags)
-{
- struct bch_fs *c = trans->c;
- struct bch_fs_usage *fs_usage;
- unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
- s64 sectors = (s64) k.k->size;
-
- BUG_ON(!(flags & BTREE_TRIGGER_GC));
-
- if (flags & BTREE_TRIGGER_OVERWRITE)
- sectors = -sectors;
- sectors *= replicas;
-
- percpu_down_read(&c->mark_lock);
- preempt_disable();
-
- fs_usage = fs_usage_ptr(c, trans->journal_res.seq, flags & BTREE_TRIGGER_GC);
- replicas = clamp_t(unsigned, replicas, 1,
- ARRAY_SIZE(fs_usage->persistent_reserved));
-
- fs_usage->reserved += sectors;
- fs_usage->persistent_reserved[replicas - 1] += sectors;
-
- preempt_enable();
- percpu_up_read(&c->mark_lock);
-
- return 0;
-}
-
-int bch2_mark_reservation(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
-{
- return mem_trigger_run_overwrite_then_insert(__mark_reservation, trans, btree_id, level, old, new, flags);
-}
-
void bch2_trans_fs_usage_revert(struct btree_trans *trans,
struct replicas_delta_list *deltas)
{
@@ -1278,92 +772,184 @@ need_mark:
return -1;
}
-/* trans_mark: */
+/* KEY_TYPE_extent: */
+
+static int __mark_pointer(struct btree_trans *trans,
+ struct bkey_s_c k,
+ const struct bch_extent_ptr *ptr,
+ s64 sectors, enum bch_data_type ptr_data_type,
+ u8 bucket_gen, u8 *bucket_data_type,
+ u32 *dirty_sectors, u32 *cached_sectors)
+{
+ u32 *dst_sectors = !ptr->cached
+ ? dirty_sectors
+ : cached_sectors;
+ int ret = bch2_check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
+ bucket_gen, *bucket_data_type, *dst_sectors);
+
+ if (ret)
+ return ret;
+
+ *dst_sectors += sectors;
+
+ if (!*dirty_sectors && !*cached_sectors)
+ *bucket_data_type = 0;
+ else if (*bucket_data_type != BCH_DATA_stripe)
+ *bucket_data_type = ptr_data_type;
+
+ return 0;
+}
-static inline int bch2_trans_mark_pointer(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, struct extent_ptr_decoded p,
- unsigned flags)
+static int bch2_trigger_pointer(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, struct extent_ptr_decoded p,
+ s64 *sectors,
+ unsigned flags)
{
bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
- struct btree_iter iter;
- struct bkey_i_alloc_v4 *a;
struct bpos bucket;
struct bch_backpointer bp;
- s64 sectors;
- int ret;
bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
- sectors = bp.bucket_len;
- if (!insert)
- sectors = -sectors;
-
- a = bch2_trans_start_alloc_update(trans, &iter, bucket);
- if (IS_ERR(a))
- return PTR_ERR(a);
+ *sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);
- ret = __mark_pointer(trans, k, &p.ptr, sectors, bp.data_type,
- a->v.gen, &a->v.data_type,
- &a->v.dirty_sectors, &a->v.cached_sectors) ?:
- bch2_trans_update(trans, &iter, &a->k_i, 0);
- bch2_trans_iter_exit(trans, &iter);
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ struct btree_iter iter;
+ struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &iter, bucket);
+ int ret = PTR_ERR_OR_ZERO(a);
+ if (ret)
+ return ret;
- if (ret)
- return ret;
+ ret = __mark_pointer(trans, k, &p.ptr, *sectors, bp.data_type,
+ a->v.gen, &a->v.data_type,
+ &a->v.dirty_sectors, &a->v.cached_sectors) ?:
+ bch2_trans_update(trans, &iter, &a->k_i, 0);
+ bch2_trans_iter_exit(trans, &iter);
- if (!p.ptr.cached) {
- ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert);
if (ret)
return ret;
+
+ if (!p.ptr.cached) {
+ ret = bch2_bucket_backpointer_mod(trans, bucket, bp, k, insert);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (flags & BTREE_TRIGGER_GC) {
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+ enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+
+ percpu_down_read(&c->mark_lock);
+ struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+ bucket_lock(g);
+ struct bucket old = *g;
+
+ u8 bucket_data_type = g->data_type;
+ int ret = __mark_pointer(trans, k, &p.ptr, *sectors,
+ data_type, g->gen,
+ &bucket_data_type,
+ &g->dirty_sectors,
+ &g->cached_sectors);
+ if (ret) {
+ bucket_unlock(g);
+ percpu_up_read(&c->mark_lock);
+ return ret;
+ }
+
+ g->data_type = bucket_data_type;
+ struct bucket new = *g;
+ bucket_unlock(g);
+ bch2_dev_usage_update_m(c, ca, &old, &new);
+ percpu_up_read(&c->mark_lock);
}
return 0;
}
-static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
- struct extent_ptr_decoded p,
- s64 sectors, enum bch_data_type data_type)
+static int bch2_trigger_stripe_ptr(struct btree_trans *trans,
+ struct bkey_s_c k,
+ struct extent_ptr_decoded p,
+ enum bch_data_type data_type,
+ s64 sectors, unsigned flags)
{
- struct btree_iter iter;
- struct bkey_i_stripe *s;
- struct bch_replicas_padded r;
- int ret = 0;
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ struct btree_iter iter;
+ struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_stripes, POS(0, p.ec.idx),
+ BTREE_ITER_WITH_UPDATES, stripe);
+ int ret = PTR_ERR_OR_ZERO(s);
+ if (unlikely(ret)) {
+ bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans,
+ "pointer to nonexistent stripe %llu",
+ (u64) p.ec.idx);
+ goto err;
+ }
- s = bch2_bkey_get_mut_typed(trans, &iter,
- BTREE_ID_stripes, POS(0, p.ec.idx),
- BTREE_ITER_WITH_UPDATES, stripe);
- ret = PTR_ERR_OR_ZERO(s);
- if (unlikely(ret)) {
- bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans,
- "pointer to nonexistent stripe %llu",
- (u64) p.ec.idx);
- goto err;
- }
+ if (!bch2_ptr_matches_stripe(&s->v, p)) {
+ bch2_trans_inconsistent(trans,
+ "stripe pointer doesn't match stripe %llu",
+ (u64) p.ec.idx);
+ ret = -EIO;
+ goto err;
+ }
- if (!bch2_ptr_matches_stripe(&s->v, p)) {
- bch2_trans_inconsistent(trans,
- "stripe pointer doesn't match stripe %llu",
- (u64) p.ec.idx);
- ret = -EIO;
- goto err;
+ stripe_blockcount_set(&s->v, p.ec.block,
+ stripe_blockcount_get(&s->v, p.ec.block) +
+ sectors);
+
+ struct bch_replicas_padded r;
+ bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
+ r.e.data_type = data_type;
+ ret = bch2_update_replicas_list(trans, &r.e, sectors);
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
}
- stripe_blockcount_set(&s->v, p.ec.block,
- stripe_blockcount_get(&s->v, p.ec.block) +
- sectors);
+ if (flags & BTREE_TRIGGER_GC) {
+ struct bch_fs *c = trans->c;
- bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(&s->k_i));
- r.e.data_type = data_type;
- ret = bch2_update_replicas_list(trans, &r.e, sectors);
-err:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
+
+ struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL);
+ if (!m) {
+ bch_err(c, "error allocating memory for gc_stripes, idx %llu",
+ (u64) p.ec.idx);
+ return -BCH_ERR_ENOMEM_mark_stripe_ptr;
+ }
+
+ mutex_lock(&c->ec_stripes_heap_lock);
+
+ if (!m || !m->alive) {
+ mutex_unlock(&c->ec_stripes_heap_lock);
+ struct printbuf buf = PRINTBUF;
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s",
+ (u64) p.ec.idx, buf.buf);
+ printbuf_exit(&buf);
+ bch2_inconsistent_error(c);
+ return -EIO;
+ }
+
+ m->block_sectors[p.ec.block] += sectors;
+
+ struct bch_replicas_padded r = m->r;
+ mutex_unlock(&c->ec_stripes_heap_lock);
+
+ r.e.data_type = data_type;
+ bch2_update_replicas(c, k, &r.e, sectors, trans->journal_res.seq, true);
+ }
+
+ return 0;
}
-static int __trans_mark_extent(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, unsigned flags)
+static int __trigger_extent(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, unsigned flags)
{
+ bool gc = flags & BTREE_TRIGGER_GC;
struct bch_fs *c = trans->c;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
const union bch_extent_entry *entry;
@@ -1372,11 +958,7 @@ static int __trans_mark_extent(struct btree_trans *trans,
enum bch_data_type data_type = bkey_is_btree_ptr(k.k)
? BCH_DATA_btree
: BCH_DATA_user;
- s64 sectors = bkey_is_btree_ptr(k.k)
- ? btree_sectors(c)
- : k.k->size;
s64 dirty_sectors = 0;
- bool stale;
int ret = 0;
r.e.data_type = data_type;
@@ -1384,21 +966,20 @@ static int __trans_mark_extent(struct btree_trans *trans,
r.e.nr_required = 1;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- s64 disk_sectors = ptr_disk_sectors(sectors, p);
-
- if (flags & BTREE_TRIGGER_OVERWRITE)
- disk_sectors = -disk_sectors;
-
- ret = bch2_trans_mark_pointer(trans, btree_id, level, k, p, flags);
+ s64 disk_sectors;
+ ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags);
if (ret < 0)
return ret;
- stale = ret > 0;
+ bool stale = ret > 0;
if (p.ptr.cached) {
if (!stale) {
- ret = bch2_update_cached_sectors_list(trans, p.ptr.dev,
- disk_sectors);
+ ret = !gc
+ ? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors)
+ : update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true);
+ bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors",
+ __func__);
if (ret)
return ret;
}
@@ -1406,226 +987,111 @@ static int __trans_mark_extent(struct btree_trans *trans,
dirty_sectors += disk_sectors;
r.e.devs[r.e.nr_devs++] = p.ptr.dev;
} else {
- ret = bch2_trans_mark_stripe_ptr(trans, p,
- disk_sectors, data_type);
+ ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags);
if (ret)
return ret;
+ /*
+ * There may be other dirty pointers in this extent, but
+ * if so they're not required for mounting if we have an
+ * erasure coded pointer in this extent:
+ */
r.e.nr_required = 0;
}
}
- if (r.e.nr_devs)
- ret = bch2_update_replicas_list(trans, &r.e, dirty_sectors);
-
- return ret;
-}
-
-int bch2_trans_mark_extent(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_i *new,
- unsigned flags)
-{
- struct bch_fs *c = trans->c;
- int mod = (int) bch2_bkey_needs_rebalance(c, bkey_i_to_s_c(new)) -
- (int) bch2_bkey_needs_rebalance(c, old);
+ if (r.e.nr_devs) {
+ ret = !gc
+ ? bch2_update_replicas_list(trans, &r.e, dirty_sectors)
+ : bch2_update_replicas(c, k, &r.e, dirty_sectors, 0, true);
+ if (unlikely(ret && gc)) {
+ struct printbuf buf = PRINTBUF;
- if (mod) {
- int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new->k.p, mod > 0);
+ bch2_bkey_val_to_text(&buf, c, k);
+ bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
+ printbuf_exit(&buf);
+ }
if (ret)
return ret;
}
- return trigger_run_overwrite_then_insert(__trans_mark_extent, trans, btree_id, level, old, new, flags);
+ return 0;
}
-static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
- struct bkey_s_c_stripe s,
- unsigned idx, bool deleting)
+int bch2_trigger_extent(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
{
- struct bch_fs *c = trans->c;
- const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
- struct btree_iter iter;
- struct bkey_i_alloc_v4 *a;
- enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant
- ? BCH_DATA_parity : 0;
- s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0;
- int ret = 0;
-
- if (deleting)
- sectors = -sectors;
-
- a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr));
- if (IS_ERR(a))
- return PTR_ERR(a);
-
- ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
- a->v.gen, a->v.data_type,
- a->v.dirty_sectors);
- if (ret)
- goto err;
-
- if (!deleting) {
- if (bch2_trans_inconsistent_on(a->v.stripe ||
- a->v.stripe_redundancy, trans,
- "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
- iter.pos.inode, iter.pos.offset, a->v.gen,
- bch2_data_types[a->v.data_type],
- a->v.dirty_sectors,
- a->v.stripe, s.k->p.offset)) {
- ret = -EIO;
- goto err;
- }
-
- if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans,
- "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
- iter.pos.inode, iter.pos.offset, a->v.gen,
- bch2_data_types[a->v.data_type],
- a->v.dirty_sectors,
- s.k->p.offset)) {
- ret = -EIO;
- goto err;
- }
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ struct bch_fs *c = trans->c;
+ int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) -
+ (int) bch2_bkey_needs_rebalance(c, old);
- a->v.stripe = s.k->p.offset;
- a->v.stripe_redundancy = s.v->nr_redundant;
- a->v.data_type = BCH_DATA_stripe;
- } else {
- if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
- a->v.stripe_redundancy != s.v->nr_redundant, trans,
- "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
- iter.pos.inode, iter.pos.offset, a->v.gen,
- s.k->p.offset, a->v.stripe)) {
- ret = -EIO;
- goto err;
+ if (mod) {
+ int ret = bch2_btree_bit_mod(trans, BTREE_ID_rebalance_work, new.k->p, mod > 0);
+ if (ret)
+ return ret;
}
-
- a->v.stripe = 0;
- a->v.stripe_redundancy = 0;
- a->v.data_type = alloc_data_type(a->v, BCH_DATA_user);
}
- a->v.dirty_sectors += sectors;
- if (data_type)
- a->v.data_type = !deleting ? data_type : 0;
+ if (flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))
+ return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree_id, level, old, new, flags);
- ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
- if (ret)
- goto err;
-err:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
+ return 0;
}
-int bch2_trans_mark_stripe(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_i *new,
- unsigned flags)
-{
- const struct bch_stripe *old_s = NULL;
- struct bch_stripe *new_s = NULL;
- struct bch_replicas_padded r;
- unsigned i, nr_blocks;
- int ret = 0;
-
- if (old.k->type == KEY_TYPE_stripe)
- old_s = bkey_s_c_to_stripe(old).v;
- if (new->k.type == KEY_TYPE_stripe)
- new_s = &bkey_i_to_stripe(new)->v;
-
- /*
- * If the pointers aren't changing, we don't need to do anything:
- */
- if (new_s && old_s &&
- new_s->nr_blocks == old_s->nr_blocks &&
- new_s->nr_redundant == old_s->nr_redundant &&
- !memcmp(old_s->ptrs, new_s->ptrs,
- new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
- return 0;
+/* KEY_TYPE_reservation */
- BUG_ON(new_s && old_s &&
- (new_s->nr_blocks != old_s->nr_blocks ||
- new_s->nr_redundant != old_s->nr_redundant));
-
- nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks;
-
- if (new_s) {
- s64 sectors = le16_to_cpu(new_s->sectors);
-
- bch2_bkey_to_replicas(&r.e, bkey_i_to_s_c(new));
- ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
- if (ret)
- return ret;
- }
+static int __trigger_reservation(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c k, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+ s64 sectors = (s64) k.k->size * replicas;
- if (old_s) {
- s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
+ if (flags & BTREE_TRIGGER_OVERWRITE)
+ sectors = -sectors;
- bch2_bkey_to_replicas(&r.e, old);
- ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ int ret = bch2_replicas_deltas_realloc(trans, 0);
if (ret)
return ret;
- }
-
- for (i = 0; i < nr_blocks; i++) {
- if (new_s && old_s &&
- !memcmp(&new_s->ptrs[i],
- &old_s->ptrs[i],
- sizeof(new_s->ptrs[i])))
- continue;
- if (new_s) {
- ret = bch2_trans_mark_stripe_bucket(trans,
- bkey_i_to_s_c_stripe(new), i, false);
- if (ret)
- break;
- }
+ struct replicas_delta_list *d = trans->fs_usage_deltas;
+ replicas = min(replicas, ARRAY_SIZE(d->persistent_reserved));
- if (old_s) {
- ret = bch2_trans_mark_stripe_bucket(trans,
- bkey_s_c_to_stripe(old), i, true);
- if (ret)
- break;
- }
+ d->persistent_reserved[replicas - 1] += sectors;
}
- return ret;
-}
-
-static int __trans_mark_reservation(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, unsigned flags)
-{
- unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
- s64 sectors = (s64) k.k->size;
- struct replicas_delta_list *d;
- int ret;
+ if (flags & BTREE_TRIGGER_GC) {
+ percpu_down_read(&c->mark_lock);
+ preempt_disable();
- if (flags & BTREE_TRIGGER_OVERWRITE)
- sectors = -sectors;
- sectors *= replicas;
+ struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage_gc);
- ret = bch2_replicas_deltas_realloc(trans, 0);
- if (ret)
- return ret;
+ replicas = min(replicas, ARRAY_SIZE(fs_usage->persistent_reserved));
+ fs_usage->reserved += sectors;
+ fs_usage->persistent_reserved[replicas - 1] += sectors;
- d = trans->fs_usage_deltas;
- replicas = clamp_t(unsigned, replicas, 1,
- ARRAY_SIZE(d->persistent_reserved));
+ preempt_enable();
+ percpu_up_read(&c->mark_lock);
+ }
- d->persistent_reserved[replicas - 1] += sectors;
return 0;
}
-int bch2_trans_mark_reservation(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old,
- struct bkey_i *new,
- unsigned flags)
+int bch2_trigger_reservation(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
{
- return trigger_run_overwrite_then_insert(__trans_mark_reservation, trans, btree_id, level, old, new, flags);
+ return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags);
}
+/* Mark superblocks: */
+
static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
struct bch_dev *ca, size_t b,
enum bch_data_type type,
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 379101d7..2c95cc5d 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -302,6 +302,12 @@ u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage_online *);
struct bch_fs_usage_short
bch2_fs_usage_read_short(struct bch_fs *);
+void bch2_dev_usage_update(struct bch_fs *, struct bch_dev *,
+ const struct bch_alloc_v4 *,
+ const struct bch_alloc_v4 *, u64, bool);
+void bch2_dev_usage_update_m(struct bch_fs *, struct bch_dev *,
+ struct bucket *, struct bucket *);
+
/* key/bucket marking: */
static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
@@ -316,6 +322,9 @@ static inline struct bch_fs_usage *fs_usage_ptr(struct bch_fs *c,
: c->usage[journal_seq & JOURNAL_BUF_MASK]);
}
+int bch2_update_replicas(struct bch_fs *, struct bkey_s_c,
+ struct bch_replicas_entry_v1 *, s64,
+ unsigned, bool);
int bch2_update_replicas_list(struct btree_trans *,
struct bch_replicas_entry_v1 *, s64);
int bch2_update_cached_sectors_list(struct btree_trans *, unsigned, s64);
@@ -323,36 +332,30 @@ int bch2_replicas_deltas_realloc(struct btree_trans *, unsigned);
void bch2_fs_usage_initialize(struct bch_fs *);
+int bch2_check_bucket_ref(struct btree_trans *, struct bkey_s_c,
+ const struct bch_extent_ptr *,
+ s64, enum bch_data_type, u8, u8, u32);
+
int bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
size_t, enum bch_data_type, unsigned,
struct gc_pos, unsigned);
-int bch2_mark_alloc(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_extent(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_stripe(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_s_c, unsigned);
-int bch2_mark_reservation(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_s_c, unsigned);
-
-int bch2_trans_mark_extent(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-int bch2_trans_mark_stripe(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-int bch2_trans_mark_reservation(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_i *, unsigned);
-#define mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
+int bch2_trigger_extent(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+int bch2_trigger_reservation(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
+
+#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags)\
({ \
int ret = 0; \
\
if (_old.k->type) \
ret = _fn(_trans, _btree_id, _level, _old, _flags & ~BTREE_TRIGGER_INSERT); \
if (!ret && _new.k->type) \
- ret = _fn(_trans, _btree_id, _level, _new, _flags & ~BTREE_TRIGGER_OVERWRITE); \
+ ret = _fn(_trans, _btree_id, _level, _new.s_c, _flags & ~BTREE_TRIGGER_OVERWRITE);\
ret; \
})
-#define trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, _new, _flags) \
- mem_trigger_run_overwrite_then_insert(_fn, _trans, _btree_id, _level, _old, bkey_i_to_s_c(_new), _flags)
-
void bch2_trans_fs_usage_revert(struct btree_trans *, struct replicas_delta_list *);
int bch2_trans_fs_usage_apply(struct btree_trans *, struct replicas_delta_list *);
diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c
index 22a52bc8..226b39c1 100644
--- a/libbcachefs/chardev.c
+++ b/libbcachefs/chardev.c
@@ -11,16 +11,13 @@
#include "replicas.h"
#include "super.h"
#include "super-io.h"
+#include "thread_with_file.h"
-#include <linux/anon_inodes.h>
#include <linux/cdev.h>
#include <linux/device.h>
-#include <linux/file.h>
#include <linux/fs.h>
#include <linux/ioctl.h>
-#include <linux/kthread.h>
#include <linux/major.h>
-#include <linux/poll.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -31,65 +28,6 @@ static int copy_to_user_errcode(void __user *to, const void *from, unsigned long
return copy_to_user(to, from, n) ? -EFAULT : 0;
}
-struct thread_with_file {
- struct task_struct *task;
- int ret;
- bool done;
-};
-
-static void thread_with_file_exit(struct thread_with_file *thr)
-{
- if (thr->task) {
- kthread_stop(thr->task);
- put_task_struct(thr->task);
- }
-}
-
-__printf(4, 0)
-static int run_thread_with_file(struct thread_with_file *thr,
- const struct file_operations *fops,
- int (*fn)(void *), const char *fmt, ...)
-{
- va_list args;
- struct file *file = NULL;
- int ret, fd = -1;
- struct printbuf name = PRINTBUF;
- unsigned fd_flags = O_RDONLY|O_CLOEXEC|O_NONBLOCK;
-
- va_start(args, fmt);
- prt_vprintf(&name, fmt, args);
- va_end(args);
-
- thr->ret = 0;
- thr->task = kthread_create(fn, thr, name.buf);
- ret = PTR_ERR_OR_ZERO(thr->task);
- if (ret)
- goto err;
-
- ret = get_unused_fd_flags(fd_flags);
- if (ret < 0)
- goto err_stop_task;
- fd = ret;
-
- file = anon_inode_getfile(name.buf, fops, thr, fd_flags);
- ret = PTR_ERR_OR_ZERO(file);
- if (ret)
- goto err_put_fd;
-
- fd_install(fd, file);
- get_task_struct(thr->task);
- wake_up_process(thr->task);
- printbuf_exit(&name);
- return fd;
-err_put_fd:
- put_unused_fd(fd);
-err_stop_task:
- kthread_stop(thr->task);
-err:
- printbuf_exit(&name);
- return ret;
-}
-
/* returns with ref on ca->ref */
static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
unsigned flags)
@@ -200,132 +138,33 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
#endif
struct fsck_thread {
- struct thread_with_file thr;
- struct printbuf buf;
+ struct thread_with_stdio thr;
struct bch_fs *c;
char **devs;
size_t nr_devs;
struct bch_opts opts;
-
- struct log_output output;
- DARRAY(char) output2;
};
-static void bch2_fsck_thread_free(struct fsck_thread *thr)
+static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
{
- thread_with_file_exit(&thr->thr);
+ struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
if (thr->devs)
for (size_t i = 0; i < thr->nr_devs; i++)
kfree(thr->devs[i]);
- darray_exit(&thr->output2);
- printbuf_exit(&thr->output.buf);
kfree(thr->devs);
kfree(thr);
}
-static int bch2_fsck_thread_release(struct inode *inode, struct file *file)
-{
- struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
-
- bch2_fsck_thread_free(thr);
- return 0;
-}
-
-static bool fsck_thread_ready(struct fsck_thread *thr)
-{
- return thr->output.buf.pos ||
- thr->output2.nr ||
- thr->thr.done;
-}
-
-static ssize_t bch2_fsck_thread_read(struct file *file, char __user *buf,
- size_t len, loff_t *ppos)
-{
- struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
- size_t copied = 0, b;
- int ret = 0;
-
- if ((file->f_flags & O_NONBLOCK) &&
- !fsck_thread_ready(thr))
- return -EAGAIN;
-
- ret = wait_event_interruptible(thr->output.wait,
- fsck_thread_ready(thr));
- if (ret)
- return ret;
-
- if (thr->thr.done)
- return 0;
-
- while (len) {
- ret = darray_make_room(&thr->output2, thr->output.buf.pos);
- if (ret)
- break;
-
- spin_lock_irq(&thr->output.lock);
- b = min_t(size_t, darray_room(thr->output2), thr->output.buf.pos);
-
- memcpy(&darray_top(thr->output2), thr->output.buf.buf, b);
- memmove(thr->output.buf.buf,
- thr->output.buf.buf + b,
- thr->output.buf.pos - b);
-
- thr->output2.nr += b;
- thr->output.buf.pos -= b;
- spin_unlock_irq(&thr->output.lock);
-
- b = min(len, thr->output2.nr);
- if (!b)
- break;
-
- b -= copy_to_user(buf, thr->output2.data, b);
- if (!b) {
- ret = -EFAULT;
- break;
- }
-
- copied += b;
- buf += b;
- len -= b;
-
- memmove(thr->output2.data,
- thr->output2.data + b,
- thr->output2.nr - b);
- thr->output2.nr -= b;
- }
-
- return copied ?: ret;
-}
-
-static __poll_t bch2_fsck_thread_poll(struct file *file, struct poll_table_struct *wait)
-{
- struct fsck_thread *thr = container_of(file->private_data, struct fsck_thread, thr);
-
- poll_wait(file, &thr->output.wait, wait);
-
- return fsck_thread_ready(thr)
- ? EPOLLIN|EPOLLHUP
- : 0;
-}
-
-static const struct file_operations fsck_thread_ops = {
- .release = bch2_fsck_thread_release,
- .read = bch2_fsck_thread_read,
- .poll = bch2_fsck_thread_poll,
- .llseek = no_llseek,
-};
-
static int bch2_fsck_offline_thread_fn(void *arg)
{
struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
- thr->thr.ret = PTR_ERR_OR_ZERO(c);
- if (!thr->thr.ret)
+ thr->thr.thr.ret = PTR_ERR_OR_ZERO(c);
+ if (!thr->thr.thr.ret)
bch2_fs_stop(c);
- thr->thr.done = true;
- wake_up(&thr->output.wait);
+ thread_with_stdio_done(&thr->thr);
return 0;
}
@@ -354,11 +193,6 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
thr->opts = bch2_opts_empty();
thr->nr_devs = arg.nr_devs;
- thr->output.buf = PRINTBUF;
- thr->output.buf.atomic++;
- spin_lock_init(&thr->output.lock);
- init_waitqueue_head(&thr->output.wait);
- darray_init(&thr->output2);
if (copy_from_user(devs, &user_arg->devs[0],
array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
@@ -384,16 +218,15 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
goto err;
}
- opt_set(thr->opts, log_output, (u64)(unsigned long)&thr->output);
+ opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
- ret = run_thread_with_file(&thr->thr,
- &fsck_thread_ops,
- bch2_fsck_offline_thread_fn,
- "bch-fsck");
+ ret = bch2_run_thread_with_stdio(&thr->thr,
+ bch2_fsck_thread_exit,
+ bch2_fsck_offline_thread_fn);
err:
if (ret < 0) {
if (thr)
- bch2_fsck_thread_free(thr);
+ bch2_fsck_thread_exit(&thr->thr);
pr_err("ret %s", bch2_err_str(ret));
}
kfree(devs);
@@ -592,7 +425,7 @@ static int bch2_data_job_release(struct inode *inode, struct file *file)
{
struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
- thread_with_file_exit(&ctx->thr);
+ bch2_thread_with_file_exit(&ctx->thr);
kfree(ctx);
return 0;
}
@@ -642,10 +475,9 @@ static long bch2_ioctl_data(struct bch_fs *c,
ctx->c = c;
ctx->arg = arg;
- ret = run_thread_with_file(&ctx->thr,
- &bcachefs_data_ops,
- bch2_data_thread,
- "bch-data/%s", c->name);
+ ret = bch2_run_thread_with_file(&ctx->thr,
+ &bcachefs_data_ops,
+ bch2_data_thread);
if (ret < 0)
kfree(ctx);
return ret;
@@ -936,24 +768,32 @@ static int bch2_fsck_online_thread_fn(void *arg)
struct fsck_thread *thr = container_of(arg, struct fsck_thread, thr);
struct bch_fs *c = thr->c;
- c->output_filter = current;
- c->output = &thr->output;
+ c->stdio_filter = current;
+ c->stdio = &thr->thr.stdio;
/*
* XXX: can we figure out a way to do this without mucking with c->opts?
*/
+ unsigned old_fix_errors = c->opts.fix_errors;
if (opt_defined(thr->opts, fix_errors))
c->opts.fix_errors = thr->opts.fix_errors;
+ else
+ c->opts.fix_errors = FSCK_FIX_ask;
+
c->opts.fsck = true;
+ set_bit(BCH_FS_fsck_running, &c->flags);
c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
- bch2_run_online_recovery_passes(c);
+ int ret = bch2_run_online_recovery_passes(c);
+
+ clear_bit(BCH_FS_fsck_running, &c->flags);
+ bch_err_fn(c, ret);
- c->output = NULL;
- c->output_filter = NULL;
+ c->stdio = NULL;
+ c->stdio_filter = NULL;
+ c->opts.fix_errors = old_fix_errors;
- thr->thr.done = true;
- wake_up(&thr->output.wait);
+ thread_with_stdio_done(&thr->thr);
up(&c->online_fsck_mutex);
bch2_ro_ref_put(c);
@@ -988,11 +828,6 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
thr->c = c;
thr->opts = bch2_opts_empty();
- thr->output.buf = PRINTBUF;
- thr->output.buf.atomic++;
- spin_lock_init(&thr->output.lock);
- init_waitqueue_head(&thr->output.wait);
- darray_init(&thr->output2);
if (arg.opts) {
char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
@@ -1005,15 +840,14 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
goto err;
}
- ret = run_thread_with_file(&thr->thr,
- &fsck_thread_ops,
- bch2_fsck_online_thread_fn,
- "bch-fsck");
+ ret = bch2_run_thread_with_stdio(&thr->thr,
+ bch2_fsck_thread_exit,
+ bch2_fsck_online_thread_fn);
err:
if (ret < 0) {
bch_err_fn(c, ret);
if (thr)
- bch2_fsck_thread_free(thr);
+ bch2_fsck_thread_exit(&thr->thr);
up(&c->online_fsck_mutex);
bch2_ro_ref_put(c);
}
diff --git a/libbcachefs/darray.h b/libbcachefs/darray.h
index d867ee62..4b340d13 100644
--- a/libbcachefs/darray.h
+++ b/libbcachefs/darray.h
@@ -20,7 +20,7 @@ struct { \
#define DARRAY(_type) DARRAY_PREALLOCATED(_type, 0)
typedef DARRAY(char) darray_char;
-typedef DARRAY(char *) darray_str;
+typedef DARRAY(char *) darray_str;
int __bch2_darray_resize(darray_char *, size_t, size_t, gfp_t);
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index de5bfc0d..d6418948 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -627,7 +627,7 @@ restart:
prt_printf(&i->buf, "backtrace:");
prt_newline(&i->buf);
printbuf_indent_add(&i->buf, 2);
- bch2_prt_task_backtrace(&i->buf, task);
+ bch2_prt_task_backtrace(&i->buf, task, 0);
printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
@@ -930,8 +930,6 @@ void bch2_debug_exit(void)
int __init bch2_debug_init(void)
{
- int ret = 0;
-
bch_debug = debugfs_create_dir("bcachefs", NULL);
- return ret;
+ return 0;
}
diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c
index 1cd6ba8d..06a7df52 100644
--- a/libbcachefs/disk_groups.c
+++ b/libbcachefs/disk_groups.c
@@ -557,7 +557,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v)
: NULL;
if (ca && percpu_ref_tryget(&ca->io_ref)) {
- prt_printf(out, "/dev/%pg", ca->disk_sb.bdev);
+ prt_printf(out, "/dev/%s", ca->name);
percpu_ref_put(&ca->io_ref);
} else if (ca) {
prt_printf(out, "offline device %u", t.dev);
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index e89185a2..d802bc63 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -3,6 +3,7 @@
/* erasure coding */
#include "bcachefs.h"
+#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
#include "bkey_buf.h"
@@ -156,6 +157,306 @@ void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
}
}
+/* Triggers: */
+
+static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
+ struct bkey_s_c_stripe s,
+ unsigned idx, bool deleting)
+{
+ struct bch_fs *c = trans->c;
+ const struct bch_extent_ptr *ptr = &s.v->ptrs[idx];
+ struct btree_iter iter;
+ struct bkey_i_alloc_v4 *a;
+ enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant
+ ? BCH_DATA_parity : 0;
+ s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0;
+ int ret = 0;
+
+ if (deleting)
+ sectors = -sectors;
+
+ a = bch2_trans_start_alloc_update(trans, &iter, PTR_BUCKET_POS(c, ptr));
+ if (IS_ERR(a))
+ return PTR_ERR(a);
+
+ ret = bch2_check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
+ a->v.gen, a->v.data_type,
+ a->v.dirty_sectors);
+ if (ret)
+ goto err;
+
+ if (!deleting) {
+ if (bch2_trans_inconsistent_on(a->v.stripe ||
+ a->v.stripe_redundancy, trans,
+ "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)",
+ iter.pos.inode, iter.pos.offset, a->v.gen,
+ bch2_data_types[a->v.data_type],
+ a->v.dirty_sectors,
+ a->v.stripe, s.k->p.offset)) {
+ ret = -EIO;
+ goto err;
+ }
+
+ if (bch2_trans_inconsistent_on(data_type && a->v.dirty_sectors, trans,
+ "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu",
+ iter.pos.inode, iter.pos.offset, a->v.gen,
+ bch2_data_types[a->v.data_type],
+ a->v.dirty_sectors,
+ s.k->p.offset)) {
+ ret = -EIO;
+ goto err;
+ }
+
+ a->v.stripe = s.k->p.offset;
+ a->v.stripe_redundancy = s.v->nr_redundant;
+ a->v.data_type = BCH_DATA_stripe;
+ } else {
+ if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
+ a->v.stripe_redundancy != s.v->nr_redundant, trans,
+ "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)",
+ iter.pos.inode, iter.pos.offset, a->v.gen,
+ s.k->p.offset, a->v.stripe)) {
+ ret = -EIO;
+ goto err;
+ }
+
+ a->v.stripe = 0;
+ a->v.stripe_redundancy = 0;
+ a->v.data_type = alloc_data_type(a->v, BCH_DATA_user);
+ }
+
+ a->v.dirty_sectors += sectors;
+ if (data_type)
+ a->v.data_type = !deleting ? data_type : 0;
+
+ ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
+ if (ret)
+ goto err;
+err:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+static int mark_stripe_bucket(struct btree_trans *trans,
+ struct bkey_s_c k,
+ unsigned ptr_idx,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
+ unsigned nr_data = s->nr_blocks - s->nr_redundant;
+ bool parity = ptr_idx >= nr_data;
+ enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
+ s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
+ const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ struct bucket old, new, *g;
+ struct printbuf buf = PRINTBUF;
+ int ret = 0;
+
+ BUG_ON(!(flags & BTREE_TRIGGER_GC));
+
+ /* * XXX doesn't handle deletion */
+
+ percpu_down_read(&c->mark_lock);
+ g = PTR_GC_BUCKET(ca, ptr);
+
+ if (g->dirty_sectors ||
+ (g->stripe && g->stripe != k.k->p.offset)) {
+ bch2_fs_inconsistent(c,
+ "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s",
+ ptr->dev, PTR_BUCKET_NR(ca, ptr), g->gen,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EINVAL;
+ goto err;
+ }
+
+ bucket_lock(g);
+ old = *g;
+
+ ret = bch2_check_bucket_ref(trans, k, ptr, sectors, data_type,
+ g->gen, g->data_type,
+ g->dirty_sectors);
+ if (ret)
+ goto err;
+
+ g->data_type = data_type;
+ g->dirty_sectors += sectors;
+
+ g->stripe = k.k->p.offset;
+ g->stripe_redundancy = s->nr_redundant;
+ new = *g;
+err:
+ bucket_unlock(g);
+ if (!ret)
+ bch2_dev_usage_update_m(c, ca, &old, &new);
+ percpu_up_read(&c->mark_lock);
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_trigger_stripe(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old, struct bkey_s _new,
+ unsigned flags)
+{
+ struct bkey_s_c new = _new.s_c;
+ struct bch_fs *c = trans->c;
+ u64 idx = new.k->p.offset;
+ const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
+ ? bkey_s_c_to_stripe(old).v : NULL;
+ const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
+ ? bkey_s_c_to_stripe(new).v : NULL;
+
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ /*
+ * If the pointers aren't changing, we don't need to do anything:
+ */
+ if (new_s && old_s &&
+ new_s->nr_blocks == old_s->nr_blocks &&
+ new_s->nr_redundant == old_s->nr_redundant &&
+ !memcmp(old_s->ptrs, new_s->ptrs,
+ new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
+ return 0;
+
+ BUG_ON(new_s && old_s &&
+ (new_s->nr_blocks != old_s->nr_blocks ||
+ new_s->nr_redundant != old_s->nr_redundant));
+
+ if (new_s) {
+ s64 sectors = le16_to_cpu(new_s->sectors);
+
+ struct bch_replicas_padded r;
+ bch2_bkey_to_replicas(&r.e, new);
+ int ret = bch2_update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
+ if (ret)
+ return ret;
+ }
+
+ if (old_s) {
+ s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
+
+ struct bch_replicas_padded r;
+ bch2_bkey_to_replicas(&r.e, old);
+ int ret = bch2_update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
+ if (ret)
+ return ret;
+ }
+
+ unsigned nr_blocks = new_s ? new_s->nr_blocks : old_s->nr_blocks;
+ for (unsigned i = 0; i < nr_blocks; i++) {
+ if (new_s && old_s &&
+ !memcmp(&new_s->ptrs[i],
+ &old_s->ptrs[i],
+ sizeof(new_s->ptrs[i])))
+ continue;
+
+ if (new_s) {
+ int ret = bch2_trans_mark_stripe_bucket(trans,
+ bkey_s_c_to_stripe(new), i, false);
+ if (ret)
+ return ret;
+ }
+
+ if (old_s) {
+ int ret = bch2_trans_mark_stripe_bucket(trans,
+ bkey_s_c_to_stripe(old), i, true);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+
+ if (!(flags & (BTREE_TRIGGER_TRANSACTIONAL|BTREE_TRIGGER_GC))) {
+ struct stripe *m = genradix_ptr(&c->stripes, idx);
+
+ if (!m) {
+ struct printbuf buf1 = PRINTBUF;
+ struct printbuf buf2 = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf1, c, old);
+ bch2_bkey_val_to_text(&buf2, c, new);
+ bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n"
+ "old %s\n"
+ "new %s", idx, buf1.buf, buf2.buf);
+ printbuf_exit(&buf2);
+ printbuf_exit(&buf1);
+ bch2_inconsistent_error(c);
+ return -1;
+ }
+
+ if (!new_s) {
+ bch2_stripes_heap_del(c, m, idx);
+
+ memset(m, 0, sizeof(*m));
+ } else {
+ m->sectors = le16_to_cpu(new_s->sectors);
+ m->algorithm = new_s->algorithm;
+ m->nr_blocks = new_s->nr_blocks;
+ m->nr_redundant = new_s->nr_redundant;
+ m->blocks_nonempty = 0;
+
+ for (unsigned i = 0; i < new_s->nr_blocks; i++)
+ m->blocks_nonempty += !!stripe_blockcount_get(new_s, i);
+
+ if (!old_s)
+ bch2_stripes_heap_insert(c, m, idx);
+ else
+ bch2_stripes_heap_update(c, m, idx);
+ }
+ }
+
+ if (flags & BTREE_TRIGGER_GC) {
+ struct gc_stripe *m =
+ genradix_ptr_alloc(&c->gc_stripes, idx, GFP_KERNEL);
+
+ if (!m) {
+ bch_err(c, "error allocating memory for gc_stripes, idx %llu",
+ idx);
+ return -BCH_ERR_ENOMEM_mark_stripe;
+ }
+ /*
+ * This will be wrong when we bring back runtime gc: we should
+ * be unmarking the old key and then marking the new key
+ */
+ m->alive = true;
+ m->sectors = le16_to_cpu(new_s->sectors);
+ m->nr_blocks = new_s->nr_blocks;
+ m->nr_redundant = new_s->nr_redundant;
+
+ for (unsigned i = 0; i < new_s->nr_blocks; i++)
+ m->ptrs[i] = new_s->ptrs[i];
+
+ bch2_bkey_to_replicas(&m->r.e, new);
+
+ /*
+ * gc recalculates this field from stripe ptr
+ * references:
+ */
+ memset(m->block_sectors, 0, sizeof(m->block_sectors));
+
+ for (unsigned i = 0; i < new_s->nr_blocks; i++) {
+ int ret = mark_stripe_bucket(trans, new, i, flags);
+ if (ret)
+ return ret;
+ }
+
+ int ret = bch2_update_replicas(c, new, &m->r.e,
+ ((s64) m->sectors * m->nr_redundant),
+ 0, true);
+ if (ret) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_bkey_val_to_text(&buf, c, new);
+ bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
+ printbuf_exit(&buf);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
/* returns blocknr in stripe that we matched: */
static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s,
struct bkey_s_c k, unsigned *block)
diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h
index 7d0237c9..f4369b02 100644
--- a/libbcachefs/ec.h
+++ b/libbcachefs/ec.h
@@ -12,13 +12,14 @@ int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_stripe_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
+int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
#define bch2_bkey_ops_stripe ((struct bkey_ops) { \
.key_invalid = bch2_stripe_invalid, \
.val_to_text = bch2_stripe_to_text, \
.swab = bch2_ptr_swab, \
- .trans_trigger = bch2_trans_mark_stripe, \
- .atomic_trigger = bch2_mark_stripe, \
+ .trigger = bch2_trigger_stripe, \
.min_val_size = 8, \
})
diff --git a/libbcachefs/error.c b/libbcachefs/error.c
index aa4f7f49..d32c8beb 100644
--- a/libbcachefs/error.c
+++ b/libbcachefs/error.c
@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "error.h"
#include "super.h"
+#include "thread_with_file.h"
#define FSCK_ERR_RATELIMIT_NR 10
@@ -27,7 +28,7 @@ bool bch2_inconsistent_error(struct bch_fs *c)
void bch2_topology_error(struct bch_fs *c)
{
set_bit(BCH_FS_topology_error, &c->flags);
- if (test_bit(BCH_FS_fsck_done, &c->flags))
+ if (!test_bit(BCH_FS_fsck_running, &c->flags))
bch2_inconsistent_error(c);
}
@@ -69,40 +70,66 @@ enum ask_yn {
YN_ALLYES,
};
+static enum ask_yn parse_yn_response(char *buf)
+{
+ buf = strim(buf);
+
+ if (strlen(buf) == 1)
+ switch (buf[0]) {
+ case 'n':
+ return YN_NO;
+ case 'y':
+ return YN_YES;
+ case 'N':
+ return YN_ALLNO;
+ case 'Y':
+ return YN_ALLYES;
+ }
+ return -1;
+}
+
#ifdef __KERNEL__
-#define bch2_fsck_ask_yn() YN_NO
+static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
+{
+ struct stdio_redirect *stdio = c->stdio;
+
+ if (c->stdio_filter && c->stdio_filter != current)
+ stdio = NULL;
+
+ if (!stdio)
+ return YN_NO;
+
+ char buf[100];
+ int ret;
+
+ do {
+ bch2_print(c, " (y,n, or Y,N for all errors of this type) ");
+
+ int r = bch2_stdio_redirect_readline(stdio, buf, sizeof(buf) - 1);
+ if (r < 0)
+ return YN_NO;
+ buf[r] = '\0';
+ } while ((ret = parse_yn_response(buf)) < 0);
+
+ return ret;
+}
#else
#include "tools-util.h"
-enum ask_yn bch2_fsck_ask_yn(void)
+static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c)
{
char *buf = NULL;
size_t buflen = 0;
- bool ret;
+ int ret;
- while (true) {
+ do {
fputs(" (y,n, or Y,N for all errors of this type) ", stdout);
fflush(stdout);
if (getline(&buf, &buflen, stdin) < 0)
die("error reading from standard input");
-
- strim(buf);
- if (strlen(buf) != 1)
- continue;
-
- switch (buf[0]) {
- case 'n':
- return YN_NO;
- case 'y':
- return YN_YES;
- case 'N':
- return YN_ALLNO;
- case 'Y':
- return YN_ALLYES;
- }
- }
+ } while ((ret = parse_yn_response(buf)) < 0);
free(buf);
return ret;
@@ -114,7 +141,7 @@ static struct fsck_err_state *fsck_err_get(struct bch_fs *c, const char *fmt)
{
struct fsck_err_state *s;
- if (test_bit(BCH_FS_fsck_done, &c->flags))
+ if (!test_bit(BCH_FS_fsck_running, &c->flags))
return NULL;
list_for_each_entry(s, &c->fsck_error_msgs, list)
@@ -152,7 +179,8 @@ int bch2_fsck_err(struct bch_fs *c,
struct printbuf buf = PRINTBUF, *out = &buf;
int ret = -BCH_ERR_fsck_ignore;
- if (test_bit(err, c->sb.errors_silent))
+ if ((flags & FSCK_CAN_FIX) &&
+ test_bit(err, c->sb.errors_silent))
return -BCH_ERR_fsck_fix;
bch2_sb_error_count(c, err);
@@ -196,7 +224,7 @@ int bch2_fsck_err(struct bch_fs *c,
prt_printf(out, bch2_log_msg(c, ""));
#endif
- if (test_bit(BCH_FS_fsck_done, &c->flags)) {
+ if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
if (c->opts.errors != BCH_ON_ERROR_continue ||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
prt_str(out, ", shutting down");
@@ -221,10 +249,13 @@ int bch2_fsck_err(struct bch_fs *c,
int ask;
prt_str(out, ": fix?");
- bch2_print_string_as_lines(KERN_ERR, out->buf);
+ if (bch2_fs_stdio_redirect(c))
+ bch2_print(c, "%s", out->buf);
+ else
+ bch2_print_string_as_lines(KERN_ERR, out->buf);
print = false;
- ask = bch2_fsck_ask_yn();
+ ask = bch2_fsck_ask_yn(c);
if (ask >= YN_ALLNO && s)
s->fix = ask == YN_ALLNO
@@ -253,10 +284,14 @@ int bch2_fsck_err(struct bch_fs *c,
!(flags & FSCK_CAN_IGNORE)))
ret = -BCH_ERR_fsck_errors_not_fixed;
- if (print)
- bch2_print_string_as_lines(KERN_ERR, out->buf);
+ if (print) {
+ if (bch2_fs_stdio_redirect(c))
+ bch2_print(c, "%s\n", out->buf);
+ else
+ bch2_print_string_as_lines(KERN_ERR, out->buf);
+ }
- if (!test_bit(BCH_FS_fsck_done, &c->flags) &&
+ if (test_bit(BCH_FS_fsck_running, &c->flags) &&
(ret != -BCH_ERR_fsck_fix &&
ret != -BCH_ERR_fsck_ignore))
bch_err(c, "Unable to continue, halting");
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 77ae4476..a855c94d 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -415,8 +415,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
.key_invalid = bch2_btree_ptr_invalid, \
.val_to_text = bch2_btree_ptr_to_text, \
.swab = bch2_ptr_swab, \
- .trans_trigger = bch2_trans_mark_extent, \
- .atomic_trigger = bch2_mark_extent, \
+ .trigger = bch2_trigger_extent, \
})
#define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \
@@ -424,8 +423,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned,
.val_to_text = bch2_btree_ptr_v2_to_text, \
.swab = bch2_ptr_swab, \
.compat = bch2_btree_ptr_v2_compat, \
- .trans_trigger = bch2_trans_mark_extent, \
- .atomic_trigger = bch2_mark_extent, \
+ .trigger = bch2_trigger_extent, \
.min_val_size = 40, \
})
@@ -439,8 +437,7 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
.swab = bch2_ptr_swab, \
.key_normalize = bch2_extent_normalize, \
.key_merge = bch2_extent_merge, \
- .trans_trigger = bch2_trans_mark_extent, \
- .atomic_trigger = bch2_mark_extent, \
+ .trigger = bch2_trigger_extent, \
})
/* KEY_TYPE_reservation: */
@@ -454,8 +451,7 @@ bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
.key_invalid = bch2_reservation_invalid, \
.val_to_text = bch2_reservation_to_text, \
.key_merge = bch2_reservation_merge, \
- .trans_trigger = bch2_trans_mark_reservation, \
- .atomic_trigger = bch2_mark_reservation, \
+ .trigger = bch2_trigger_reservation, \
.min_val_size = 8, \
})
diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c
index 4496cf91..1c1ea0f0 100644
--- a/libbcachefs/fs-common.c
+++ b/libbcachefs/fs-common.c
@@ -166,10 +166,8 @@ int bch2_create_trans(struct btree_trans *trans,
if (ret)
goto err;
- if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
- new_inode->bi_dir = dir_u->bi_inum;
- new_inode->bi_dir_offset = dir_offset;
- }
+ new_inode->bi_dir = dir_u->bi_inum;
+ new_inode->bi_dir_offset = dir_offset;
}
inode_iter.flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
@@ -228,10 +226,8 @@ int bch2_link_trans(struct btree_trans *trans,
if (ret)
goto err;
- if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
- inode_u->bi_dir = dir.inum;
- inode_u->bi_dir_offset = dir_offset;
- }
+ inode_u->bi_dir = dir.inum;
+ inode_u->bi_dir_offset = dir_offset;
ret = bch2_inode_write(trans, &dir_iter, dir_u) ?:
bch2_inode_write(trans, &inode_iter, inode_u);
@@ -414,21 +410,19 @@ int bch2_rename_trans(struct btree_trans *trans,
goto err;
}
- if (c->sb.version >= bcachefs_metadata_version_inode_backpointers) {
- src_inode_u->bi_dir = dst_dir_u->bi_inum;
- src_inode_u->bi_dir_offset = dst_offset;
+ src_inode_u->bi_dir = dst_dir_u->bi_inum;
+ src_inode_u->bi_dir_offset = dst_offset;
- if (mode == BCH_RENAME_EXCHANGE) {
- dst_inode_u->bi_dir = src_dir_u->bi_inum;
- dst_inode_u->bi_dir_offset = src_offset;
- }
+ if (mode == BCH_RENAME_EXCHANGE) {
+ dst_inode_u->bi_dir = src_dir_u->bi_inum;
+ dst_inode_u->bi_dir_offset = src_offset;
+ }
- if (mode == BCH_RENAME_OVERWRITE &&
- dst_inode_u->bi_dir == dst_dir_u->bi_inum &&
- dst_inode_u->bi_dir_offset == src_offset) {
- dst_inode_u->bi_dir = 0;
- dst_inode_u->bi_dir_offset = 0;
- }
+ if (mode == BCH_RENAME_OVERWRITE &&
+ dst_inode_u->bi_dir == dst_dir_u->bi_inum &&
+ dst_inode_u->bi_dir_offset == src_offset) {
+ dst_inode_u->bi_dir = 0;
+ dst_inode_u->bi_dir_offset = 0;
}
if (mode == BCH_RENAME_OVERWRITE) {
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 98bd5bab..9fea8976 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -194,6 +194,16 @@ int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
struct bch_fs *c = inode->v.i_sb->s_fs_info;
int ret;
+ /*
+ * check if unlinked, disable/defer until relink
+ */
+
+ /*
+ * also: add a mode where a file is a tmpfile until fully,
+ * asynchronously written
+ */
+
+
ret = file_write_and_wait_range(file, start, end);
if (ret)
goto out;
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index de1617ec..4f0ecd60 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -870,8 +870,7 @@ static int check_inode(struct btree_trans *trans,
return 0;
}
- if (u.bi_flags & BCH_INODE_unlinked &&
- c->sb.version >= bcachefs_metadata_version_deleted_inodes) {
+ if (u.bi_flags & BCH_INODE_unlinked) {
ret = check_inode_deleted_list(trans, k.k->p);
if (ret < 0)
return ret;
@@ -1594,13 +1593,12 @@ static int check_dirent_target(struct btree_trans *trans,
d = dirent_i_to_s_c(n);
}
- if (d.v->d_type == DT_SUBVOL &&
- target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol) &&
- (c->sb.version < bcachefs_metadata_version_subvol_dirent ||
- fsck_err(c, dirent_d_parent_subvol_wrong,
- "dirent has wrong d_parent_subvol field: got %u, should be %u",
- le32_to_cpu(d.v->d_parent_subvol),
- target->bi_parent_subvol))) {
+ if (fsck_err_on(d.v->d_type == DT_SUBVOL &&
+ target->bi_parent_subvol != le32_to_cpu(d.v->d_parent_subvol),
+ c, dirent_d_parent_subvol_wrong,
+ "dirent has wrong d_parent_subvol field: got %u, should be %u",
+ le32_to_cpu(d.v->d_parent_subvol),
+ target->bi_parent_subvol)) {
n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
ret = PTR_ERR_OR_ZERO(n);
if (ret)
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index c39844b8..37dce96f 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -561,64 +561,46 @@ static inline bool bkey_is_deleted_inode(struct bkey_s_c k)
return bkey_inode_flags(k) & BCH_INODE_unlinked;
}
-int bch2_trans_mark_inode(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old,
- struct bkey_i *new,
- unsigned flags)
+int bch2_trigger_inode(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old,
+ struct bkey_s new,
+ unsigned flags)
{
- int nr = bkey_is_inode(&new->k) - bkey_is_inode(old.k);
- bool old_deleted = bkey_is_deleted_inode(old);
- bool new_deleted = bkey_is_deleted_inode(bkey_i_to_s_c(new));
+ s64 nr = bkey_is_inode(new.k) - bkey_is_inode(old.k);
- if (nr) {
- int ret = bch2_replicas_deltas_realloc(trans, 0);
- struct replicas_delta_list *d = trans->fs_usage_deltas;
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ if (nr) {
+ int ret = bch2_replicas_deltas_realloc(trans, 0);
+ if (ret)
+ return ret;
- if (ret)
- return ret;
-
- d->nr_inodes += nr;
- }
+ trans->fs_usage_deltas->nr_inodes += nr;
+ }
- if (old_deleted != new_deleted) {
- int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new->k.p, new_deleted);
- if (ret)
- return ret;
+ bool old_deleted = bkey_is_deleted_inode(old);
+ bool new_deleted = bkey_is_deleted_inode(new.s_c);
+ if (old_deleted != new_deleted) {
+ int ret = bch2_btree_bit_mod(trans, BTREE_ID_deleted_inodes, new.k->p, new_deleted);
+ if (ret)
+ return ret;
+ }
}
- return 0;
-}
+ if (!(flags & BTREE_TRIGGER_TRANSACTIONAL) && (flags & BTREE_TRIGGER_INSERT)) {
+ BUG_ON(!trans->journal_res.seq);
-int bch2_mark_inode(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
-{
- struct bch_fs *c = trans->c;
- struct bch_fs_usage *fs_usage;
- u64 journal_seq = trans->journal_res.seq;
-
- if (flags & BTREE_TRIGGER_INSERT) {
- struct bch_inode_v3 *v = (struct bch_inode_v3 *) new.v;
-
- BUG_ON(!journal_seq);
- BUG_ON(new.k->type != KEY_TYPE_inode_v3);
-
- v->bi_journal_seq = cpu_to_le64(journal_seq);
+ bkey_s_to_inode_v3(new).v->bi_journal_seq = cpu_to_le64(trans->journal_res.seq);
}
if (flags & BTREE_TRIGGER_GC) {
- percpu_down_read(&c->mark_lock);
- preempt_disable();
-
- fs_usage = fs_usage_ptr(c, journal_seq, flags & BTREE_TRIGGER_GC);
- fs_usage->nr_inodes += bkey_is_inode(new.k);
- fs_usage->nr_inodes -= bkey_is_inode(old.k);
+ struct bch_fs *c = trans->c;
- preempt_enable();
+ percpu_down_read(&c->mark_lock);
+ this_cpu_add(c->usage_gc->nr_inodes, nr);
percpu_up_read(&c->mark_lock);
}
+
return 0;
}
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 88818a33..b63f3125 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -17,32 +17,27 @@ int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
-int bch2_trans_mark_inode(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_i *, unsigned);
-int bch2_mark_inode(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
#define bch2_bkey_ops_inode ((struct bkey_ops) { \
.key_invalid = bch2_inode_invalid, \
.val_to_text = bch2_inode_to_text, \
- .trans_trigger = bch2_trans_mark_inode, \
- .atomic_trigger = bch2_mark_inode, \
+ .trigger = bch2_trigger_inode, \
.min_val_size = 16, \
})
#define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \
.key_invalid = bch2_inode_v2_invalid, \
.val_to_text = bch2_inode_to_text, \
- .trans_trigger = bch2_trans_mark_inode, \
- .atomic_trigger = bch2_mark_inode, \
+ .trigger = bch2_trigger_inode, \
.min_val_size = 32, \
})
#define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \
.key_invalid = bch2_inode_v3_invalid, \
.val_to_text = bch2_inode_to_text, \
- .trans_trigger = bch2_trans_mark_inode, \
- .atomic_trigger = bch2_mark_inode, \
+ .trigger = bch2_trigger_inode, \
.min_val_size = 48, \
})
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index 42cad83e..93a24fef 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -414,11 +414,11 @@ enum fsck_err_opts {
OPT_BOOL(), \
BCH2_NO_SB_OPT, false, \
NULL, "Allocate the buckets_nouse bitmap") \
- x(log_output, u64, \
+ x(stdio, u64, \
0, \
OPT_UINT(0, S64_MAX), \
BCH2_NO_SB_OPT, false, \
- NULL, "Pointer to a struct log_output") \
+ NULL, "Pointer to a struct stdio_redirect") \
x(project, u8, \
OPT_INODE, \
OPT_BOOL(), \
@@ -458,7 +458,13 @@ enum fsck_err_opts {
OPT_UINT(0, BCH_REPLICAS_MAX), \
BCH2_NO_SB_OPT, 1, \
"n", "Data written to this device will be considered\n"\
- "to have already been replicated n times")
+ "to have already been replicated n times") \
+ x(btree_node_prefetch, u8, \
+ OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ BCH2_NO_SB_OPT, true, \
+ NULL, "BTREE_ITER_PREFETCH casuse btree nodes to be\n"\
+ " prefetched sequentially")
struct bch_opts {
#define x(_name, _bits, ...) unsigned _name##_defined:1;
diff --git a/libbcachefs/printbuf.c b/libbcachefs/printbuf.c
index 187b0377..accf246c 100644
--- a/libbcachefs/printbuf.c
+++ b/libbcachefs/printbuf.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: LGPL-2.1+
/* Copyright (C) 2022 Kent Overstreet */
-#include <linux/err.h>
#include <linux/bitmap.h>
+#include <linux/err.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/slab.h>
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 3e49209d..e1f0da6a 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -575,7 +575,7 @@ u64 bch2_recovery_passes_from_stable(u64 v)
return ret;
}
-static u64 check_version_upgrade(struct bch_fs *c)
+static bool check_version_upgrade(struct bch_fs *c)
{
unsigned latest_compatible = bch2_latest_compatible_version(c->sb.version);
unsigned latest_version = bcachefs_metadata_version_current;
@@ -624,10 +624,15 @@ static u64 check_version_upgrade(struct bch_fs *c)
bch2_version_to_text(&buf, new_version);
prt_newline(&buf);
- u64 recovery_passes = bch2_upgrade_recovery_passes(c, old_version, new_version);
- if (recovery_passes) {
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+ __le64 passes = ext->recovery_passes_required[0];
+ bch2_sb_set_upgrade(c, old_version, new_version);
+ passes = ext->recovery_passes_required[0] & ~passes;
+
+ if (passes) {
prt_str(&buf, " running recovery passes: ");
- prt_bitflags(&buf, bch2_recovery_passes, recovery_passes);
+ prt_bitflags(&buf, bch2_recovery_passes,
+ bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
}
bch_info(c, "%s", buf.buf);
@@ -635,10 +640,6 @@ static u64 check_version_upgrade(struct bch_fs *c)
bch2_sb_upgrade(c, new_version);
printbuf_exit(&buf);
-
- struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
- ext->recovery_passes_required[0] |=
- cpu_to_le64(bch2_recovery_passes_to_stable(recovery_passes));
return true;
}
@@ -795,23 +796,17 @@ int bch2_fs_recovery(struct bch_fs *c)
prt_bitflags(&buf, bch2_recovery_passes, sb_passes);
bch_info(c, "%s", buf.buf);
printbuf_exit(&buf);
- c->recovery_passes_explicit |= sb_passes;
}
- if (bcachefs_metadata_version_current < c->sb.version) {
+ if (bch2_check_version_downgrade(c)) {
struct printbuf buf = PRINTBUF;
prt_str(&buf, "Version downgrade required:\n");
- u64 passes = ext->recovery_passes_required[0];
- ret = bch2_sb_set_downgrade(c,
+ __le64 passes = ext->recovery_passes_required[0];
+ bch2_sb_set_downgrade(c,
BCH_VERSION_MINOR(bcachefs_metadata_version_current),
BCH_VERSION_MINOR(c->sb.version));
- if (ret) {
- mutex_unlock(&c->sb_lock);
- goto err;
- }
-
passes = ext->recovery_passes_required[0] & ~passes;
if (passes) {
prt_str(&buf, " running recovery passes: ");
@@ -821,8 +816,6 @@ int bch2_fs_recovery(struct bch_fs *c)
bch_info(c, "%s", buf.buf);
printbuf_exit(&buf);
-
- bch2_sb_maybe_downgrade(c);
write_sb = true;
}
@@ -839,6 +832,9 @@ int bch2_fs_recovery(struct bch_fs *c)
if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
+ if (c->opts.fsck)
+ set_bit(BCH_FS_fsck_running, &c->flags);
+
ret = bch2_blacklist_table_initialize(c);
if (ret) {
bch_err(c, "error initializing blacklist table");
@@ -979,6 +975,8 @@ use_clean:
if (ret)
goto err;
+ clear_bit(BCH_FS_fsck_running, &c->flags);
+
/* If we fixed errors, verify that fs is actually clean now: */
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
test_bit(BCH_FS_errors_fixed, &c->flags) &&
@@ -1073,7 +1071,6 @@ use_clean:
ret = 0;
out:
- set_bit(BCH_FS_fsck_done, &c->flags);
bch2_flush_fsck_errs(c);
if (!c->opts.keep_journal &&
@@ -1109,7 +1106,7 @@ int bch2_fs_initialize(struct bch_fs *c)
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
- bch2_sb_maybe_downgrade(c);
+ bch2_check_version_downgrade(c);
if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
bch2_sb_upgrade(c, bcachefs_metadata_version_current);
@@ -1120,7 +1117,6 @@ int bch2_fs_initialize(struct bch_fs *c)
c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
set_bit(BCH_FS_may_go_rw, &c->flags);
- set_bit(BCH_FS_fsck_done, &c->flags);
for (unsigned i = 0; i < BTREE_ID_NR; i++)
bch2_btree_root_alloc(c, i);
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index 9f9c8a24..b24b71bc 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -34,15 +34,14 @@ int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k,
struct printbuf *err)
{
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+ int ret = 0;
- if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix &&
- le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad)) {
- prt_printf(err, "idx < front_pad (%llu < %u)",
- le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad));
- return -EINVAL;
- }
-
- return 0;
+ bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad),
+ c, err, reflink_p_front_pad_bad,
+ "idx < front_pad (%llu < %u)",
+ le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad));
+fsck_err:
+ return ret;
}
void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
@@ -74,7 +73,7 @@ bool bch2_reflink_p_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
return true;
}
-static int trans_mark_reflink_p_segment(struct btree_trans *trans,
+static int trans_trigger_reflink_p_segment(struct btree_trans *trans,
struct bkey_s_c_reflink_p p,
u64 *idx, unsigned flags)
{
@@ -93,7 +92,7 @@ static int trans_mark_reflink_p_segment(struct btree_trans *trans,
if (ret)
goto err;
- refcount = bkey_refcount(k);
+ refcount = bkey_refcount(bkey_i_to_s(k));
if (!refcount) {
bch2_bkey_val_to_text(&buf, c, p.s_c);
bch2_trans_inconsistent(trans,
@@ -141,47 +140,16 @@ err:
return ret;
}
-static int __trans_mark_reflink_p(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c k, unsigned flags)
-{
- struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
- u64 idx, end_idx;
- int ret = 0;
-
- idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
- end_idx = le64_to_cpu(p.v->idx) + p.k->size +
- le32_to_cpu(p.v->back_pad);
-
- while (idx < end_idx && !ret)
- ret = trans_mark_reflink_p_segment(trans, p, &idx, flags);
- return ret;
-}
-
-int bch2_trans_mark_reflink_p(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old,
- struct bkey_i *new,
- unsigned flags)
-{
- if (flags & BTREE_TRIGGER_INSERT) {
- struct bch_reflink_p *v = &bkey_i_to_reflink_p(new)->v;
-
- v->front_pad = v->back_pad = 0;
- }
-
- return trigger_run_overwrite_then_insert(__trans_mark_reflink_p, trans, btree_id, level, old, new, flags);
-}
-
-static s64 __bch2_mark_reflink_p(struct btree_trans *trans,
- struct bkey_s_c_reflink_p p,
- u64 start, u64 end,
- u64 *idx, unsigned flags, size_t r_idx)
+static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans,
+ struct bkey_s_c_reflink_p p,
+ u64 *idx, unsigned flags, size_t r_idx)
{
struct bch_fs *c = trans->c;
struct reflink_gc *r;
int add = !(flags & BTREE_TRIGGER_OVERWRITE) ? 1 : -1;
- u64 next_idx = end;
+ u64 start = le64_to_cpu(p.v->idx);
+ u64 end = le64_to_cpu(p.v->idx) + p.k->size;
+ u64 next_idx = end + le32_to_cpu(p.v->back_pad);
s64 ret = 0;
struct printbuf buf = PRINTBUF;
@@ -205,20 +173,24 @@ not_found:
" missing range %llu-%llu",
(bch2_bkey_val_to_text(&buf, c, p.s_c), buf.buf),
*idx, next_idx)) {
- struct bkey_i_error *new;
-
- new = bch2_trans_kmalloc(trans, sizeof(*new));
- ret = PTR_ERR_OR_ZERO(new);
+ struct bkey_i *update = bch2_bkey_make_mut_noupdate(trans, p.s_c);
+ ret = PTR_ERR_OR_ZERO(update);
if (ret)
goto err;
- bkey_init(&new->k);
- new->k.type = KEY_TYPE_error;
- new->k.p = bkey_start_pos(p.k);
- new->k.p.offset += *idx - start;
- bch2_key_resize(&new->k, next_idx - *idx);
- ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i,
- BTREE_TRIGGER_NORUN);
+ if (next_idx <= start) {
+ bkey_i_to_reflink_p(update)->v.front_pad = cpu_to_le32(start - next_idx);
+ } else if (*idx >= end) {
+ bkey_i_to_reflink_p(update)->v.back_pad = cpu_to_le32(*idx - end);
+ } else {
+ bkey_error_init(update);
+ update->k.p = p.k->p;
+ update->k.p.offset = next_idx;
+ update->k.size = next_idx - *idx;
+ set_bkey_val_u64s(&update->k, 0);
+ }
+
+ ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, update, BTREE_TRIGGER_NORUN);
}
*idx = next_idx;
@@ -228,50 +200,55 @@ fsck_err:
return ret;
}
-static int __mark_reflink_p(struct btree_trans *trans,
+static int __trigger_reflink_p(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
struct bkey_s_c k, unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
- struct reflink_gc *ref;
- size_t l, r, m;
- u64 idx = le64_to_cpu(p.v->idx), start = idx;
- u64 end = le64_to_cpu(p.v->idx) + p.k->size;
int ret = 0;
- BUG_ON(!(flags & BTREE_TRIGGER_GC));
+ u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+ u64 end = le64_to_cpu(p.v->idx) + p.k->size + le32_to_cpu(p.v->back_pad);
- if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_reflink_p_fix) {
- idx -= le32_to_cpu(p.v->front_pad);
- end += le32_to_cpu(p.v->back_pad);
+ if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
+ while (idx < end && !ret)
+ ret = trans_trigger_reflink_p_segment(trans, p, &idx, flags);
}
- l = 0;
- r = c->reflink_gc_nr;
- while (l < r) {
- m = l + (r - l) / 2;
+ if (flags & BTREE_TRIGGER_GC) {
+ size_t l = 0, r = c->reflink_gc_nr;
- ref = genradix_ptr(&c->reflink_gc_table, m);
- if (ref->offset <= idx)
- l = m + 1;
- else
- r = m;
- }
+ while (l < r) {
+ size_t m = l + (r - l) / 2;
+ struct reflink_gc *ref = genradix_ptr(&c->reflink_gc_table, m);
+ if (ref->offset <= idx)
+ l = m + 1;
+ else
+ r = m;
+ }
- while (idx < end && !ret)
- ret = __bch2_mark_reflink_p(trans, p, start, end,
- &idx, flags, l++);
+ while (idx < end && !ret)
+ ret = gc_trigger_reflink_p_segment(trans, p, &idx, flags, l++);
+ }
return ret;
}
-int bch2_mark_reflink_p(struct btree_trans *trans,
- enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_s_c new,
- unsigned flags)
+int bch2_trigger_reflink_p(struct btree_trans *trans,
+ enum btree_id btree_id, unsigned level,
+ struct bkey_s_c old,
+ struct bkey_s new,
+ unsigned flags)
{
- return mem_trigger_run_overwrite_then_insert(__mark_reflink_p, trans, btree_id, level, old, new, flags);
+ if ((flags & BTREE_TRIGGER_TRANSACTIONAL) &&
+ (flags & BTREE_TRIGGER_INSERT)) {
+ struct bch_reflink_p *v = bkey_s_to_reflink_p(new).v;
+
+ v->front_pad = v->back_pad = 0;
+ }
+
+ return trigger_run_overwrite_then_insert(__trigger_reflink_p, trans, btree_id, level, old, new, flags);
}
/* indirect extents */
@@ -305,32 +282,34 @@ bool bch2_reflink_v_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r
}
#endif
-static inline void check_indirect_extent_deleting(struct bkey_i *new, unsigned *flags)
+static inline void check_indirect_extent_deleting(struct bkey_s new, unsigned *flags)
{
if ((*flags & BTREE_TRIGGER_INSERT) && !*bkey_refcount(new)) {
- new->k.type = KEY_TYPE_deleted;
- new->k.size = 0;
- set_bkey_val_u64s(&new->k, 0);
+ new.k->type = KEY_TYPE_deleted;
+ new.k->size = 0;
+ set_bkey_val_u64s(new.k, 0);
*flags &= ~BTREE_TRIGGER_INSERT;
}
}
int bch2_trans_mark_reflink_v(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_i *new,
+ struct bkey_s_c old, struct bkey_s new,
unsigned flags)
{
- check_indirect_extent_deleting(new, &flags);
+ if ((flags & BTREE_TRIGGER_TRANSACTIONAL) &&
+ (flags & BTREE_TRIGGER_INSERT))
+ check_indirect_extent_deleting(new, &flags);
if (old.k->type == KEY_TYPE_reflink_v &&
- new->k.type == KEY_TYPE_reflink_v &&
- old.k->u64s == new->k.u64s &&
+ new.k->type == KEY_TYPE_reflink_v &&
+ old.k->u64s == new.k->u64s &&
!memcmp(bkey_s_c_to_reflink_v(old).v->start,
- bkey_i_to_reflink_v(new)->v.start,
- bkey_val_bytes(&new->k) - 8))
+ bkey_s_to_reflink_v(new).v->start,
+ bkey_val_bytes(new.k) - 8))
return 0;
- return bch2_trans_mark_extent(trans, btree_id, level, old, new, flags);
+ return bch2_trigger_extent(trans, btree_id, level, old, new, flags);
}
/* indirect inline data */
@@ -355,7 +334,7 @@ void bch2_indirect_inline_data_to_text(struct printbuf *out,
int bch2_trans_mark_indirect_inline_data(struct btree_trans *trans,
enum btree_id btree_id, unsigned level,
- struct bkey_s_c old, struct bkey_i *new,
+ struct bkey_s_c old, struct bkey_s new,
unsigned flags)
{
check_indirect_extent_deleting(new, &flags);
@@ -398,7 +377,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k));
- refcount = bkey_refcount(r_v);
+ refcount = bkey_refcount(bkey_i_to_s(r_v));
*refcount = 0;
memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k));
diff --git a/libbcachefs/reflink.h b/libbcachefs/reflink.h
index 6cc9c4a7..8ee778ec 100644
--- a/libbcachefs/reflink.h
+++ b/libbcachefs/reflink.h
@@ -9,17 +9,14 @@ int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c,
void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c);
-int bch2_trans_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_i *, unsigned);
-int bch2_mark_reflink_p(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_s_c, unsigned);
+int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned,
+ struct bkey_s_c, struct bkey_s, unsigned);
#define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \
.key_invalid = bch2_reflink_p_invalid, \
.val_to_text = bch2_reflink_p_to_text, \
.key_merge = bch2_reflink_p_merge, \
- .trans_trigger = bch2_trans_mark_reflink_p, \
- .atomic_trigger = bch2_mark_reflink_p, \
+ .trigger = bch2_trigger_reflink_p, \
.min_val_size = 16, \
})
@@ -28,14 +25,13 @@ int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c,
void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
int bch2_trans_mark_reflink_v(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_i *, unsigned);
+ struct bkey_s_c, struct bkey_s, unsigned);
#define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \
.key_invalid = bch2_reflink_v_invalid, \
.val_to_text = bch2_reflink_v_to_text, \
.swab = bch2_ptr_swab, \
- .trans_trigger = bch2_trans_mark_reflink_v, \
- .atomic_trigger = bch2_mark_extent, \
+ .trigger = bch2_trans_mark_reflink_v, \
.min_val_size = 8, \
})
@@ -45,13 +41,13 @@ void bch2_indirect_inline_data_to_text(struct printbuf *,
struct bch_fs *, struct bkey_s_c);
int bch2_trans_mark_indirect_inline_data(struct btree_trans *,
enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_i *,
+ struct bkey_s_c, struct bkey_s,
unsigned);
#define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \
.key_invalid = bch2_indirect_inline_data_invalid, \
.val_to_text = bch2_indirect_inline_data_to_text, \
- .trans_trigger = bch2_trans_mark_indirect_inline_data, \
+ .trigger = bch2_trans_mark_indirect_inline_data, \
.min_val_size = 8, \
})
@@ -67,13 +63,13 @@ static inline const __le64 *bkey_refcount_c(struct bkey_s_c k)
}
}
-static inline __le64 *bkey_refcount(struct bkey_i *k)
+static inline __le64 *bkey_refcount(struct bkey_s k)
{
- switch (k->k.type) {
+ switch (k.k->type) {
case KEY_TYPE_reflink_v:
- return &bkey_i_to_reflink_v(k)->v.refcount;
+ return &bkey_s_to_reflink_v(k).v->refcount;
case KEY_TYPE_indirect_inline_data:
- return &bkey_i_to_indirect_inline_data(k)->v.refcount;
+ return &bkey_s_to_indirect_inline_data(k).v->refcount;
default:
return NULL;
}
diff --git a/libbcachefs/sb-downgrade.c b/libbcachefs/sb-downgrade.c
index d2a92fb0..441dcb1b 100644
--- a/libbcachefs/sb-downgrade.c
+++ b/libbcachefs/sb-downgrade.c
@@ -12,36 +12,105 @@
#include "sb-errors.h"
#include "super-io.h"
+#define RECOVERY_PASS_ALL_FSCK BIT_ULL(63)
+
/*
- * Downgrade table:
- * When dowgrading past certain versions, we need to run certain recovery passes
- * and fix certain errors:
+ * Upgrade, downgrade tables - run certain recovery passes, fix certain errors
*
* x(version, recovery_passes, errors...)
*/
-
-#define DOWNGRADE_TABLE() \
- x(disk_accounting_v2, \
- BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info), \
- BCH_FSCK_ERR_dev_usage_buckets_wrong)
-
-struct downgrade_entry {
+#define UPGRADE_TABLE() \
+ x(backpointers, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(inode_v3, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(unwritten_extents, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(bucket_gens, \
+ BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(lru_v2, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(fragmentation_lru, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(no_bps_in_alloc_keys, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(snapshot_trees, \
+ RECOVERY_PASS_ALL_FSCK) \
+ x(snapshot_skiplists, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_snapshots), \
+ BCH_FSCK_ERR_snapshot_bad_depth, \
+ BCH_FSCK_ERR_snapshot_bad_skiplist) \
+ x(deleted_inodes, \
+ BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
+ BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \
+ x(rebalance_work, \
+ BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))
+
+#define DOWNGRADE_TABLE()
+
+struct upgrade_downgrade_entry {
u64 recovery_passes;
u16 version;
u16 nr_errors;
const u16 *errors;
};
-#define x(ver, passes, ...) static const u16 ver_##errors[] = { __VA_ARGS__ };
+#define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ };
+UPGRADE_TABLE()
+#undef x
+
+static const struct upgrade_downgrade_entry upgrade_table[] = {
+#define x(ver, passes, ...) { \
+ .recovery_passes = passes, \
+ .version = bcachefs_metadata_version_##ver,\
+ .nr_errors = ARRAY_SIZE(upgrade_##ver##_errors), \
+ .errors = upgrade_##ver##_errors, \
+},
+UPGRADE_TABLE()
+#undef x
+};
+
+void bch2_sb_set_upgrade(struct bch_fs *c,
+ unsigned old_version,
+ unsigned new_version)
+{
+ lockdep_assert_held(&c->sb_lock);
+
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+ for (const struct upgrade_downgrade_entry *i = upgrade_table;
+ i < upgrade_table + ARRAY_SIZE(upgrade_table);
+ i++)
+ if (i->version > old_version && i->version <= new_version) {
+ u64 passes = i->recovery_passes;
+
+ if (passes & RECOVERY_PASS_ALL_FSCK)
+ passes |= bch2_fsck_recovery_passes();
+ passes &= ~RECOVERY_PASS_ALL_FSCK;
+
+ ext->recovery_passes_required[0] |=
+ cpu_to_le64(bch2_recovery_passes_to_stable(passes));
+
+ for (const u16 *e = i->errors;
+ e < i->errors + i->nr_errors;
+ e++) {
+ __set_bit(*e, c->sb.errors_silent);
+ ext->errors_silent[*e / 64] |= cpu_to_le64(BIT_ULL(*e % 64));
+ }
+ }
+}
+
+#define x(ver, passes, ...) static const u16 downgrade_ver_##errors[] = { __VA_ARGS__ };
DOWNGRADE_TABLE()
#undef x
-static const struct downgrade_entry downgrade_table[] = {
+static const struct upgrade_downgrade_entry downgrade_table[] = {
#define x(ver, passes, ...) { \
.recovery_passes = passes, \
.version = bcachefs_metadata_version_##ver,\
- .nr_errors = ARRAY_SIZE(ver_##errors), \
- .errors = ver_##errors, \
+ .nr_errors = ARRAY_SIZE(downgrade_##ver##_errors), \
+ .errors = downgrade_##ver##_errors, \
},
DOWNGRADE_TABLE()
#undef x
@@ -59,12 +128,6 @@ downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
(void *) &_i->errors[0] < vstruct_end(&(_d)->field); \
_i = downgrade_entry_next_c(_i))
-static inline unsigned bch2_sb_field_downgrade_u64s(unsigned nr)
-{
- return (sizeof(struct bch_sb_field_downgrade) +
- sizeof(struct bch_sb_field_downgrade_entry) * nr) / sizeof(u64);
-}
-
static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
struct printbuf *err)
{
@@ -127,7 +190,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
darray_char table = {};
int ret = 0;
- for (const struct downgrade_entry *src = downgrade_table;
+ for (const struct upgrade_downgrade_entry *src = downgrade_table;
src < downgrade_table + ARRAY_SIZE(downgrade_table);
src++) {
if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
@@ -171,11 +234,11 @@ out:
return ret;
}
-int bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
+void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
{
struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
if (!d)
- return 0;
+ return;
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
@@ -194,6 +257,4 @@ int bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_min
}
}
}
-
- return 0;
}
diff --git a/libbcachefs/sb-downgrade.h b/libbcachefs/sb-downgrade.h
index 0703ad7e..57e6c916 100644
--- a/libbcachefs/sb-downgrade.h
+++ b/libbcachefs/sb-downgrade.h
@@ -5,6 +5,7 @@
extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade;
int bch2_sb_downgrade_update(struct bch_fs *);
-int bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
+void bch2_sb_set_upgrade(struct bch_fs *, unsigned, unsigned);
+void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned);
#endif /* _BCACHEFS_SB_DOWNGRADE_H */
diff --git a/libbcachefs/sb-errors_types.h b/libbcachefs/sb-errors_types.h
index e7be1f9b..c08aacdf 100644
--- a/libbcachefs/sb-errors_types.h
+++ b/libbcachefs/sb-errors_types.h
@@ -249,7 +249,8 @@
x(dir_loop, 241) \
x(hash_table_key_duplicate, 242) \
x(hash_table_key_wrong_offset, 243) \
- x(unlinked_inode_not_on_deleted_list, 244)
+ x(unlinked_inode_not_on_deleted_list, 244) \
+ x(reflink_p_front_pad_bad, 245)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c
index 4c19a809..a44a238b 100644
--- a/libbcachefs/sb-members.c
+++ b/libbcachefs/sb-members.c
@@ -266,7 +266,7 @@ static void member_to_text(struct printbuf *out,
prt_str(out, "Durability:");
prt_tab(out);
- prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m));
+ prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1);
prt_newline(out);
prt_printf(out, "Discard:");
diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c
index 96df4052..56af9375 100644
--- a/libbcachefs/snapshot.c
+++ b/libbcachefs/snapshot.c
@@ -276,7 +276,7 @@ static void set_is_ancestor_bitmap(struct bch_fs *c, u32 id)
mutex_unlock(&c->snapshot_table_lock);
}
-int bch2_mark_snapshot(struct btree_trans *trans,
+static int __bch2_mark_snapshot(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s_c new,
unsigned flags)
@@ -330,6 +330,14 @@ err:
return ret;
}
+int bch2_mark_snapshot(struct btree_trans *trans,
+ enum btree_id btree, unsigned level,
+ struct bkey_s_c old, struct bkey_s new,
+ unsigned flags)
+{
+ return __bch2_mark_snapshot(trans, btree, level, old, new.s_c, flags);
+}
+
int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
struct bch_snapshot *s)
{
@@ -806,11 +814,10 @@ static int check_snapshot(struct btree_trans *trans,
real_depth = bch2_snapshot_depth(c, parent_id);
- if (le32_to_cpu(s.depth) != real_depth &&
- (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
- fsck_err(c, snapshot_bad_depth,
- "snapshot with incorrect depth field, should be %u:\n %s",
- real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
+ if (fsck_err_on(le32_to_cpu(s.depth) != real_depth,
+ c, snapshot_bad_depth,
+ "snapshot with incorrect depth field, should be %u:\n %s",
+ real_depth, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
@@ -824,11 +831,9 @@ static int check_snapshot(struct btree_trans *trans,
if (ret < 0)
goto err;
- if (!ret &&
- (c->sb.version_upgrade_complete < bcachefs_metadata_version_snapshot_skiplists ||
- fsck_err(c, snapshot_bad_skiplist,
- "snapshot with bad skiplist field:\n %s",
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))) {
+ if (fsck_err_on(!ret, c, snapshot_bad_skiplist,
+ "snapshot with bad skiplist field:\n %s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
u = bch2_bkey_make_mut_typed(trans, iter, &k, 0, snapshot);
ret = PTR_ERR_OR_ZERO(u);
if (ret)
@@ -1055,7 +1060,7 @@ static int create_snapids(struct btree_trans *trans, u32 parent, u32 tree,
bubble_sort(n->v.skip, ARRAY_SIZE(n->v.skip), cmp_le32);
SET_BCH_SNAPSHOT_SUBVOL(&n->v, true);
- ret = bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
+ ret = __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
bkey_s_c_null, bkey_i_to_s_c(&n->k_i), 0);
if (ret)
goto err;
@@ -1664,7 +1669,7 @@ int bch2_snapshots_read(struct bch_fs *c)
int ret = bch2_trans_run(c,
for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k,
- bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
+ __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?:
bch2_snapshot_set_equiv(trans, k) ?:
bch2_check_snapshot_needs_deletion(trans, k)) ?:
for_each_btree_key(trans, iter, BTREE_ID_snapshots,
diff --git a/libbcachefs/snapshot.h b/libbcachefs/snapshot.h
index 94f35b2c..7c66ffc0 100644
--- a/libbcachefs/snapshot.h
+++ b/libbcachefs/snapshot.h
@@ -22,12 +22,12 @@ void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c,
enum bkey_invalid_flags, struct printbuf *);
int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
- struct bkey_s_c, struct bkey_s_c, unsigned);
+ struct bkey_s_c, struct bkey_s, unsigned);
#define bch2_bkey_ops_snapshot ((struct bkey_ops) { \
.key_invalid = bch2_snapshot_invalid, \
.val_to_text = bch2_snapshot_to_text, \
- .atomic_trigger = bch2_mark_snapshot, \
+ .trigger = bch2_mark_snapshot, \
.min_val_size = 24, \
})
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 7cbf496d..ea869217 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -30,14 +30,12 @@ static const struct blk_holder_ops bch2_sb_handle_bdev_ops = {
struct bch2_metadata_version {
u16 version;
const char *name;
- u64 recovery_passes;
};
static const struct bch2_metadata_version bch2_metadata_versions[] = {
-#define x(n, v, _recovery_passes) { \
+#define x(n, v) { \
.version = v, \
.name = #n, \
- .recovery_passes = _recovery_passes, \
},
BCH_METADATA_VERSIONS()
#undef x
@@ -70,24 +68,6 @@ unsigned bch2_latest_compatible_version(unsigned v)
return v;
}
-u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
- unsigned old_version,
- unsigned new_version)
-{
- u64 ret = 0;
-
- for (const struct bch2_metadata_version *i = bch2_metadata_versions;
- i < bch2_metadata_versions + ARRAY_SIZE(bch2_metadata_versions);
- i++)
- if (i->version > old_version && i->version <= new_version) {
- if (i->recovery_passes & RECOVERY_PASS_ALL_FSCK)
- ret |= bch2_fsck_recovery_passes();
- ret |= i->recovery_passes;
- }
-
- return ret &= ~RECOVERY_PASS_ALL_FSCK;
-}
-
const char * const bch2_sb_fields[] = {
#define x(name, nr) #name,
BCH_SB_FIELDS()
@@ -190,8 +170,12 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;
if (new_bytes > max_bytes) {
- pr_err("%pg: superblock too big: want %zu but have %llu",
- sb->bdev, new_bytes, max_bytes);
+ struct printbuf buf = PRINTBUF;
+
+ prt_bdevname(&buf, sb->bdev);
+ prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes);
+ pr_err("%s", buf.buf);
+ printbuf_exit(&buf);
return -BCH_ERR_ENOSPC_sb;
}
}
@@ -1095,8 +1079,10 @@ void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
}
/* Downgrade if superblock is at a higher version than currently supported: */
-void bch2_sb_maybe_downgrade(struct bch_fs *c)
+bool bch2_check_version_downgrade(struct bch_fs *c)
{
+ bool ret = bcachefs_metadata_version_current < c->sb.version;
+
lockdep_assert_held(&c->sb_lock);
/*
@@ -1110,6 +1096,7 @@ void bch2_sb_maybe_downgrade(struct bch_fs *c)
if (c->sb.version_min > bcachefs_metadata_version_current)
c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
+ return ret;
}
void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version)
@@ -1200,8 +1187,8 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
return ret;
}
-void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
- struct bch_sb_field *f)
+void __bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
{
unsigned type = le32_to_cpu(f->type);
const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
@@ -1209,6 +1196,15 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
if (!out->nr_tabstops)
printbuf_tabstop_push(out, 32);
+ if (ops->to_text)
+ ops->to_text(out, sb, f);
+}
+
+void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
+ struct bch_sb_field *f)
+{
+ unsigned type = le32_to_cpu(f->type);
+
if (type < BCH_SB_FIELD_NR)
prt_printf(out, "%s", bch2_sb_fields[type]);
else
@@ -1217,11 +1213,7 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
prt_printf(out, " (size %zu):", vstruct_bytes(f));
prt_newline(out);
- if (ops->to_text) {
- printbuf_indent_add(out, 2);
- ops->to_text(out, sb, f);
- printbuf_indent_sub(out, 2);
- }
+ __bch2_sb_field_to_text(out, sb, f);
}
void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l)
diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h
index 1a8c2088..95e80e06 100644
--- a/libbcachefs/super-io.h
+++ b/libbcachefs/super-io.h
@@ -19,10 +19,6 @@ static inline bool bch2_version_compatible(u16 version)
void bch2_version_to_text(struct printbuf *, unsigned);
unsigned bch2_latest_compatible_version(unsigned);
-u64 bch2_upgrade_recovery_passes(struct bch_fs *c,
- unsigned,
- unsigned);
-
static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f)
{
return le32_to_cpu(f->u64s) * sizeof(u64);
@@ -94,9 +90,11 @@ static inline void bch2_check_set_feature(struct bch_fs *c, unsigned feat)
__bch2_check_set_feature(c, feat);
}
-void bch2_sb_maybe_downgrade(struct bch_fs *);
+bool bch2_check_version_downgrade(struct bch_fs *);
void bch2_sb_upgrade(struct bch_fs *, unsigned);
+void __bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
+ struct bch_sb_field *);
void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
struct bch_sb_field *);
void bch2_sb_layout_to_text(struct printbuf *, struct bch_sb_layout *);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 0f3a924c..9dbc3594 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -88,14 +88,11 @@ const char * const bch2_fs_flag_strs[] = {
void __bch2_print(struct bch_fs *c, const char *fmt, ...)
{
- struct log_output *output = c->output;
- va_list args;
-
- if (c->output_filter && c->output_filter != current)
- output = NULL;
+ struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c);
+ va_list args;
va_start(args, fmt);
- if (likely(!output)) {
+ if (likely(!stdio)) {
vprintk(fmt, args);
} else {
unsigned long flags;
@@ -103,11 +100,11 @@ void __bch2_print(struct bch_fs *c, const char *fmt, ...)
if (fmt[0] == KERN_SOH[0])
fmt += 2;
- spin_lock_irqsave(&output->lock, flags);
- prt_vprintf(&output->buf, fmt, args);
- spin_unlock_irqrestore(&output->lock, flags);
+ spin_lock_irqsave(&stdio->output_lock, flags);
+ prt_vprintf(&stdio->output_buf, fmt, args);
+ spin_unlock_irqrestore(&stdio->output_lock, flags);
- wake_up(&output->wait);
+ wake_up(&stdio->output_wait);
}
va_end(args);
}
@@ -724,7 +721,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
goto out;
}
- c->output = (void *)(unsigned long) opts.log_output;
+ c->stdio = (void *)(unsigned long) opts.stdio;
__module_get(THIS_MODULE);
@@ -871,7 +868,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->io_complete_wq = alloc_workqueue("bcachefs_io",
- WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 1)) ||
+ WQ_FREEZABLE|WQ_HIGHPRI|WQ_MEM_RECLAIM, 512)) ||
!(c->write_ref_wq = alloc_workqueue("bcachefs_write_ref",
WQ_FREEZABLE, 0)) ||
#ifndef BCH_WRITE_REF_DEBUG
@@ -1086,17 +1083,22 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
fs->sb->write_time != sb->sb->write_time) {
struct printbuf buf = PRINTBUF;
- prt_printf(&buf, "Split brain detected between %pg and %pg:",
- sb->bdev, fs->bdev);
+ prt_str(&buf, "Split brain detected between ");
+ prt_bdevname(&buf, sb->bdev);
+ prt_str(&buf, " and ");
+ prt_bdevname(&buf, fs->bdev);
+ prt_char(&buf, ':');
prt_newline(&buf);
prt_printf(&buf, "seq=%llu but write_time different, got", le64_to_cpu(sb->sb->seq));
prt_newline(&buf);
- prt_printf(&buf, "%pg ", fs->bdev);
+ prt_bdevname(&buf, fs->bdev);
+ prt_char(&buf, ' ');
bch2_prt_datetime(&buf, le64_to_cpu(fs->sb->write_time));;
prt_newline(&buf);
- prt_printf(&buf, "%pg ", sb->bdev);
+ prt_bdevname(&buf, sb->bdev);
+ prt_char(&buf, ' ');
bch2_prt_datetime(&buf, le64_to_cpu(sb->sb->write_time));;
prt_newline(&buf);
@@ -1112,13 +1114,26 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs,
u64 seq_from_member = le64_to_cpu(sb->sb->seq);
if (seq_from_fs && seq_from_fs < seq_from_member) {
- pr_err("Split brain detected between %pg and %pg:\n"
- "%pg believes seq of %pg to be %llu, but %pg has %llu\n"
- "Not using %pg",
- sb->bdev, fs->bdev,
- fs->bdev, sb->bdev, seq_from_fs,
- sb->bdev, seq_from_member,
- sb->bdev);
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "Split brain detected between ");
+ prt_bdevname(&buf, sb->bdev);
+ prt_str(&buf, " and ");
+ prt_bdevname(&buf, fs->bdev);
+ prt_char(&buf, ':');
+ prt_newline(&buf);
+
+ prt_bdevname(&buf, fs->bdev);
+ prt_str(&buf, "believes seq of ");
+ prt_bdevname(&buf, sb->bdev);
+ prt_printf(&buf, " to be %llu, but ", seq_from_fs);
+ prt_bdevname(&buf, sb->bdev);
+ prt_printf(&buf, " has %llu\n", seq_from_member);
+ prt_str(&buf, "Not using ");
+ prt_bdevname(&buf, sb->bdev);
+
+ pr_err("%s", buf.buf);
+ printbuf_exit(&buf);
return -BCH_ERR_device_splitbrain;
}
@@ -1367,9 +1382,14 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
bch2_dev_sysfs_online(c, ca);
+ struct printbuf name = PRINTBUF;
+ prt_bdevname(&name, ca->disk_sb.bdev);
+
if (c->sb.nr_devices == 1)
- snprintf(c->name, sizeof(c->name), "%pg", ca->disk_sb.bdev);
- snprintf(ca->name, sizeof(ca->name), "%pg", ca->disk_sb.bdev);
+ strlcpy(c->name, name.buf, sizeof(c->name));
+ strlcpy(ca->name, name.buf, sizeof(ca->name));
+
+ printbuf_exit(&name);
rebalance_wakeup(c);
return 0;
diff --git a/libbcachefs/thread_with_file.c b/libbcachefs/thread_with_file.c
new file mode 100644
index 00000000..b1c867aa
--- /dev/null
+++ b/libbcachefs/thread_with_file.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef NO_BCACHEFS_FS
+
+#include "bcachefs.h"
+#include "printbuf.h"
+#include "thread_with_file.h"
+
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/kthread.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+
+void bch2_thread_with_file_exit(struct thread_with_file *thr)
+{
+ if (thr->task) {
+ kthread_stop(thr->task);
+ put_task_struct(thr->task);
+ }
+}
+
+int bch2_run_thread_with_file(struct thread_with_file *thr,
+ const struct file_operations *fops,
+ int (*fn)(void *))
+{
+ struct file *file = NULL;
+ int ret, fd = -1;
+ unsigned fd_flags = O_CLOEXEC;
+
+ if (fops->read && fops->write)
+ fd_flags |= O_RDWR;
+ else if (fops->read)
+ fd_flags |= O_RDONLY;
+ else if (fops->write)
+ fd_flags |= O_WRONLY;
+
+ char name[TASK_COMM_LEN];
+ get_task_comm(name, current);
+
+ thr->ret = 0;
+ thr->task = kthread_create(fn, thr, "%s", name);
+ ret = PTR_ERR_OR_ZERO(thr->task);
+ if (ret)
+ return ret;
+
+ ret = get_unused_fd_flags(fd_flags);
+ if (ret < 0)
+ goto err;
+ fd = ret;
+
+ file = anon_inode_getfile(name, fops, thr, fd_flags);
+ ret = PTR_ERR_OR_ZERO(file);
+ if (ret)
+ goto err;
+
+ fd_install(fd, file);
+ get_task_struct(thr->task);
+ wake_up_process(thr->task);
+ return fd;
+err:
+ if (fd >= 0)
+ put_unused_fd(fd);
+ if (thr->task)
+ kthread_stop(thr->task);
+ return ret;
+}
+
+static inline bool thread_with_stdio_has_output(struct thread_with_stdio *thr)
+{
+ return thr->stdio.output_buf.pos ||
+ thr->output2.nr ||
+ thr->thr.done;
+}
+
+static ssize_t thread_with_stdio_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct thread_with_stdio *thr =
+ container_of(file->private_data, struct thread_with_stdio, thr);
+ size_t copied = 0, b;
+ int ret = 0;
+
+ if ((file->f_flags & O_NONBLOCK) &&
+ !thread_with_stdio_has_output(thr))
+ return -EAGAIN;
+
+ ret = wait_event_interruptible(thr->stdio.output_wait,
+ thread_with_stdio_has_output(thr));
+ if (ret)
+ return ret;
+
+ if (thr->thr.done)
+ return 0;
+
+ while (len) {
+ ret = darray_make_room(&thr->output2, thr->stdio.output_buf.pos);
+ if (ret)
+ break;
+
+ spin_lock_irq(&thr->stdio.output_lock);
+ b = min_t(size_t, darray_room(thr->output2), thr->stdio.output_buf.pos);
+
+ memcpy(&darray_top(thr->output2), thr->stdio.output_buf.buf, b);
+ memmove(thr->stdio.output_buf.buf,
+ thr->stdio.output_buf.buf + b,
+ thr->stdio.output_buf.pos - b);
+
+ thr->output2.nr += b;
+ thr->stdio.output_buf.pos -= b;
+ spin_unlock_irq(&thr->stdio.output_lock);
+
+ b = min(len, thr->output2.nr);
+ if (!b)
+ break;
+
+ b -= copy_to_user(buf, thr->output2.data, b);
+ if (!b) {
+ ret = -EFAULT;
+ break;
+ }
+
+ copied += b;
+ buf += b;
+ len -= b;
+
+ memmove(thr->output2.data,
+ thr->output2.data + b,
+ thr->output2.nr - b);
+ thr->output2.nr -= b;
+ }
+
+ return copied ?: ret;
+}
+
+static int thread_with_stdio_release(struct inode *inode, struct file *file)
+{
+ struct thread_with_stdio *thr =
+ container_of(file->private_data, struct thread_with_stdio, thr);
+
+ bch2_thread_with_file_exit(&thr->thr);
+ printbuf_exit(&thr->stdio.input_buf);
+ printbuf_exit(&thr->stdio.output_buf);
+ darray_exit(&thr->output2);
+ thr->exit(thr);
+ return 0;
+}
+
+#define WRITE_BUFFER 4096
+
+static inline bool thread_with_stdio_has_input_space(struct thread_with_stdio *thr)
+{
+ return thr->stdio.input_buf.pos < WRITE_BUFFER || thr->thr.done;
+}
+
+static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubuf,
+ size_t len, loff_t *ppos)
+{
+ struct thread_with_stdio *thr =
+ container_of(file->private_data, struct thread_with_stdio, thr);
+ struct printbuf *buf = &thr->stdio.input_buf;
+ size_t copied = 0;
+ ssize_t ret = 0;
+
+ while (len) {
+ if (thr->thr.done) {
+ ret = -EPIPE;
+ break;
+ }
+
+ size_t b = len - fault_in_readable(ubuf, len);
+ if (!b) {
+ ret = -EFAULT;
+ break;
+ }
+
+ spin_lock(&thr->stdio.input_lock);
+ if (buf->pos < WRITE_BUFFER)
+ bch2_printbuf_make_room(buf, min(b, WRITE_BUFFER - buf->pos));
+ b = min(len, printbuf_remaining_size(buf));
+
+ if (b && !copy_from_user_nofault(&buf->buf[buf->pos], ubuf, b)) {
+ ubuf += b;
+ len -= b;
+ copied += b;
+ buf->pos += b;
+ }
+ spin_unlock(&thr->stdio.input_lock);
+
+ if (b) {
+ wake_up(&thr->stdio.input_wait);
+ } else {
+ if ((file->f_flags & O_NONBLOCK)) {
+ ret = -EAGAIN;
+ break;
+ }
+
+ ret = wait_event_interruptible(thr->stdio.input_wait,
+ thread_with_stdio_has_input_space(thr));
+ if (ret)
+ break;
+ }
+ }
+
+ return copied ?: ret;
+}
+
+static __poll_t thread_with_stdio_poll(struct file *file, struct poll_table_struct *wait)
+{
+ struct thread_with_stdio *thr =
+ container_of(file->private_data, struct thread_with_stdio, thr);
+
+ poll_wait(file, &thr->stdio.output_wait, wait);
+ poll_wait(file, &thr->stdio.input_wait, wait);
+
+ __poll_t mask = 0;
+
+ if (thread_with_stdio_has_output(thr))
+ mask |= EPOLLIN;
+ if (thread_with_stdio_has_input_space(thr))
+ mask |= EPOLLOUT;
+ if (thr->thr.done)
+ mask |= EPOLLHUP|EPOLLERR;
+ return mask;
+}
+
+static const struct file_operations thread_with_stdio_fops = {
+ .release = thread_with_stdio_release,
+ .read = thread_with_stdio_read,
+ .write = thread_with_stdio_write,
+ .poll = thread_with_stdio_poll,
+ .llseek = no_llseek,
+};
+
+int bch2_run_thread_with_stdio(struct thread_with_stdio *thr,
+ void (*exit)(struct thread_with_stdio *),
+ int (*fn)(void *))
+{
+ thr->stdio.input_buf = PRINTBUF;
+ thr->stdio.input_buf.atomic++;
+ spin_lock_init(&thr->stdio.input_lock);
+ init_waitqueue_head(&thr->stdio.input_wait);
+
+ thr->stdio.output_buf = PRINTBUF;
+ thr->stdio.output_buf.atomic++;
+ spin_lock_init(&thr->stdio.output_lock);
+ init_waitqueue_head(&thr->stdio.output_wait);
+
+ darray_init(&thr->output2);
+ thr->exit = exit;
+
+ return bch2_run_thread_with_file(&thr->thr, &thread_with_stdio_fops, fn);
+}
+
+int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *buf, size_t len)
+{
+ wait_event(stdio->input_wait,
+ stdio->input_buf.pos || stdio->done);
+
+ if (stdio->done)
+ return -1;
+
+ spin_lock(&stdio->input_lock);
+ int ret = min(len, stdio->input_buf.pos);
+ stdio->input_buf.pos -= ret;
+ memcpy(buf, stdio->input_buf.buf, ret);
+ memmove(stdio->input_buf.buf,
+ stdio->input_buf.buf + ret,
+ stdio->input_buf.pos);
+ spin_unlock(&stdio->input_lock);
+
+ wake_up(&stdio->input_wait);
+ return ret;
+}
+
+int bch2_stdio_redirect_readline(struct stdio_redirect *stdio, char *buf, size_t len)
+{
+ wait_event(stdio->input_wait,
+ stdio->input_buf.pos || stdio->done);
+
+ if (stdio->done)
+ return -1;
+
+ spin_lock(&stdio->input_lock);
+ int ret = min(len, stdio->input_buf.pos);
+ char *n = memchr(stdio->input_buf.buf, '\n', ret);
+ if (n)
+ ret = min(ret, n + 1 - stdio->input_buf.buf);
+ stdio->input_buf.pos -= ret;
+ memcpy(buf, stdio->input_buf.buf, ret);
+ memmove(stdio->input_buf.buf,
+ stdio->input_buf.buf + ret,
+ stdio->input_buf.pos);
+ spin_unlock(&stdio->input_lock);
+
+ wake_up(&stdio->input_wait);
+ return ret;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/libbcachefs/thread_with_file.h b/libbcachefs/thread_with_file.h
new file mode 100644
index 00000000..05879c50
--- /dev/null
+++ b/libbcachefs/thread_with_file.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_THREAD_WITH_FILE_H
+#define _BCACHEFS_THREAD_WITH_FILE_H
+
+#include "thread_with_file_types.h"
+
+struct task_struct;
+
+struct thread_with_file {
+ struct task_struct *task;
+ int ret;
+ bool done;
+};
+
+void bch2_thread_with_file_exit(struct thread_with_file *);
+int bch2_run_thread_with_file(struct thread_with_file *,
+ const struct file_operations *,
+ int (*fn)(void *));
+
+struct thread_with_stdio {
+ struct thread_with_file thr;
+ struct stdio_redirect stdio;
+ DARRAY(char) output2;
+ void (*exit)(struct thread_with_stdio *);
+};
+
+static inline void thread_with_stdio_done(struct thread_with_stdio *thr)
+{
+ thr->thr.done = true;
+ thr->stdio.done = true;
+ wake_up(&thr->stdio.input_wait);
+ wake_up(&thr->stdio.output_wait);
+}
+
+int bch2_run_thread_with_stdio(struct thread_with_stdio *,
+ void (*exit)(struct thread_with_stdio *),
+ int (*fn)(void *));
+int bch2_stdio_redirect_read(struct stdio_redirect *, char *, size_t);
+int bch2_stdio_redirect_readline(struct stdio_redirect *, char *, size_t);
+
+#endif /* _BCACHEFS_THREAD_WITH_FILE_H */
diff --git a/libbcachefs/thread_with_file_types.h b/libbcachefs/thread_with_file_types.h
new file mode 100644
index 00000000..90b5e645
--- /dev/null
+++ b/libbcachefs/thread_with_file_types.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_THREAD_WITH_FILE_TYPES_H
+#define _BCACHEFS_THREAD_WITH_FILE_TYPES_H
+
+struct stdio_redirect {
+ spinlock_t output_lock;
+ wait_queue_head_t output_wait;
+ struct printbuf output_buf;
+
+ spinlock_t input_lock;
+ wait_queue_head_t input_wait;
+ struct printbuf input_buf;
+ bool done;
+};
+
+#endif /* _BCACHEFS_THREAD_WITH_FILE_TYPES_H */
diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h
index 427edb3e..c94876b3 100644
--- a/libbcachefs/trace.h
+++ b/libbcachefs/trace.h
@@ -72,6 +72,27 @@ DECLARE_EVENT_CLASS(trans_str,
__entry->trans_fn, (void *) __entry->caller_ip, __get_str(str))
);
+DECLARE_EVENT_CLASS(trans_str_nocaller,
+ TP_PROTO(struct btree_trans *trans, const char *str),
+ TP_ARGS(trans, str),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __array(char, trans_fn, 32 )
+ __string(str, str )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = trans->c->dev;
+ strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+ __assign_str(str, str);
+ ),
+
+ TP_printk("%d,%d %s %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->trans_fn, __get_str(str))
+);
+
DECLARE_EVENT_CLASS(btree_node_nofs,
TP_PROTO(struct bch_fs *c, struct btree *b),
TP_ARGS(c, b),
@@ -1243,11 +1264,10 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure,
TP_ARGS(trans, caller_ip, path)
);
-DEFINE_EVENT(trans_str, trans_restart_would_deadlock,
+DEFINE_EVENT(trans_str_nocaller, trans_restart_would_deadlock,
TP_PROTO(struct btree_trans *trans,
- unsigned long caller_ip,
const char *cycle),
- TP_ARGS(trans, caller_ip, cycle)
+ TP_ARGS(trans, cycle)
);
DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit,
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index 2e4c5d96..c2ef7cdd 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -267,7 +267,7 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
console_unlock();
}
-int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task)
+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr)
{
#ifdef CONFIG_STACKTRACE
unsigned nr_entries = 0;
@@ -282,7 +282,7 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task)
return -1;
do {
- nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, 0);
+ nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1);
} while (nr_entries == stack->size &&
!(ret = darray_make_room(stack, stack->size * 2)));
@@ -303,10 +303,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack)
}
}
-int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task)
+int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr)
{
bch_stacktrace stack = { 0 };
- int ret = bch2_save_backtrace(&stack, task);
+ int ret = bch2_save_backtrace(&stack, task, skipnr + 1);
bch2_prt_backtrace(out, &stack);
darray_exit(&stack);
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index 4290e0a5..c75fc319 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -347,9 +347,18 @@ void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
void bch2_print_string_as_lines(const char *prefix, const char *lines);
typedef DARRAY(unsigned long) bch_stacktrace;
-int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *);
+int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned);
void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *);
-int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *);
+int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned);
+
+static inline void prt_bdevname(struct printbuf *out, struct block_device *bdev)
+{
+#ifdef __KERNEL__
+ prt_printf(out, "%pg", bdev);
+#else
+ prt_str(out, bdev->name);
+#endif
+}
#define NR_QUANTILES 15
#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)